├── docs ├── logo.png ├── favicon.ico ├── articles │ ├── download_lawsuit_cache │ │ └── html │ │ │ ├── unnamed-chunk-1_59c8f78757a769273ec1da46c5c4a5a7.rdb │ │ │ ├── unnamed-chunk-1_9f30ef9b8f3757d8fae82f274ac5d7f1.rdb │ │ │ ├── unnamed-chunk-4_2cebd89ec883d262224fce8263e2e483.rdb │ │ │ ├── unnamed-chunk-4_f23b500082f72ad3c1e33f3909c0d7b3.rdb │ │ │ ├── __packages │ │ │ ├── unnamed-chunk-1_59c8f78757a769273ec1da46c5c4a5a7.rdx │ │ │ ├── unnamed-chunk-1_9f30ef9b8f3757d8fae82f274ac5d7f1.rdx │ │ │ ├── unnamed-chunk-2_27fe08e4a62b2294154e3ac0e08ff2aa.rdb │ │ │ ├── unnamed-chunk-2_27fe08e4a62b2294154e3ac0e08ff2aa.rdx │ │ │ ├── unnamed-chunk-2_6c54e0d5b3d0a485ddc297ef127b1a19.rdb │ │ │ ├── unnamed-chunk-2_6c54e0d5b3d0a485ddc297ef127b1a19.rdx │ │ │ ├── unnamed-chunk-3_aa0b0cb211f7f17319fd2f0247a0cfd1.rdb │ │ │ ├── unnamed-chunk-3_aa0b0cb211f7f17319fd2f0247a0cfd1.rdx │ │ │ ├── unnamed-chunk-3_e6f1b7bb9418c409ba306f182b0777d6.rdb │ │ │ ├── unnamed-chunk-3_e6f1b7bb9418c409ba306f182b0777d6.rdx │ │ │ ├── unnamed-chunk-4_2cebd89ec883d262224fce8263e2e483.rdx │ │ │ ├── unnamed-chunk-4_f23b500082f72ad3c1e33f3909c0d7b3.rdx │ │ │ ├── unnamed-chunk-5_2911596ed16555e1452221c44d068e43.rdb │ │ │ ├── unnamed-chunk-5_2911596ed16555e1452221c44d068e43.rdx │ │ │ ├── unnamed-chunk-1_59c8f78757a769273ec1da46c5c4a5a7.RData │ │ │ ├── unnamed-chunk-1_9f30ef9b8f3757d8fae82f274ac5d7f1.RData │ │ │ ├── unnamed-chunk-2_27fe08e4a62b2294154e3ac0e08ff2aa.RData │ │ │ ├── unnamed-chunk-2_6c54e0d5b3d0a485ddc297ef127b1a19.RData │ │ │ ├── unnamed-chunk-3_aa0b0cb211f7f17319fd2f0247a0cfd1.RData │ │ │ ├── unnamed-chunk-3_e6f1b7bb9418c409ba306f182b0777d6.RData │ │ │ ├── unnamed-chunk-4_2cebd89ec883d262224fce8263e2e483.RData │ │ │ ├── unnamed-chunk-4_f23b500082f72ad3c1e33f3909c0d7b3.RData │ │ │ └── unnamed-chunk-5_2911596ed16555e1452221c44d068e43.RData │ └── index.html ├── reference │ ├── figures │ │ └── logo.png │ ├── make_parser.html │ ├── pipe.html │ ├── op-definition.html │ ├── parse_data.html │ ├── 
parse_parts.html │ ├── parse_movs.html │ ├── parse_decisions.html │ ├── run_parser.html │ ├── peek_cjpg.html │ ├── peek_cjsg.html │ ├── download_decision.html │ ├── download_2deg_lawsuit.html │ ├── download_lawsuit.html │ ├── cjpg_table.html │ ├── cjsg_table.html │ ├── parse_cjpg.html │ └── parse_cjsg.html ├── pkgdown.yml ├── tocBullet.svg ├── link.svg ├── pkgdown.js ├── jquery.sticky-kit.min.js ├── authors.html └── pkgdown.css ├── tests ├── testthat.R └── testthat │ ├── test_download_decision.R │ ├── test_download_cposg.R │ ├── test_download_cpopg.R │ ├── test_cjxg_table.R │ ├── test_parse_cpoxg.R │ └── test_download_cjxg.R ├── man ├── figures │ └── logo.png ├── pipe.Rd ├── parse_data.Rd ├── parse_hist.Rd ├── parse_movs.Rd ├── parse_parts.Rd ├── parse_pd.Rd ├── make_parser.Rd ├── parse_hearings.Rd ├── parse_decisions.Rd ├── parse_data.cpopg.Rd ├── parse_data.cposg.Rd ├── parse_hist.cpopg.Rd ├── parse_movs.cpopg.Rd ├── parse_parts.cpopg.Rd ├── parse_parts.cposg.Rd ├── parse_movs.cposg.Rd ├── parse_pd.cpopg.Rd ├── parse_hearings.cpopg.Rd ├── parse_decisions.cposg.Rd ├── parse_cjpg_lawsuit.Rd ├── parse_cjsg_lawsuit.Rd ├── parse_cjpg_.Rd ├── parse_cjsg_.Rd ├── run_parser.Rd ├── parse_cpopg_all.Rd ├── parse_cposg_all.Rd ├── peek_cjpg.Rd ├── peek_cjsg.Rd ├── download_decision.Rd ├── cjpg_table.Rd ├── cjsg_table.Rd ├── parse_cjpg.Rd ├── parse_cjsg.Rd ├── download_cposg.Rd ├── download_cpopg.Rd ├── download_cjsg_tjmg.Rd ├── browse_table.Rd ├── download_cjpg.Rd └── download_cjsg.Rd ├── .gitignore ├── .Rbuildignore ├── .travis.yml ├── esaj.Rproj ├── NAMESPACE ├── DESCRIPTION ├── appveyor.yml ├── R ├── aux_colors.R ├── aux_parser.R ├── cposg.R ├── cpopg.R ├── decisions.R ├── cjsg_table.R ├── parser.R ├── parse_cjpg.R ├── aux_captcha.R ├── parse_cposg.R ├── parse_cjsg.R ├── utils.R └── cjpg_table.R ├── _pkgdown.yml ├── vignettes ├── download_decision.Rmd └── download_lawsuit.Rmd ├── README.Rmd └── README.md /docs/logo.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/logo.png -------------------------------------------------------------------------------- /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/favicon.ico -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(esaj) 3 | 4 | test_check("esaj") 5 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/man/figures/logo.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | inst/doc 5 | .DS_Store 6 | .Rapp.history 7 | -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-1_59c8f78757a769273ec1da46c5c4a5a7.rdb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-1_9f30ef9b8f3757d8fae82f274ac5d7f1.rdb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-4_2cebd89ec883d262224fce8263e2e483.rdb: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-4_f23b500082f72ad3c1e33f3909c0d7b3.rdb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/__packages: -------------------------------------------------------------------------------- 1 | base 2 | esaj 3 | bindrcpp 4 | randomForest 5 | -------------------------------------------------------------------------------- /docs/reference/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/reference/figures/logo.png -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^data-raw$ 4 | ^\.travis\.yml$ 5 | ^appveyor\.yml$ 6 | ^docs$ 7 | ^_pkgdown\.yml$ 8 | ^LICENSE$ 9 | ^README\.Rmd$ 10 | -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-1_59c8f78757a769273ec1da46c5c4a5a7.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-1_59c8f78757a769273ec1da46c5c4a5a7.rdx -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-1_9f30ef9b8f3757d8fae82f274ac5d7f1.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-1_9f30ef9b8f3757d8fae82f274ac5d7f1.rdx 
-------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-2_27fe08e4a62b2294154e3ac0e08ff2aa.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-2_27fe08e4a62b2294154e3ac0e08ff2aa.rdb -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-2_27fe08e4a62b2294154e3ac0e08ff2aa.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-2_27fe08e4a62b2294154e3ac0e08ff2aa.rdx -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-2_6c54e0d5b3d0a485ddc297ef127b1a19.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-2_6c54e0d5b3d0a485ddc297ef127b1a19.rdb -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-2_6c54e0d5b3d0a485ddc297ef127b1a19.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-2_6c54e0d5b3d0a485ddc297ef127b1a19.rdx -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-3_aa0b0cb211f7f17319fd2f0247a0cfd1.rdb: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-3_aa0b0cb211f7f17319fd2f0247a0cfd1.rdb -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-3_aa0b0cb211f7f17319fd2f0247a0cfd1.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-3_aa0b0cb211f7f17319fd2f0247a0cfd1.rdx -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-3_e6f1b7bb9418c409ba306f182b0777d6.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-3_e6f1b7bb9418c409ba306f182b0777d6.rdb -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-3_e6f1b7bb9418c409ba306f182b0777d6.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-3_e6f1b7bb9418c409ba306f182b0777d6.rdx -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-4_2cebd89ec883d262224fce8263e2e483.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-4_2cebd89ec883d262224fce8263e2e483.rdx -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-4_f23b500082f72ad3c1e33f3909c0d7b3.rdx: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-4_f23b500082f72ad3c1e33f3909c0d7b3.rdx -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-5_2911596ed16555e1452221c44d068e43.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-5_2911596ed16555e1452221c44d068e43.rdb -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-5_2911596ed16555e1452221c44d068e43.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-5_2911596ed16555e1452221c44d068e43.rdx -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-1_59c8f78757a769273ec1da46c5c4a5a7.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-1_59c8f78757a769273ec1da46c5c4a5a7.RData -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-1_9f30ef9b8f3757d8fae82f274ac5d7f1.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-1_9f30ef9b8f3757d8fae82f274ac5d7f1.RData -------------------------------------------------------------------------------- 
/docs/articles/download_lawsuit_cache/html/unnamed-chunk-2_27fe08e4a62b2294154e3ac0e08ff2aa.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-2_27fe08e4a62b2294154e3ac0e08ff2aa.RData -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-2_6c54e0d5b3d0a485ddc297ef127b1a19.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-2_6c54e0d5b3d0a485ddc297ef127b1a19.RData -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-3_aa0b0cb211f7f17319fd2f0247a0cfd1.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-3_aa0b0cb211f7f17319fd2f0247a0cfd1.RData -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-3_e6f1b7bb9418c409ba306f182b0777d6.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-3_e6f1b7bb9418c409ba306f182b0777d6.RData -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-4_2cebd89ec883d262224fce8263e2e483.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-4_2cebd89ec883d262224fce8263e2e483.RData 
-------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-4_f23b500082f72ad3c1e33f3909c0d7b3.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-4_f23b500082f72ad3c1e33f3909c0d7b3.RData -------------------------------------------------------------------------------- /docs/articles/download_lawsuit_cache/html/unnamed-chunk-5_2911596ed16555e1452221c44d068e43.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/courtsbr/esaj/HEAD/docs/articles/download_lawsuit_cache/html/unnamed-chunk-5_2911596ed16555e1452221c44d068e43.RData -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \description{ 7 | See \code{\link[magrittr]{\%>\%}} for more details. 
8 | } 9 | \keyword{internal} 10 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | urls: 2 | reference: http://courtsbr.github.io/esaj/reference 3 | article: http://courtsbr.github.io/esaj/articles 4 | articles: 5 | download_decision: download_decision.html 6 | download_lawsuit: download_lawsuit.html 7 | download_query: download_query.html 8 | 9 | -------------------------------------------------------------------------------- /man/parse_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/aux_parser.R 3 | \name{parse_data} 4 | \alias{parse_data} 5 | \title{Parse data} 6 | \usage{ 7 | parse_data(parser) 8 | } 9 | \arguments{ 10 | \item{parser}{A parser returned by \code{\link[=make_parser]{make_parser()}}} 11 | } 12 | \description{ 13 | Parse data 14 | } 15 | -------------------------------------------------------------------------------- /man/parse_hist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/aux_parser.R 3 | \name{parse_hist} 4 | \alias{parse_hist} 5 | \title{Parse history} 6 | \usage{ 7 | parse_hist(parser) 8 | } 9 | \arguments{ 10 | \item{parser}{A parser returned by \code{\link[=make_parser]{make_parser()}}} 11 | } 12 | \description{ 13 | Parse history 14 | } 15 | -------------------------------------------------------------------------------- /man/parse_movs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/aux_parser.R 3 | \name{parse_movs} 4 | \alias{parse_movs} 5 | \title{Parse movements} 6 | \usage{ 7 | parse_movs(parser) 8 | } 9 | \arguments{ 10 | 
\item{parser}{A parser returned by \code{\link[=make_parser]{make_parser()}}} 11 | } 12 | \description{ 13 | Parse movements 14 | } 15 | -------------------------------------------------------------------------------- /man/parse_parts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/aux_parser.R 3 | \name{parse_parts} 4 | \alias{parse_parts} 5 | \title{Parse parts} 6 | \usage{ 7 | parse_parts(parser) 8 | } 9 | \arguments{ 10 | \item{parser}{A parser returned by \code{\link[=make_parser]{make_parser()}}} 11 | } 12 | \description{ 13 | Parse parts 14 | } 15 | -------------------------------------------------------------------------------- /man/parse_pd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/aux_parser.R 3 | \name{parse_pd} 4 | \alias{parse_pd} 5 | \title{Parse police department} 6 | \usage{ 7 | parse_pd(parser) 8 | } 9 | \arguments{ 10 | \item{parser}{A parser returned by \code{\link[=make_parser]{make_parser()}}} 11 | } 12 | \description{ 13 | Parse police department 14 | } 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r 2 | 3 | addons: 4 | apt: 5 | packages: 6 | - libv8-dev 7 | - valgrind 8 | - libtesseract-dev 9 | - libleptonica-dev 10 | - tesseract-ocr-eng 11 | - libpoppler-cpp-dev 12 | - libmagick++-dev 13 | 14 | language: R 15 | sudo: false 16 | cache: packages 17 | -------------------------------------------------------------------------------- /man/make_parser.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | 
% Please edit documentation in R/parser.R 3 | \name{make_parser} 4 | \alias{make_parser} 5 | \title{Create a parser skeleton} 6 | \usage{ 7 | make_parser(type = "cposg") 8 | } 9 | \arguments{ 10 | \item{type}{The type of parser (\code{"cpopg"} or \code{"cposg"})} 11 | } 12 | \description{ 13 | Create a parser skeleton 14 | } 15 | -------------------------------------------------------------------------------- /man/parse_hearings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/aux_parser.R 3 | \name{parse_hearings} 4 | \alias{parse_hearings} 5 | \title{Parse hearings} 6 | \usage{ 7 | parse_hearings(parser) 8 | } 9 | \arguments{ 10 | \item{parser}{A parser returned by \code{\link[=make_parser]{make_parser()}}} 11 | } 12 | \description{ 13 | Parse hearings 14 | } 15 | -------------------------------------------------------------------------------- /man/parse_decisions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/aux_parser.R 3 | \name{parse_decisions} 4 | \alias{parse_decisions} 5 | \title{Parse decisions} 6 | \usage{ 7 | parse_decisions(parser) 8 | } 9 | \arguments{ 10 | \item{parser}{A parser returned by \code{\link[=make_parser]{make_parser()}}} 11 | } 12 | \description{ 13 | Parse decisions 14 | } 15 | -------------------------------------------------------------------------------- /man/parse_data.cpopg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_cpopg.R 3 | \name{parse_data.cpopg} 4 | \alias{parse_data.cpopg} 5 | \title{Parse data} 6 | \usage{ 7 | \method{parse_data}{cpopg}(parser) 8 | } 9 | \arguments{ 10 | \item{parser}{A parser returned by \code{\link[=make_parser]{make_parser()}}} 11 
| } 12 | \description{ 13 | Parse data 14 | } 15 | -------------------------------------------------------------------------------- /man/parse_data.cposg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_cposg.R 3 | \name{parse_data.cposg} 4 | \alias{parse_data.cposg} 5 | \title{Parses data} 6 | \usage{ 7 | \method{parse_data}{cposg}(parser) 8 | } 9 | \arguments{ 10 | \item{parser}{A parser returned by \code{\link[=make_parser]{make_parser()}}} 11 | } 12 | \description{ 13 | Parses data 14 | } 15 | -------------------------------------------------------------------------------- /man/parse_hist.cpopg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_cpopg.R 3 | \name{parse_hist.cpopg} 4 | \alias{parse_hist.cpopg} 5 | \title{Parse history} 6 | \usage{ 7 | \method{parse_hist}{cpopg}(parser) 8 | } 9 | \arguments{ 10 | \item{parser}{A parser returned by \code{\link[=make_parser]{make_parser()}}} 11 | } 12 | \description{ 13 | Parse history 14 | } 15 | -------------------------------------------------------------------------------- /man/parse_movs.cpopg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_cpopg.R 3 | \name{parse_movs.cpopg} 4 | \alias{parse_movs.cpopg} 5 | \title{Parse movements} 6 | \usage{ 7 | \method{parse_movs}{cpopg}(parser) 8 | } 9 | \arguments{ 10 | \item{parser}{A parser returned by \code{\link[=make_parser]{make_parser()}}} 11 | } 12 | \description{ 13 | Parse movements 14 | } 15 | -------------------------------------------------------------------------------- /man/parse_parts.cpopg.Rd: -------------------------------------------------------------------------------- 1 | % 
Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_cpopg.R 3 | \name{parse_parts.cpopg} 4 | \alias{parse_parts.cpopg} 5 | \title{Parse parts} 6 | \usage{ 7 | \method{parse_parts}{cpopg}(parser) 8 | } 9 | \arguments{ 10 | \item{parser}{A parser returned by \code{\link[=make_parser]{make_parser()}}} 11 | } 12 | \description{ 13 | Parse parts 14 | } 15 | -------------------------------------------------------------------------------- /man/parse_parts.cposg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_cposg.R 3 | \name{parse_parts.cposg} 4 | \alias{parse_parts.cposg} 5 | \title{Parses parts} 6 | \usage{ 7 | \method{parse_parts}{cposg}(parser) 8 | } 9 | \arguments{ 10 | \item{parser}{A parser returned by \code{\link[=make_parser]{make_parser()}}} 11 | } 12 | \description{ 13 | Parses parts 14 | } 15 | -------------------------------------------------------------------------------- /man/parse_movs.cposg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_cposg.R 3 | \name{parse_movs.cposg} 4 | \alias{parse_movs.cposg} 5 | \title{Parses movements} 6 | \usage{ 7 | \method{parse_movs}{cposg}(parser) 8 | } 9 | \arguments{ 10 | \item{parser}{A parser returned by \code{\link[=make_parser]{make_parser()}}} 11 | } 12 | \description{ 13 | Parses movements 14 | } 15 | -------------------------------------------------------------------------------- /man/parse_pd.cpopg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_cpopg.R 3 | \name{parse_pd.cpopg} 4 | \alias{parse_pd.cpopg} 5 | \title{Parse police department} 6 | \usage{ 7 | \method{parse_pd}{cpopg}(parser) 8 | } 9 | 
\arguments{ 10 | \item{parser}{A parser returned by \code{\link[=make_parser]{make_parser()}}} 11 | } 12 | \description{ 13 | Parse police department 14 | } 15 | -------------------------------------------------------------------------------- /man/parse_hearings.cpopg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_cpopg.R 3 | \name{parse_hearings.cpopg} 4 | \alias{parse_hearings.cpopg} 5 | \title{Parse hearings} 6 | \usage{ 7 | \method{parse_hearings}{cpopg}(parser) 8 | } 9 | \arguments{ 10 | \item{parser}{A parser returned by \code{\link[=make_parser]{make_parser()}}} 11 | } 12 | \description{ 13 | Parse hearings 14 | } 15 | -------------------------------------------------------------------------------- /man/parse_decisions.cposg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_cposg.R 3 | \name{parse_decisions.cposg} 4 | \alias{parse_decisions.cposg} 5 | \title{Parses decisions} 6 | \usage{ 7 | \method{parse_decisions}{cposg}(parser) 8 | } 9 | \arguments{ 10 | \item{parser}{A parser returned by \code{\link[=make_parser]{make_parser()}}} 11 | } 12 | \description{ 13 | Parses decisions 14 | } 15 | -------------------------------------------------------------------------------- /esaj.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | 
PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /man/parse_cjpg_lawsuit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_cjpg.R 3 | \name{parse_cjpg_lawsuit} 4 | \alias{parse_cjpg_lawsuit} 5 | \title{Parse one lawsuit from a CJSG page} 6 | \usage{ 7 | parse_cjpg_lawsuit(node) 8 | } 9 | \arguments{ 10 | \item{node}{A \code{.fundocinza1} node extracted from the page} 11 | } 12 | \value{ 13 | One row with the data concerning the lawsuit 14 | } 15 | \description{ 16 | Parse one lawsuit from a CJSG page 17 | } 18 | -------------------------------------------------------------------------------- /man/parse_cjsg_lawsuit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_cjsg.R 3 | \name{parse_cjsg_lawsuit} 4 | \alias{parse_cjsg_lawsuit} 5 | \title{Parse one lawsuit from a CJSG page} 6 | \usage{ 7 | parse_cjsg_lawsuit(node) 8 | } 9 | \arguments{ 10 | \item{node}{A \code{.fundocinza1} node extracted from the page} 11 | } 12 | \value{ 13 | One row with the data concerning the lawsuit 14 | } 15 | \description{ 16 | Parse one lawsuit from a CJSG page 17 | } 18 | -------------------------------------------------------------------------------- /man/parse_cjpg_.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_cjpg.R 3 | \name{parse_cjpg_} 4 | \alias{parse_cjpg_} 5 | \title{Parse a page of CJPG results} 6 | \usage{ 7 | parse_cjpg_(file, pb = NULL) 8 | } 9 | \arguments{ 10 | \item{file}{The path to the file to be parsed} 11 | 12 | \item{pb}{Progress bar created by \code{\link[=parse_cjpg]{parse_cjpg()}}} 13 | } 14 | \value{ 15 | A 
tibble with the parsed information 16 | } 17 | \description{ 18 | Parse a page of CJPG results 19 | } 20 | -------------------------------------------------------------------------------- /man/parse_cjsg_.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_cjsg.R 3 | \name{parse_cjsg_} 4 | \alias{parse_cjsg_} 5 | \title{Parse a page of CJSG results} 6 | \usage{ 7 | parse_cjsg_(file, pb = NULL) 8 | } 9 | \arguments{ 10 | \item{file}{The path to the file to be parsed} 11 | 12 | \item{pb}{Progress bar created by \code{\link[=parse_cjsg]{parse_cjsg()}}} 13 | } 14 | \value{ 15 | A tibble with the parsed information 16 | } 17 | \description{ 18 | Parse a page of CJSG results 19 | } 20 | -------------------------------------------------------------------------------- /man/run_parser.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{run_parser} 4 | \alias{run_parser} 5 | \title{Run a parser} 6 | \usage{ 7 | run_parser(file, parser, path = ".", cores = 1) 8 | } 9 | \arguments{ 10 | \item{file}{A character vector with the paths to one ore more files} 11 | 12 | \item{parser}{A parser returned by \code{\link[=make_parser]{make_parser()}}} 13 | 14 | \item{path}{The path to a directory where to save RDSs} 15 | 16 | \item{cores}{The number of cores to be used when parsing} 17 | } 18 | \description{ 19 | Run a parser 20 | } 21 | -------------------------------------------------------------------------------- /man/parse_cpopg_all.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{parse_cpopg_all} 4 | \alias{parse_cpopg_all} 5 | \title{Shortcut for creating and running a complete CPOPG 
parser} 6 | \usage{ 7 | parse_cpopg_all(file, path = ".", cores = 1) 8 | } 9 | \arguments{ 10 | \item{file}{A character vector with the paths to one ore more files} 11 | 12 | \item{path}{The path to a directory where to save RDSs} 13 | 14 | \item{cores}{The number of cores to be used when parsing} 15 | } 16 | \description{ 17 | Shortcut for creating and running a complete CPOPG parser 18 | } 19 | -------------------------------------------------------------------------------- /man/parse_cposg_all.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parser.R 3 | \name{parse_cposg_all} 4 | \alias{parse_cposg_all} 5 | \title{Shortcut for creating and running a complete CPOSG parser} 6 | \usage{ 7 | parse_cposg_all(file, path = ".", cores = 1) 8 | } 9 | \arguments{ 10 | \item{file}{A character vector with the paths to one ore more files} 11 | 12 | \item{path}{The path to a directory where to save RDSs} 13 | 14 | \item{cores}{The number of cores to be used when parsing} 15 | } 16 | \description{ 17 | Shortcut for creating and running a complete CPOSG parser 18 | } 19 | -------------------------------------------------------------------------------- /tests/testthat/test_download_decision.R: -------------------------------------------------------------------------------- 1 | library(esaj) 2 | context("download_decision") 3 | 4 | test_that("download_decision() is working", { 5 | 6 | # Skip tests when not run locally 7 | skip_on_cran() 8 | skip_on_travis() 9 | skip_on_appveyor() 10 | 11 | # Create temporary directory 12 | path <- tempdir() 13 | 14 | # Download one decision 15 | file <- download_decision("10000034", path) 16 | expect_gt(file.info(file)$size, 40000) 17 | 18 | # Download more than one decision 19 | files <- download_decision(c("10800758", "10000034"), path) 20 | expect_true(all(file.info(files)$size > 40000)) 21 | }) 22 | 
-------------------------------------------------------------------------------- /man/peek_cjpg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cjpg.R 3 | \name{peek_cjpg} 4 | \alias{peek_cjpg} 5 | \title{Check how long a call to \code{\link[=download_cjpg]{download_cjpg()}} will probably take} 6 | \usage{ 7 | peek_cjpg(...) 8 | } 9 | \arguments{ 10 | \item{...}{Arguments passed on to \code{\link[=download_cjpg]{download_cjpg()}} ( 11 | \code{path} will be ignored)} 12 | } 13 | \description{ 14 | Check how long a call to \code{\link[=download_cjpg]{download_cjpg()}} will probably take 15 | } 16 | \seealso{ 17 | \code{\link[=download_cjpg]{download_cjpg()}}, \code{\link[=cjpg_table]{cjpg_table()}} 18 | } 19 | -------------------------------------------------------------------------------- /man/peek_cjsg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cjsg.R 3 | \name{peek_cjsg} 4 | \alias{peek_cjsg} 5 | \title{Check how long a call to \code{\link[=download_cjsg]{download_cjsg()}} will probably take} 6 | \usage{ 7 | peek_cjsg(...) 
8 | } 9 | \arguments{ 10 | \item{...}{Arguments passed on to \code{\link[=download_cjsg]{download_cjsg()}} ( 11 | \code{path} will be ignored)} 12 | } 13 | \description{ 14 | Check how long a call to \code{\link[=download_cjsg]{download_cjsg()}} will probably take 15 | } 16 | \seealso{ 17 | \code{\link[=download_cjpg]{download_cjpg()}}, \code{\link[=cjpg_table]{cjpg_table()}} 18 | } 19 | -------------------------------------------------------------------------------- /docs/tocBullet.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /man/download_decision.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/decisions.R 3 | \name{download_decision} 4 | \alias{download_decision} 5 | \title{Download PDFs corresponding to judicial decisions} 6 | \usage{ 7 | download_decision(decision, path = ".", tj = "tjsp") 8 | } 9 | \arguments{ 10 | \item{decision}{A character vector with decision IDs} 11 | 12 | \item{path}{Path to directory where to save PDF} 13 | 14 | \item{tj}{TJ from which to get data (only works with TJSP for now)} 15 | } 16 | \value{ 17 | A character vector with the path to the downloaded file 18 | } 19 | \description{ 20 | Given the unique identifier of a judicial decision 21 | (an 8 digit number), this function downloads the corresponding 22 | PDF. 
23 | } 24 | -------------------------------------------------------------------------------- /tests/testthat/test_download_cposg.R: -------------------------------------------------------------------------------- 1 | library(esaj) 2 | context("download_cposg") 3 | 4 | test_that("download_cposg() is working", { 5 | 6 | # Skip tests when not run locally 7 | skip_on_cran() 8 | skip_on_travis() 9 | skip_on_appveyor() 10 | 11 | # Create temporary directory 12 | path <- tempdir() 13 | 14 | # Download one file 15 | file <- download_cposg("1001869-51.2017.8.26.0562", path) 16 | expect_gt(file.info(file)$size, 80000) 17 | 18 | # Run more than one download 19 | more <- download_cposg( 20 | c("1001869-51.2017.8.26.0562", 21 | "1001214-07.2016.8.26.0565"), path) 22 | expect_true(all(file.info(more)$size > 80000)) 23 | 24 | # Check if we get the right error 25 | expect_error(download_cposg(".", path), "Invalid ID") 26 | }) 27 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(parse_data,cpopg) 4 | S3method(parse_data,cposg) 5 | S3method(parse_decisions,cposg) 6 | S3method(parse_hearings,cpopg) 7 | S3method(parse_hist,cpopg) 8 | S3method(parse_movs,cpopg) 9 | S3method(parse_movs,cposg) 10 | S3method(parse_parts,cpopg) 11 | S3method(parse_parts,cposg) 12 | S3method(parse_pd,cpopg) 13 | export(browse_table) 14 | export(cjpg_table) 15 | export(cjsg_table) 16 | export(download_cjpg) 17 | export(download_cjsg) 18 | export(download_cpopg) 19 | export(download_cposg) 20 | export(download_decision) 21 | export(make_parser) 22 | export(parse_cjpg) 23 | export(parse_cjsg) 24 | 
export(parse_cpopg_all) 25 | export(parse_cposg_all) 26 | export(parse_data) 27 | export(parse_decisions) 28 | export(parse_hearings) 29 | export(parse_hist) 30 | export(parse_movs) 31 | export(parse_parts) 32 | export(parse_pd) 33 | export(peek_cjpg) 34 | export(peek_cjsg) 35 | export(run_parser) 36 | importFrom(magrittr,"%>%") 37 | -------------------------------------------------------------------------------- /man/cjpg_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cjpg_table.R 3 | \name{cjpg_table} 4 | \alias{cjpg_table} 5 | \title{Download information about some of CJPG's structures} 6 | \usage{ 7 | cjpg_table(type, tj = "tjsp") 8 | } 9 | \arguments{ 10 | \item{type}{Either \code{"classes"} or \code{"subjects"} or \code{"courts"}} 11 | 12 | \item{tj}{TJ from which to get data (only works with TJSP for now)} 13 | } 14 | \value{ 15 | A tibble with either 12 columns (if \code{type} is \code{"classes"} 16 | or \code{"subjects"}) or 3 columns (if \code{type} is \code{"courts"}) 17 | } 18 | \description{ 19 | Downloads a table with information about lawsuit's 20 | classes, subjects or courts to help with \code{\link[=download_cjpg]{download_cjpg()}}. You 21 | can also browse some of these tables with \code{\link[=browse_table]{browse_table()}}. 
22 | } 23 | \seealso{ 24 | \code{\link[=download_cjpg]{download_cjpg()}}, \code{\link[=browse_table]{browse_table()}} 25 | } 26 | -------------------------------------------------------------------------------- /man/cjsg_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cjsg_table.R 3 | \name{cjsg_table} 4 | \alias{cjsg_table} 5 | \title{Download information about some of CJSG's structures} 6 | \usage{ 7 | cjsg_table(type, tj = "tjsp") 8 | } 9 | \arguments{ 10 | \item{type}{Either \code{"classes"} or \code{"subjects"} or \code{"courts"}} 11 | 12 | \item{tj}{TJ from which to get data (only works with TJSP for now)} 13 | } 14 | \value{ 15 | A tibble with either 12 columns (if \code{type} is \code{"classes"} 16 | or \code{"subjects"}) or 3 columns (if \code{type} is \code{"courts"}) 17 | } 18 | \description{ 19 | Downloads a table with information about lawsuit's 20 | classes, subjects or courts to help with \code{\link[=download_cjsg]{download_cjsg()}}. You 21 | can also browse some of these tables with \code{\link[=browse_table]{browse_table()}}. 22 | } 23 | \seealso{ 24 | \code{\link[=download_cjpg]{download_cjpg()}}, \code{\link[=browse_table]{browse_table()}} 25 | } 26 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: esaj 2 | Title: A scraper for all e-SAJ systems 3 | Version: 0.1.2.9000 4 | Authors@R: c( 5 | person("C", "Lente", , "lente@ime.usp.br", role = c("aut", "cre")), 6 | person("Fernando", "Correa", role = c("aut")), 7 | person("Julio", "Trecenti", , "j@jtrecenti.com", c("aut")), 8 | person("ABJ", role = c("cph", "fnd"))) 9 | Description: A simple interface that allows you to download first and second 10 | degree lawsuits from Brazils multiple e-SAJ portals. 
11 | Depends: R (>= 3.2.3) 12 | License: GPL-2 13 | Imports: 14 | magrittr, 15 | dplyr, 16 | httr, 17 | jsonlite, 18 | lubridate, 19 | readr, 20 | stringr, 21 | tidyr, 22 | xml2, 23 | purrr, 24 | progress, 25 | tibble, 26 | rvest, 27 | rlang 28 | Suggests: 29 | knitr, 30 | rmarkdown, 31 | testthat, 32 | captchasaj, 33 | magick, 34 | tesseract, 35 | V8, 36 | decryptr 37 | VignetteBuilder: knitr 38 | LazyData: true 39 | RoxygenNote: 6.1.1 40 | Roxygen: list(markdown = TRUE, roclets = c("rd", "namespace", "collate")) 41 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # DO NOT CHANGE the "init" and "install" sections below 2 | 3 | # Download script file from GitHub 4 | init: 5 | ps: | 6 | $ErrorActionPreference = "Stop" 7 | Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" 8 | Import-Module '..\appveyor-tool.ps1' 9 | 10 | install: 11 | ps: Bootstrap 12 | 13 | cache: 14 | - C:\RLibrary 15 | 16 | # Adapt as necessary starting from here 17 | 18 | build_script: 19 | - travis-tool.sh install_deps 20 | 21 | test_script: 22 | - travis-tool.sh run_tests 23 | 24 | on_failure: 25 | - 7z a failure.zip *.Rcheck\* 26 | - appveyor PushArtifact failure.zip 27 | 28 | artifacts: 29 | - path: '*.Rcheck\**\*.log' 30 | name: Logs 31 | 32 | - path: '*.Rcheck\**\*.out' 33 | name: Logs 34 | 35 | - path: '*.Rcheck\**\*.fail' 36 | name: Logs 37 | 38 | - path: '*.Rcheck\**\*.Rout' 39 | name: Logs 40 | 41 | - path: '\*_*.tar.gz' 42 | name: Bits 43 | 44 | - path: '\*_*.zip' 45 | name: Bits 46 | -------------------------------------------------------------------------------- /R/aux_colors.R: -------------------------------------------------------------------------------- 1 | 2 | # Filter captcha's requested color 3 | filter_color <- function(img_df, color_nm) { 4 | right_color <- 
eval(call(tolower(color_nm), img_df)) %>% 5 | head(1) %>% with(color) 6 | dplyr::filter(img_df, color == right_color) 7 | } 8 | 9 | # Orange 10 | laranja <- function(d) { 11 | dplyr::filter(d, b < .5) %>% 12 | dplyr::filter(r + g == max(r + g)) 13 | } 14 | 15 | # Blue 16 | azul <- function(d) { 17 | dplyr::filter(d, r < .5, g < .8) %>% 18 | dplyr::filter(b == max(b)) 19 | } 20 | 21 | # Green 22 | verde <- function(d) { 23 | dplyr::filter(d, r < .5, b < .5) %>% 24 | dplyr::filter(g == max(g)) 25 | } 26 | 27 | # Red 28 | vermelho <- function(d) { 29 | dplyr::filter(d, b < .5, g < .5) %>% 30 | dplyr::filter(r == max(r)) 31 | } 32 | 33 | # Pink 34 | rosa <- function(d) { 35 | dplyr::filter(d, g < .8) %>% 36 | dplyr::filter(r + b == max(r + b)) 37 | } 38 | 39 | # Purple 40 | roxo <- function(d) { 41 | dplyr::filter(d, g < .5) %>% 42 | dplyr::filter(r + b == max(r + b)) 43 | } 44 | 45 | # Black 46 | preto <- function(d) { 47 | dplyr::filter(d, r + b + g == min(r + b + g)) 48 | } 49 | -------------------------------------------------------------------------------- /man/parse_cjpg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_cjpg.R 3 | \name{parse_cjpg} 4 | \alias{parse_cjpg} 5 | \title{Parse lawsuits extracted from CJPG query} 6 | \usage{ 7 | parse_cjpg(file, cores = 1) 8 | } 9 | \arguments{ 10 | \item{file}{Character vector with the paths to one or more files} 11 | 12 | \item{cores}{Number of cores to use when parsing} 13 | } 14 | \value{ 15 | A tibble with the columns 16 | \itemize{ 17 | \item \code{file} Name of the file 18 | \item \code{id_lawsuit} Number of the lawsuit (doesn't have to be unique) 19 | \item \code{code_lawsuit} Unique code of the lawsuit 20 | \item \code{subject} Subject of the lawsuit 21 | \item \code{class} Class of the subject 22 | \item \code{district} Name of the district 23 | \item \code{date_available} Date when 
lawsuit was made available (\%d/\%m/\%Y) 24 | \item \code{jurisdiction} Name of the jurisdiction 25 | \item \code{judge} Name of the judge 26 | \item \code{court} Body responsible for the lawsuit 27 | \item \code{summary} Summary of the ruling 28 | } 29 | } 30 | \description{ 31 | Parse lawsuits extracted from CJPG query 32 | } 33 | -------------------------------------------------------------------------------- /tests/testthat/test_download_cpopg.R: -------------------------------------------------------------------------------- 1 | library(esaj) 2 | context("download_cpopg") 3 | 4 | test_that("download_cpopg() is working", { 5 | 6 | # Skip tests when not run locally 7 | skip_on_cran() 8 | skip_on_travis() 9 | skip_on_appveyor() 10 | 11 | # Create temporary directory 12 | path <- tempdir() 13 | 14 | # Run downloads 15 | am <- download_cpopg("02575182220138040001", path) # Amazonas 16 | sc <- download_cpopg("0303349-44.2014.8.24.0020", path) # Santa Catarina 17 | ba <- download_cpopg("0552486-62.2015.8.05.0001", path) # Bahia 18 | sp <- download_cpopg("0123479-07.2012.8.26.0100", path) # São Paulo 19 | 20 | # Expectations 21 | expect_gt(file.info(am)$size, 100000) 22 | # expect_gt(file.info(sc)$size, 100000) 23 | expect_gt(file.info(ba)$size, 100000) 24 | expect_gt(file.info(sp)$size, 100000) 25 | 26 | # Run more than one download 27 | more <- download_cpopg( 28 | c("0123479-07.2012.8.26.0100", 29 | "0552486-62.2015.8.05.0001"), path) 30 | expect_true(all(file.info(more)$size > 100000)) 31 | 32 | # Check if we get the right error 33 | expect_error(download_cpopg(".", path), "Invalid ID") 34 | }) 35 | -------------------------------------------------------------------------------- /man/parse_cjsg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_cjsg.R 3 | \name{parse_cjsg} 4 | \alias{parse_cjsg} 5 | \title{Parse lawsuits extracted from CJSG 
query} 6 | \usage{ 7 | parse_cjsg(file, cores = 1) 8 | } 9 | \arguments{ 10 | \item{file}{Character vector with the paths to one or more files} 11 | 12 | \item{cores}{Number of cores to use when parsing} 13 | } 14 | \value{ 15 | A tibble with the columns 16 | \itemize{ 17 | \item \code{file} Name of the file 18 | \item \code{id_page} ID found in the page 19 | \item \code{id_decision} Unique ID of the ruling 20 | \item \code{id_lawsuit} Number of the lawsuit (doesn't have to be unique) 21 | \item \code{class_subject} Class/subject, separated by slashes 22 | \item \code{district} Name of the district 23 | \item \code{court} Body responsible for the appeal 24 | \item \code{date_decision} Date of the judgement (\%d/\%m/\%Y) 25 | \item \code{date_publication} Date of the publication (\%d/\%m/\%Y) 26 | \item \code{date_registration} Date of registration in the system (\%d/\%m/\%Y) 27 | \item \code{rapporteur} Name of the rapporteur 28 | \item \code{summary} Summary of the ruling 29 | \item \code{txt_summary} Text of the summary with no formatting 30 | } 31 | } 32 | \description{ 33 | Parse lawsuits extracted from CJSG query 34 | } 35 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | $("#sidebar").stick_in_parent({ 3 | offset_top: $("#sidebar").offset().top 4 | }); 5 | $('body').scrollspy({ 6 | target: '#sidebar' 7 | }); 8 | 9 | var cur_path = paths(location.pathname); 10 | $("#navbar ul li a").each(function(index, value) { 11 | if (value.text == "Home") 12 | return; 13 | if (value.getAttribute("href") === "#") 14 | return; 15 | 16 | var path = paths(value.pathname); 17 | if (is_prefix(cur_path, path)) { 18 | // Add class to parent
  • , and enclosing
  • if in dropdown 19 | var menu_anchor = $(value); 20 | menu_anchor.parent().addClass("active"); 21 | menu_anchor.closest("li.dropdown").addClass("active"); 22 | } 23 | }); 24 | 25 | }); 26 | 27 | 28 | 29 | function paths(pathname) { 30 | var pieces = pathname.split("/"); 31 | pieces.shift(); // always starts with / 32 | 33 | var end = pieces[pieces.length - 1]; 34 | if (end === "index.html" || end === "") 35 | pieces.pop(); 36 | return(pieces); 37 | } 38 | 39 | function is_prefix(needle, haystack) { 40 | if (needle.length > haystack.lengh) 41 | return(false); 42 | 43 | for (var i = 0; i < haystack.length; i++) { 44 | if (needle[i] != haystack[i]) 45 | return(false); 46 | } 47 | 48 | return(true); 49 | } 50 | -------------------------------------------------------------------------------- /man/download_cposg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cposg.R 3 | \name{download_cposg} 4 | \alias{download_cposg} 5 | \title{Download second degree lawsuits filed in Brazilian Justice Courts} 6 | \usage{ 7 | download_cposg(id, path = ".") 8 | } 9 | \arguments{ 10 | \item{id}{A character vector of one or more lawsuit IDs (only works with 11 | TJSP for now)} 12 | 13 | \item{path}{Path to the directory where the lawsuit should be saved} 14 | } 15 | \value{ 16 | A character vector with the path to the downloaded lawsuit 17 | } 18 | \description{ 19 | This function downloads lawsuits as HTMLs. Given a lawsuit ID, and 20 | the path to a directory it will collect the lawsuit, and save it to 21 | the provided directory. 22 | } 23 | \section{About lawsuits}{ 24 | 25 | The lawsuits contemplated by this funtion have to be filed in a 26 | Brazilian Tribunal de Justica (Justice Court). 
\code{\link[=download_cposg]{download_cposg()}} 27 | finds the lawsuit in its state's online Sistema de Automacao de Justica 28 | (Justice Automation System), solves the captcha withholding the 29 | information, and collects the HTML. 30 | } 31 | 32 | \section{Implemented TJs}{ 33 | 34 | Unfortunatelly \code{\link[=download_cposg]{download_cposg()}} doesn't yet work with all 27 35 | TJs in Brazil. Here are the ones already implemented: 36 | \itemize{ 37 | \item TJSP (Sao Paulo) 38 | } 39 | } 40 | 41 | -------------------------------------------------------------------------------- /tests/testthat/test_cjxg_table.R: -------------------------------------------------------------------------------- 1 | library(esaj) 2 | context("cjxg_table") 3 | 4 | test_that("CJXG tables are working", { 5 | 6 | # Skip tests when not run locally 7 | skip_on_cran() 8 | skip_on_travis() 9 | skip_on_appveyor() 10 | 11 | # Get the tables for classes and subjects 12 | classes_p <- cjpg_table("classes") 13 | subjects_p <- cjpg_table("subjects") 14 | courts_p <- cjpg_table("courts") 15 | 16 | classes_s <- cjsg_table("classes") 17 | subjects_s <- cjsg_table("subjects") 18 | courts_s <- cjsg_table("courts") 19 | 20 | testthat::expect_is(classes_p, "tbl") 21 | testthat::expect_is(subjects_p, "tbl") 22 | testthat::expect_is(courts_p, "tbl") 23 | testthat::expect_is(classes_s, "tbl") 24 | testthat::expect_is(subjects_s, "tbl") 25 | testthat::expect_is(courts_s, "tbl") 26 | 27 | # # Check dimensions of tables 28 | # expect_equal(dim(classes_p), c(769, 12)) 29 | # expect_equal(dim(subjects_p), c(6521, 12)) 30 | # expect_equal(dim(courts_p), c(2061, 3)) 31 | # 32 | # expect_equal(dim(classes_s), c(122, 12)) 33 | # expect_equal(dim(subjects_s), c(3088, 12)) 34 | # expect_equal(dim(courts_s), c(1208, 3)) 35 | # 36 | # # Check browsing works 37 | # expect_equal(dim(browse_table(classes_s, list(c("ADM", "CRIMINAL"), "", "", "", "", "Recurso"))), c(4, 12)) 38 | # expect_equal(dim(browse_table(subjects_s, 
list("DIREITO", "", "", "", "", c("Carta", "Parcelamento")))), c(9, 12)) 39 | }) 40 | -------------------------------------------------------------------------------- /man/download_cpopg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cpopg.R 3 | \name{download_cpopg} 4 | \alias{download_cpopg} 5 | \title{Download first degree lawsuits filed in Brazilian Justice Courts} 6 | \usage{ 7 | download_cpopg(id, path = ".") 8 | } 9 | \arguments{ 10 | \item{id}{A character vector of one or more lawsuit IDs} 11 | 12 | \item{path}{Path to the directory where the lawsuit should be saved} 13 | } 14 | \value{ 15 | A character vector with the path to the downloaded lawsuit 16 | } 17 | \description{ 18 | This function downloads lawsuits as HTMLs. Given a lawsuit ID, and 19 | the path to a directory it will collect the lawsuit, and save it to 20 | the provided directory. 21 | } 22 | \section{About lawsuits}{ 23 | 24 | The lawsuits contemplated by this funtion have to be filed in a 25 | Brazilian Tribunal de Justica (Justice Court). \code{\link[=download_cpopg]{download_cpopg()}} 26 | finds the lawsuit in its state's online Sistema de Automacao de Justica 27 | (Justice Automation System), solves the captcha withholding the 28 | information, and collects the HTML. 29 | } 30 | 31 | \section{Implemented TJs}{ 32 | 33 | Unfortunatelly \code{\link[=download_cpopg]{download_cpopg()}} doesn't yet work with all 27 TJs in 34 | Brazil. 
Here are the ones already implemented: 35 | \itemize{ 36 | \item TJAM (Amazonas) 37 | \item TJAL (Alagoas) 38 | \item TJBA (Bahia) 39 | \item TJSC (Santa Catarina) 40 | \item TJSP (Sao Paulo) 41 | } 42 | } 43 | 44 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: http://courtsbr.github.io/esaj 2 | 3 | template: 4 | package: tidytemplate 5 | 6 | home: 7 | strip_header: true 8 | 9 | links: 10 | - text: Browse source code 11 | href: https://github.com/courtsbr/esaj 12 | - text: Report a bug 13 | href: https://github.com/courtsbr/esaj/issues 14 | - text: Learn more about ABJ 15 | href: http://abjur.org.br 16 | 17 | reference: 18 | - title: CPOPG and CPOSG 19 | contents: 20 | - download_cpopg 21 | - download_cposg 22 | - title: CJPG and CJSG 23 | contents: 24 | - download_cjpg 25 | - download_cjsg 26 | - peek_cjpg 27 | - peek_cjsg 28 | - title: Tables 29 | contents: 30 | - cjpg_table 31 | - cjsg_table 32 | - browse_table 33 | - title: Decisions 34 | contents: 35 | - download_decision 36 | - title: Parsers 37 | contents: 38 | - parse_cjpg 39 | - parse_cjsg 40 | - make_parser 41 | - parse_data 42 | - parse_decisions 43 | - parse_movs 44 | - parse_parts 45 | - run_parser 46 | 47 | navbar: 48 | type: default 49 | left: 50 | - text: Intro 51 | href: index.html 52 | - text: Reference 53 | href: reference/index.html 54 | - text: Articles 55 | menu: 56 | - text: Downloading Lawsuits 57 | href: articles/download_lawsuit.html 58 | - text: Downloading Queries 59 | href: articles/download_query.html 60 | - text: Downloading Decisions 61 | href: articles/download_decision.html 62 | right: 63 | - icon: fa-github fa-lg 64 | href: https://github.com/courtsbr/esaj 65 | -------------------------------------------------------------------------------- /R/aux_parser.R: -------------------------------------------------------------------------------- 1 | 2 
| #' Parse parts 3 | #' @param parser A parser returned by [make_parser()] 4 | #' @export 5 | parse_parts <- function(parser) { 6 | UseMethod("parse_parts", parser) 7 | } 8 | 9 | #' Parse data 10 | #' @param parser A parser returned by [make_parser()] 11 | #' @export 12 | parse_data <- function(parser) { 13 | UseMethod("parse_data", parser) 14 | } 15 | 16 | #' Parse decisions 17 | #' @param parser A parser returned by [make_parser()] 18 | #' @export 19 | parse_decisions <- function(parser) { 20 | UseMethod("parse_decisions", parser) 21 | } 22 | 23 | #' Parse movements 24 | #' @param parser A parser returned by [make_parser()] 25 | #' @export 26 | parse_movs <- function(parser) { 27 | UseMethod("parse_movs", parser) 28 | } 29 | 30 | #' Parse history 31 | #' @param parser A parser returned by [make_parser()] 32 | #' @export 33 | parse_hist <- function(parser) { 34 | UseMethod("parse_hist", parser) 35 | } 36 | 37 | #' Parse hearings 38 | #' @param parser A parser returned by [make_parser()] 39 | #' @export 40 | parse_hearings <- function(parser) { 41 | UseMethod("parse_hearings", parser) 42 | } 43 | 44 | #' Parse police department 45 | #' @param parser A parser returned by [make_parser()] 46 | #' @export 47 | parse_pd <- function(parser) { 48 | UseMethod("parse_pd", parser) 49 | } 50 | 51 | # Print parser 52 | print.parser <- function(x, ...) 
{ 53 | if (length(x$name) == 0) { 54 | cat("An empty parser\n") 55 | } 56 | else { 57 | cat("A parser for the following objects:\n") 58 | purrr::walk(x$name, ~cat("- ", .x, "\n", sep = "")) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /man/download_cjsg_tjmg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cjsg.R 3 | \name{download_cjsg_tjmg} 4 | \alias{download_cjsg_tjmg} 5 | \title{Temporary function for downloading TJMG's CJSG queries} 6 | \usage{ 7 | download_cjsg_tjmg(query, path = ".", classes = "", subjects = "", 8 | courts = "", trial_start = "", trial_end = "", 9 | registration_start = "", registration_end = "", min_page = 1, 10 | max_page = 1, rapporteurs = "") 11 | } 12 | \arguments{ 13 | \item{query}{Character vector with search query} 14 | 15 | \item{path}{Path to directory where to save HTMLs} 16 | 17 | \item{classes}{Character vector with class IDs (e.g. \code{c(175, 43, 259, 263)})} 18 | 19 | \item{subjects}{Character vector with subject IDs (e.g. \code{c(10207, 10008, 10199)})} 20 | 21 | \item{courts}{Character vector with court IDs (e.g. \code{c("1-7", "1-9", "2-3", "1-1")})} 22 | 23 | \item{trial_start}{Lower bound for trial date} 24 | 25 | \item{trial_end}{Upper bound for trial date} 26 | 27 | \item{registration_start}{Lower bound for registration date} 28 | 29 | \item{registration_end}{Upper bound for registration date} 30 | 31 | \item{min_page}{First page of results to download} 32 | 33 | \item{max_page}{Last page of results to download. If is \code{NA} or 34 | \code{Inf}, we use \code{\link{peek_cjsg}}.} 35 | 36 | \item{rapporteurs}{Character vector with rapporteur IDs (e.g. 
\code{c("2-1528561", "2-2345361")})} 37 | } 38 | \value{ 39 | A character vector with the paths to the downloaded files 40 | } 41 | \description{ 42 | Temporary function for downloading TJMG's CJSG queries 43 | } 44 | -------------------------------------------------------------------------------- /man/browse_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cjpg_table.R 3 | \name{browse_table} 4 | \alias{browse_table} 5 | \title{Browse table returned by \code{\link[=cjpg_table]{cjpg_table()}} or \code{\link[=cjsg_table]{cjsg_table()}}} 6 | \usage{ 7 | browse_table(table, patterns) 8 | } 9 | \arguments{ 10 | \item{table}{Table returned by \code{\link[=cjpg_table]{cjpg_table()}} or \code{\link[=cjsg_table]{cjsg_table()}} 11 | (only valid for \code{"classes"} or \code{"subjects"} types)} 12 | 13 | \item{patterns}{A list containing (at most) 6 character vectors 14 | of one or more regular expressions (applied from left to right 15 | on \code{name0} to \code{name5}), e.g., 16 | \code{list(c("ADM", "CRIMINAL"), "", "", "", "", "Recurso")}} 17 | } 18 | \value{ 19 | The original table filtered according to \code{patterns} 20 | } 21 | \description{ 22 | This function uses a list of regex to filter CJPG 23 | and CJSG tables (only if they are of type \code{"classes"} or 24 | \code{"subjects"}) more easily than with \code{dplyr::select()}. For 25 | details on how the matching occurs, see \strong{Matching}. 26 | } 27 | \section{Matching}{ 28 | For the matching to work properly, \code{patterns} 29 | should be a list of at most 6 character vectors, each one 30 | containing either one or a vector of regular expressions to 31 | be applied from left to right on columns \code{name0} to \code{name5} 32 | (note that vectors are ORed and different elements are ANDed). 
33 | Example: If \code{patterns} looks something like 34 | \code{list(c("ADM", "CRIMINAL"), "", "", "", "", "Recurso")}, 35 | then we'll get back the rows where \code{name0} contains "ADM" 36 | \strong{or} "CRIMINAL" \strong{and} where \code{name5} contains "Recurso". 37 | } 38 | 39 | \seealso{ 40 | \code{\link[=cjpg_table]{cjpg_table()}}, \code{\link[=cjsg_table]{cjsg_table()}} 41 | } 42 | -------------------------------------------------------------------------------- /man/download_cjpg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cjpg.R 3 | \name{download_cjpg} 4 | \alias{download_cjpg} 5 | \title{Download results of a query on first degree lawsuits filed 6 | in Brazilian Justice Courts} 7 | \usage{ 8 | download_cjpg(query, path = ".", classes = "", subjects = "", 9 | courts = "", date_start = "", date_end = "", min_page = 1, 10 | max_page = 1, cores = 1, tj = "tjsp") 11 | } 12 | \arguments{ 13 | \item{query}{Character vector with search query} 14 | 15 | \item{path}{Path to directory where to save HTMLs} 16 | 17 | \item{classes}{Character vector with class IDs (see \code{\link[=cjpg_table]{cjpg_table()}})} 18 | 19 | \item{subjects}{Character vector with subject IDs (see \code{\link[=cjpg_table]{cjpg_table()}})} 20 | 21 | \item{courts}{Character vector with court IDs (see \code{\link[=cjpg_table]{cjpg_table()}})} 22 | 23 | \item{date_start}{Lower bound for date} 24 | 25 | \item{date_end}{Upper bound for date} 26 | 27 | \item{min_page}{First page of results to download} 28 | 29 | \item{max_page}{Last page of results to download} 30 | 31 | \item{cores}{The number of cores to be used when downloading} 32 | 33 | \item{tj}{TJ from which to get data (only works with TJSP for now)} 34 | } 35 | \value{ 36 | A character vector with the paths to the downloaded files 37 | } 38 | \description{ 39 | Downloads an HTML with the results obtained from 40 | 
querying a dataset of all first degree lawsuits and then one 41 | HTML for each page of results (at most \code{max_page} pages). \code{query} 42 | should be the string to look for in the lawsuits and \code{clases}, 43 | \code{courts}, etc. should be the filtering parameters (make sure 44 | to use \code{\link[=cjpg_table]{cjpg_table()}} to get lists of all valid codes for these 45 | arguments). 46 | } 47 | \seealso{ 48 | \code{\link[=cjpg_table]{cjpg_table()}}, \code{\link[=browse_table]{browse_table()}} 49 | } 50 | -------------------------------------------------------------------------------- /tests/testthat/test_parse_cpoxg.R: -------------------------------------------------------------------------------- 1 | library(esaj) 2 | context("parse_cpoxg") 3 | 4 | test_that("Function for parsing CPOSG is working", { 5 | 6 | # Skip tests when not run locally 7 | skip_on_cran() 8 | skip_on_travis() 9 | skip_on_appveyor() 10 | 11 | # Download 2nd degree lawsuits 12 | path <- tempdir() 13 | more <- download_cposg( 14 | c("1001869-51.2017.8.26.0562", 15 | "1001214-07.2016.8.26.0565"), path) 16 | 17 | # Create and run parser 18 | parser <- parse_decisions(parse_parts(parse_data(parse_movs(make_parser())))) 19 | info <- run_parser(more, parser, path) 20 | 21 | # Check info's shape 22 | expect_equal(nrow(info), 2) 23 | expect_equal(dim(info$movs[[1]]), c(21, 2)) 24 | expect_equal(dim(info$data[[1]]), c(11, 2)) 25 | expect_equal(dim(info$parts[[1]]), c(5, 4)) 26 | expect_equal(dim(info$decisions[[1]]), c(1, 2)) 27 | 28 | # Run parser on multiple cores 29 | # info <- run_parser(more, parser, path, 4) 30 | 31 | # Check info's shape 32 | # expect_equal(nrow(info), 2) 33 | # expect_equal(dim(info$movs[[1]]), c(19, 2)) 34 | # expect_equal(dim(info$data[[1]]), c(11, 2)) 35 | # expect_equal(dim(info$parts[[1]]), c(5, 4)) 36 | }) 37 | 38 | test_that("Function for parsing CPOPG is working", { 39 | 40 | # Skip tests when not run locally 41 | skip_on_cran() 42 | skip_on_travis() 43 | 
skip_on_appveyor() 44 | 45 | # Download 2nd degree lawsuits 46 | path <- tempdir() 47 | more <- download_cpopg( 48 | c("0123479-07.2012.8.26.0100"), path) 49 | 50 | # Create and run parser 51 | parser <- parse_pd(parse_hist(parse_hearings(parse_parts(parse_data(parse_movs(make_parser("cpopg"))))))) 52 | info <- run_parser(more, parser, path) 53 | 54 | # Check info's shape 55 | expect_equal(nrow(info), 1) 56 | expect_equal(dim(info$movs[[1]]), c(130, 3)) 57 | expect_equal(dim(info$data[[1]]), c(14, 2)) 58 | expect_equal(dim(info$parts[[1]]), c(4, 4)) 59 | expect_equal(dim(info$hearings[[1]]), c(1, 1)) 60 | expect_equal(dim(info$hist[[1]]), c(0, 5)) 61 | expect_equal(dim(info$pd[[1]]), c(0, 0)) 62 | 63 | }) 64 | -------------------------------------------------------------------------------- /vignettes/download_decision.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Downloading Decisions" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Vignette Title} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r setup, include = FALSE} 11 | library(esaj) 12 | knitr::opts_chunk$set( 13 | collapse = TRUE, 14 | comment = "#>") 15 | ``` 16 | 17 | Of all functions in the `esaj` package, `download_decision()` is probably the 18 | simplest. Unlike lawsuit and query functions, there is only one decision 19 | function and absolutelly no helper functions are necessary; it simply downloads 20 | the PDF belonging to a decision. 21 | 22 | It takes two arguments: a decision unique number (ID) and the path to a 23 | directory. It's important to note that the second argument isn't the path 24 | to a file like `readr::write_*()` functions expect, it's the path to a 25 | folder (which can, but not necessarily has to, exist). 26 | 27 | As of this writing, `download_decision()` only works for São Paulo's Justice 28 | Court (TJSP). 
29 | 30 | ## Basic usage 31 | 32 | To download a decision, simply call `download_decision()` with a decision ID 33 | and a path. The function will return the path to the downloaded PDF. 34 | 35 | ```{r} 36 | download_decision("10000034", "~/Desktop/") 37 | ``` 38 | 39 | This function is vectorized, meaning that we can pass it a character vector of 40 | IDs to download more than one decision. 41 | 42 | ```{r} 43 | download_decision(c("10800758", "10000034"), "~/Desktop/") 44 | ``` 45 | 46 | As expected, it returns the paths to both downloaded PDFs. 47 | 48 | It's important to remember that `download_decision()` is breaking captchas in 49 | order to do its job (courtesy of the `decryptr` package). The function tries 50 | at most 10 times to break each captcha, so it's possible that it fails every 51 | time and isn't able to download the decision. 52 | 53 | If you find it super necessary to download every single decision, we recommend 54 | using `esaj:::download_decision_()`. With this non-exported function one can 55 | force a larger number of attempts or even get a more verbose output from the 56 | execution. 
57 | -------------------------------------------------------------------------------- /tests/testthat/test_download_cjxg.R: -------------------------------------------------------------------------------- 1 | library(esaj) 2 | context("download_cjxg") 3 | 4 | test_that("download_cjxg() is working", { 5 | 6 | # Skip tests when not run locally 7 | skip_on_cran() 8 | skip_on_travis() 9 | skip_on_appveyor() 10 | 11 | # Create temporary directory 12 | path <- tempdir() 13 | 14 | # Check courts and registration 15 | courts_p <- download_cjpg("recurso", path, courts = "2-1", 16 | date_start = "2016-01-01", date_end = "2016-12-31") 17 | courts_s <- download_cjsg("recurso", path, courts = "0-56", 18 | registration_start = "1998-01-01", 19 | registration_end = "1998-12-31") 20 | expect_gt(file.info(courts_p)$size[2], 50000) 21 | expect_gt(file.info(courts_s)$size[2], 50000) 22 | 23 | # Check classes and trial 24 | classes_p <- download_cjpg("recurso", path, classes = c("8727", "8742")) 25 | classes_s <- download_cjsg("recurso", path, classes = c("1231", "1232"), 26 | trial_start = "2009-01-01", trial_end = "2009-12-31") 27 | expect_gt(file.info(classes_p)$size[2], 90000) 28 | expect_gt(file.info(classes_s)$size[2], 90000) 29 | 30 | # Check subjects and page 31 | subjects_p <- download_cjpg("recurso", path, subjects = "3372", max_page = 5) 32 | expect_length(subjects_p, 6) 33 | expect_true(all(file.info(subjects_p)$size > 100000)) 34 | subjects_s <- download_cjsg("recurso", path, subjects = "0", max_page = 5) 35 | expect_length(subjects_s, 6) 36 | expect_true(all(file.info(subjects_s)$size > 100000)) 37 | 38 | # Check cores 39 | # cores_p <- download_cjpg("recurso", path, subjects = "3372", max_page = 20, cores = 4) 40 | # expect_length(cores_p, 20) 41 | # expect_true(all(file.info(cores_p)$size > 100000)) 42 | # cores_s <- download_cjsg("recurso", path, subjects = "0", max_page = 20, cores = 4) 43 | # expect_length(cores_s, 20) 44 | # expect_true(all(file.info(cores_s)$size 
> 100000)) 45 | 46 | # See if peek_cjxg() works 47 | expect_message(peek_cjpg("recurso", classes = "8727"), 48 | "This should take around") 49 | expect_message(peek_cjsg("recurso", classes = c("1231", "1232")), 50 | "This should take around") 51 | }) 52 | -------------------------------------------------------------------------------- /R/cposg.R: -------------------------------------------------------------------------------- 1 | 2 | #' @title Download second degree lawsuits filed in Brazilian Justice Courts 3 | #' 4 | #' @description 5 | #' This function downloads lawsuits as HTMLs. Given a lawsuit ID, and 6 | #' the path to a directory it will collect the lawsuit, and save it to 7 | #' the provided directory. 8 | #' 9 | #' @section About lawsuits: 10 | #' The lawsuits contemplated by this funtion have to be filed in a 11 | #' Brazilian Tribunal de Justica (Justice Court). [download_cposg()] 12 | #' finds the lawsuit in its state's online Sistema de Automacao de Justica 13 | #' (Justice Automation System), solves the captcha withholding the 14 | #' information, and collects the HTML. 15 | #' 16 | #' @section Implemented TJs: 17 | #' Unfortunatelly [download_cposg()] doesn't yet work with all 27 18 | #' TJs in Brazil. 
Here are the ones already implemented: 19 | #' \itemize{ 20 | #'   \item TJSP (Sao Paulo) 21 | #' } 22 | #' 23 | #' @param id A character vector of one or more lawsuit IDs (only works with 24 | #' TJSP for now) 25 | #' @param path Path to the directory where the lawsuit should be saved 26 | #' @return A character vector with the path to the downloaded lawsuit 27 | #' 28 | #' @export 29 | download_cposg <- function(id, path = ".") { 30 | 31 |   # Normalize path 32 |   dir.create(path, FALSE, TRUE) 33 |   path <- normalizePath(path) %>% 34 |     stringr::str_c("/") 35 | 36 |   # Strip ID down 37 |   id <- stringr::str_replace_all(id, "[^0-9]", "") 38 |   if (any(stringr::str_length(id) != 20)) { stop("Invalid ID") } 39 | 40 |   # Iterate over IDs 41 |   download_cposg_ <- purrr::possibly(download_cposg_, "") 42 |   pb <- progress::progress_bar$new( 43 |     "Downloading [:bar] :percent eta: :eta", length(id)) 44 |   downloaded <- c() 45 |   for (i in seq_along(id)) { 46 |     downloaded <- append(downloaded, download_cposg_(id[i], path)) 47 |     pb$tick() 48 |   } 49 | 50 |   return(downloaded) 51 | } 52 | 53 | # Download one lawsuit 54 | download_cposg_ <- function(id, path) { 55 | 56 |   # If file exists, return it without downloading (checked first to avoid a needless request) 57 |   if (file.exists(stringr::str_c(path, id, ".html"))) { 58 |     return(stringr::str_c(path, id, ".html")) 59 |   } 60 | 61 |   # Choose appropriate download function 62 |   if (get_n(id) %in% c("02", "26")) { download <- download_noc_lawsuit } 63 |   else { stop("ID must refer to a TJSP lawsuit") } 64 | 65 |   # Get URLs for the download 66 |   data <- get_lwst_data(id, deg = 2) 67 | 68 |   # Download lawsuit 69 |   download(id, path, data$u_captcha, data$u_search, cposg_query(id)) 70 | } 71 | -------------------------------------------------------------------------------- /man/download_cjsg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cjsg.R 3 | \name{download_cjsg} 4 | 
\alias{download_cjsg} 5 | \title{Download results of a query on second degree lawsuits filed in 6 | Brazilian Justice Courts} 7 | \usage{ 8 | download_cjsg(query, path = ".", classes = "", subjects = "", 9 | courts = "", trial_start = "", trial_end = "", 10 | registration_start = "", registration_end = "", min_page = 1, 11 | max_page = 1, cores = 1, wait = 0.5, tj = "tjsp", ...) 12 | } 13 | \arguments{ 14 | \item{query}{Character vector with search query} 15 | 16 | \item{path}{Path to directory where to save HTMLs} 17 | 18 | \item{classes}{Character vector with class IDs (see \code{\link[=cjsg_table]{cjsg_table()}})} 19 | 20 | \item{subjects}{Character vector with subject IDs (see \code{\link[=cjsg_table]{cjsg_table()}})} 21 | 22 | \item{courts}{Character vector with court IDs (see \code{\link[=cjsg_table]{cjsg_table()}})} 23 | 24 | \item{trial_start}{Lower bound for trial date} 25 | 26 | \item{trial_end}{Upper bound for trial date} 27 | 28 | \item{registration_start}{Lower bound for registration date} 29 | 30 | \item{registration_end}{Upper bound for registration date} 31 | 32 | \item{min_page}{First page of results to download} 33 | 34 | \item{max_page}{Last page of results to download. If is \code{NA} or 35 | \code{Inf}, we use \code{\link{peek_cjsg}}.} 36 | 37 | \item{cores}{The number of cores to be used when downloading. If you use more 38 | than one core and is dowloading more than 15 pages, you will probably have 39 | your IP blocked.} 40 | 41 | \item{wait}{Seconds to wait between downloads. 
Does not work properly if 42 | \code{cores} is greater than one, so you will probably have your IP blocked 43 | anyway.} 44 | 45 | \item{tj}{TJ from which to get data (only works with TJSP for now)} 46 | 47 | \item{...}{Param \code{rapporteurs} for \code{\link[=download_cjsg_tjmg]{download_cjsg_tjmg()}}} 48 | } 49 | \value{ 50 | A character vector with the paths to the downloaded files 51 | } 52 | \description{ 53 | Downloads an HTML with the results obtained from querying a 54 | dataset of all second degree lawsuits and then one HTML for each page of 55 | results (at most \code{max_page} pages). \code{query} should be the string to look 56 | for in the lawsuits and \code{classes}, \code{courts}, etc. should be the filtering 57 | parameters (make sure to use \code{\link[=cjsg_table]{cjsg_table()}} to get lists of all valid codes 58 | for these arguments). 59 | } 60 | \seealso{ 61 | \code{\link[=cjsg_table]{cjsg_table()}}, \code{\link[=browse_table]{browse_table()}} 62 | } 63 | -------------------------------------------------------------------------------- /R/cpopg.R: -------------------------------------------------------------------------------- 1 | 2 | #' @title Download first degree lawsuits filed in Brazilian Justice Courts 3 | #' 4 | #' @description 5 | #' This function downloads lawsuits as HTMLs. Given a lawsuit ID, and 6 | #' the path to a directory it will collect the lawsuit, and save it to 7 | #' the provided directory. 8 | #' 9 | #' @section About lawsuits: 10 | #' The lawsuits contemplated by this function have to be filed in a 11 | #' Brazilian Tribunal de Justica (Justice Court). [download_cpopg()] 12 | #' finds the lawsuit in its state's online Sistema de Automacao de Justica 13 | #' (Justice Automation System), solves the captcha withholding the 14 | #' information, and collects the HTML. 15 | #' 16 | #' @section Implemented TJs: 17 | #' Unfortunately [download_cpopg()] doesn't yet work with all 27 TJs in 18 | #' Brazil. 
Here are the ones already implemented: 19 | #' \itemize{ 20 | #'   \item TJAM (Amazonas) 21 | #'   \item TJAL (Alagoas) 22 | #'   \item TJBA (Bahia) 23 | #'   \item TJSC (Santa Catarina) 24 | #'   \item TJSP (Sao Paulo) 25 | #' } 26 | #' 27 | #' @param id A character vector of one or more lawsuit IDs 28 | #' @param path Path to the directory where the lawsuit should be saved 29 | #' @return A character vector with the path to the downloaded lawsuit 30 | #' 31 | #' @export 32 | download_cpopg <- function(id, path = ".") { 33 | 34 |   # Normalize path 35 |   dir.create(path, FALSE, TRUE) 36 |   path <- normalizePath(path) %>% 37 |     stringr::str_c("/") 38 | 39 |   # Strip ID down 40 |   id <- stringr::str_replace_all(id, "[^0-9]", "") 41 |   if (any(stringr::str_length(id) != 20)) { stop("Invalid ID") } 42 | 43 |   # Iterate over IDs 44 |   download_cpopg_ <- purrr::possibly(download_cpopg_, "") 45 |   pb <- progress::progress_bar$new( 46 |     "Downloading [:bar] :percent eta: :eta", length(id)) 47 |   downloaded <- c() 48 |   for (i in seq_along(id)) { 49 |     downloaded <- append(downloaded, download_cpopg_(id[i], path)) 50 |     pb$tick() 51 |   } 52 | 53 |   return(downloaded) 54 | } 55 | 56 | # Download one lawsuit 57 | download_cpopg_ <- function(id, path) { 58 | 59 |   # If file exists, return it without downloading (checked first to avoid a needless request) 60 |   if (file.exists(stringr::str_c(path, id, ".html"))) { 61 |     return(stringr::str_c(path, id, ".html")) 62 |   } 63 | 64 |   # Choose appropriate download function 65 |   if (get_n(id) %in% c("05")) { download <- download_bw_lawsuit } 66 |   else if (get_n(id) %in% c("02", "26")) { download <- download_noc_lawsuit } 67 |   else { download <- download_rgb_lawsuit } 68 | 69 |   # Get URLs for the download 70 |   data <- get_lwst_data(id) 71 | 72 |   # Download lawsuit 73 |   download(id, path, data$u_captcha, data$u_search, cpopg_query(id)) 74 | } 75 | -------------------------------------------------------------------------------- /R/decisions.R: 
-------------------------------------------------------------------------------- 1 | 2 | #' @title Download PDFs corresponding to judicial decisions 3 | #' 4 | #' @description Given the unique identifier of a judicial decision 5 | #' (an 8 digit number), this function downloads the corresponding 6 | #' PDF. 7 | #' 8 | #' @param decision A character vector with decision IDs 9 | #' @param path Path to directory where to save PDF 10 | #' @param tj TJ from which to get data (only works with TJSP for now) 11 | #' @return A character vector with the path to the downloaded file 12 | #' 13 | #' @export 14 | download_decision <- function(decision, path = ".", tj = "tjsp") { 15 | 16 | # Stop if TJ isn't TJSP 17 | stopifnot(tj == "tjsp") 18 | 19 | # Create directory if necessary 20 | dir.create(path, FALSE, TRUE) 21 | 22 | # Download decisions 23 | dwld <- purrr::possibly(download_decision_, "") 24 | pb <- progress::progress_bar$new( 25 | "Downloading [:bar] :percent eta: :eta", length(decision)) 26 | downloaded <- c() 27 | for (i in seq_along(decision)) { 28 | downloaded <- append(downloaded, dwld(decision[i], path)) 29 | pb$tick() 30 | } 31 | 32 | return(downloaded) 33 | } 34 | 35 | download_decision_ <- function(decision, path, ntry = 10, verbose = FALSE) { 36 | 37 | # Download page with captcha 38 | captcha <- httr::GET( 39 | "https://esaj.tjsp.jus.br/cjsg/getArquivo.do", 40 | query = list(cdAcordao = decision, cdForo = 0), 41 | httr::config(ssl_verifypeer = FALSE)) 42 | 43 | # File where to save PDF (and return early if it exists) 44 | file <- stringr::str_c(normalizePath(path), "/", decision, ".pdf") 45 | if (file.exists(file)) { return(file) } 46 | 47 | # Try to download PDF at most ntry times 48 | for (i in 1:ntry) { 49 | 50 | # Message 51 | if (verbose) { message("Breaking captcha...") } 52 | 53 | # Download captcha itself 54 | time_stamp <- stringr::str_replace_all(lubridate::now("Brazil/East"), "[^0-9]", "") 55 | u_captcha <- 
"https://esaj.tjsp.jus.br/cjsg/imagemCaptcha.do" 56 | f_captcha <- download_rgb_captcha(u_captcha, time_stamp) 57 | 58 | # Query for GET request 59 | query_get <- list( 60 | conversationId = "", 61 | cdAcordao = decision, 62 | cdForo = 0, 63 | uuidCaptcha = captcha_uuid(f_captcha), 64 | vlCaptcha = break_rgb_captcha(f_captcha), 65 | novoVlCaptcha = "") 66 | 67 | # Try to open PDF 68 | pdf <- httr::GET( 69 | "https://esaj.tjsp.jus.br/cjsg/getArquivo.do", 70 | query = query_get, httr::config(ssl_verifypeer = FALSE)) 71 | 72 | # Captcha was broken 73 | mime <- "application/pdf;charset=UTF-8" 74 | if (pdf$headers[["content-type"]] == mime) { 75 | writeBin(pdf$content, file); return(file) 76 | } 77 | } 78 | 79 | return("") 80 | } 81 | -------------------------------------------------------------------------------- /docs/jquery.sticky-kit.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | Sticky-kit v1.1.2 | WTFPL | Leaf Corcoran 2015 | http://leafo.net 3 | */ 4 | (function(){var b,f;b=this.jQuery||window.jQuery;f=b(window);b.fn.stick_in_parent=function(d){var A,w,J,n,B,K,p,q,k,E,t;null==d&&(d={});t=d.sticky_class;B=d.inner_scrolling;E=d.recalc_every;k=d.parent;q=d.offset_top;p=d.spacer;w=d.bottoming;null==q&&(q=0);null==k&&(k=void 0);null==B&&(B=!0);null==t&&(t="is_stuck");A=b(document);null==w&&(w=!0);J=function(a,d,n,C,F,u,r,G){var v,H,m,D,I,c,g,x,y,z,h,l;if(!a.data("sticky_kit")){a.data("sticky_kit",!0);I=A.height();g=a.parent();null!=k&&(g=g.closest(k)); 5 | if(!g.length)throw"failed to find stick parent";v=m=!1;(h=null!=p?p&&a.closest(p):b("
    "))&&h.css("position",a.css("position"));x=function(){var c,f,e;if(!G&&(I=A.height(),c=parseInt(g.css("border-top-width"),10),f=parseInt(g.css("padding-top"),10),d=parseInt(g.css("padding-bottom"),10),n=g.offset().top+c+f,C=g.height(),m&&(v=m=!1,null==p&&(a.insertAfter(h),h.detach()),a.css({position:"",top:"",width:"",bottom:""}).removeClass(t),e=!0),F=a.offset().top-(parseInt(a.css("margin-top"),10)||0)-q, 6 | u=a.outerHeight(!0),r=a.css("float"),h&&h.css({width:a.outerWidth(!0),height:u,display:a.css("display"),"vertical-align":a.css("vertical-align"),"float":r}),e))return l()};x();if(u!==C)return D=void 0,c=q,z=E,l=function(){var b,l,e,k;if(!G&&(e=!1,null!=z&&(--z,0>=z&&(z=E,x(),e=!0)),e||A.height()===I||x(),e=f.scrollTop(),null!=D&&(l=e-D),D=e,m?(w&&(k=e+u+c>C+n,v&&!k&&(v=!1,a.css({position:"fixed",bottom:"",top:c}).trigger("sticky_kit:unbottom"))),eb&&!v&&(c-=l,c=Math.max(b-u,c),c=Math.min(q,c),m&&a.css({top:c+"px"})))):e>F&&(m=!0,b={position:"fixed",top:c},b.width="border-box"===a.css("box-sizing")?a.outerWidth()+"px":a.width()+"px",a.css(b).addClass(t),null==p&&(a.after(h),"left"!==r&&"right"!==r||h.append(a)),a.trigger("sticky_kit:stick")),m&&w&&(null==k&&(k=e+u+c>C+n),!v&&k)))return v=!0,"static"===g.css("position")&&g.css({position:"relative"}), 8 | a.css({position:"absolute",bottom:d,top:"auto"}).trigger("sticky_kit:bottom")},y=function(){x();return l()},H=function(){G=!0;f.off("touchmove",l);f.off("scroll",l);f.off("resize",y);b(document.body).off("sticky_kit:recalc",y);a.off("sticky_kit:detach",H);a.removeData("sticky_kit");a.css({position:"",bottom:"",top:"",width:""});g.position("position","");if(m)return null==p&&("left"!==r&&"right"!==r||a.insertAfter(h),h.remove()),a.removeClass(t)},f.on("touchmove",l),f.on("scroll",l),f.on("resize", 9 | y),b(document.body).on("sticky_kit:recalc",y),a.on("sticky_kit:detach",H),setTimeout(l,0)}};n=0;for(K=this.length;n% 36 | purrr::modify(xml2::as_list) %>% 37 | dplyr::first() %>% dplyr::nth(2) %>% 38 | 
purrr::keep(~is.list(.x)) %>% 39 | tree_to_tibble() %>% 40 | dplyr::mutate( 41 | name0 = ifelse(is.na(name0), name5, name0), 42 | id0 = ifelse(is.na(id0), id5, id0)) %>% 43 | dplyr::select( 44 | dplyr::ends_with('0'), dplyr::ends_with('1'), 45 | dplyr::ends_with('2'), dplyr::ends_with('3'), 46 | dplyr::ends_with('4'), dplyr::ends_with('5')) 47 | } 48 | 49 | # Download table with court information for [cjsg_table()] 50 | cjsg_courts <- function() { 51 | 52 | # Function for creating the rows of the table 53 | create_row <- function(x) { 54 | 55 | # XPaths 56 | xp_parent <- ".//span[contains(@id, 'secoes_tree')]" 57 | xp_child <- ".//li[@class='leafItem']//span[contains(@id, 'secoes_tree')]" 58 | 59 | # Create row 60 | branch <- x %>% 61 | rvest::html_node(xpath = xp_parent) %>% 62 | rvest::html_text() 63 | x %>% 64 | rvest::html_nodes(xpath = xp_child) %>% 65 | purrr::map(~tibble::tibble( 66 | id = rvest::html_attr(.x, 'value'), 67 | court = rvest::html_text(.x))) %>% 68 | dplyr::bind_rows() %>% 69 | dplyr::mutate(branch = branch) 70 | } 71 | 72 | # Create courts table 73 | stringr::str_c( 74 | "https://esaj.tjsp.jus.br/cjsg/", 75 | "secaoTreeSelect.do?campoId=secoes") %>% 76 | httr::GET(httr::config(ssl_verifypeer = FALSE)) %>% 77 | httr::content('text') %>% 78 | xml2::read_html() %>% 79 | rvest::html_nodes('li.open') %>% 80 | rlang::as_list() %>% 81 | purrr::modify(create_row) %>% 82 | dplyr::bind_rows() %>% 83 | dplyr::select(branch, court, id) 84 | } 85 | -------------------------------------------------------------------------------- /R/parser.R: -------------------------------------------------------------------------------- 1 | 2 | #' Create a parser skeleton 3 | #' @param type The type of parser (`"cpopg"` or `"cposg"`) 4 | #' @export 5 | make_parser <- function(type = "cposg") { 6 | list(name = NULL, getter = NULL) %>% rlang::set_attrs("class" = c("parser", type)) 7 | } 8 | 9 | #' Run a parser 10 | #' @param file A character vector with the paths to one ore 
more files 11 | #' @param parser A parser returned by [make_parser()] 12 | #' @param path The path to a directory where to save RDSs 13 | #' @param cores The number of cores to be used when parsing 14 | #' @export 15 | run_parser <- function(file, parser, path = ".", cores = 1) { 16 | 17 | # Check if parser is a parser 18 | stopifnot("parser" %in% class(parser)) 19 | 20 | # Given a parser and a file, apply getters 21 | apply_getters <- function(file, parser_path) { 22 | 23 | # Resolve parallelism problem 24 | parser <- parser_path$parser 25 | path <- parser_path$path 26 | 27 | # Apply all getters 28 | html <- xml2::read_html(file) 29 | 30 | if (hidden_lawsuit(html)) { 31 | empty_cols <- parser_path$parser$name %>% 32 | purrr::map(~list(tibble::tibble())) %>% 33 | purrr::set_names(parser_path$parser$name) %>% 34 | tibble::as_tibble() 35 | out <- tibble::tibble( 36 | id = stringr::str_extract(tools::file_path_sans_ext(basename(file)), "(?<=_).+"), 37 | file, hidden = TRUE) %>% 38 | dplyr::bind_cols(empty_cols) 39 | } else { 40 | out <- parser$getter %>% 41 | purrr::invoke_map(list(list("html" = html))) %>% 42 | purrr::set_names(parser$name) %>% 43 | purrr::modify(list) %>% 44 | dplyr::as_tibble() %>% 45 | dplyr::mutate( 46 | file = file, 47 | id = stringr::str_extract(tools::file_path_sans_ext(basename(file)), "(?<=_).+"), 48 | hidden = FALSE) %>% 49 | dplyr::select(id, file, hidden, dplyr::everything()) 50 | } 51 | 52 | # Write and return 53 | readr::write_rds(out, stringr::str_c(path, "/", out$id, ".rds")) 54 | return(out) 55 | } 56 | 57 | # Create path if necessary 58 | dir.create(path, showWarnings = FALSE, recursive = TRUE) 59 | 60 | # Apply getters to all files 61 | parser_path <- list(parser = parser, path = path) 62 | parallel::mcmapply( 63 | apply_getters, file, list(parser_path = parser_path), 64 | SIMPLIFY = FALSE, mc.cores = cores) %>% 65 | dplyr::bind_rows() 66 | } 67 | 68 | # Check if lawsuit has secret of justice 69 | hidden_lawsuit <- function(html) { 
70 | !is.na(rvest::html_node(html, "#popupSenhaProcesso")) 71 | } 72 | 73 | #' Shortcut for creating and running a complete CPOSG parser 74 | #' @param file A character vector with the paths to one ore more files 75 | #' @param path The path to a directory where to save RDSs 76 | #' @param cores The number of cores to be used when parsing 77 | #' @export 78 | parse_cposg_all <- function(file, path = ".", cores = 1) { 79 | parser <- parse_decisions(parse_parts(parse_data(parse_movs(make_parser())))) 80 | run_parser(file, parser, path, cores) 81 | } 82 | 83 | #' Shortcut for creating and running a complete CPOPG parser 84 | #' @param file A character vector with the paths to one ore more files 85 | #' @param path The path to a directory where to save RDSs 86 | #' @param cores The number of cores to be used when parsing 87 | #' @export 88 | parse_cpopg_all <- function(file, path = ".", cores = 1) { 89 | parser <- parse_pd(parse_hist(parse_hearings(parse_parts(parse_data(parse_movs(make_parser("cpopg"))))))) 90 | run_parser(file, parser, path, cores) 91 | } 92 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | github_document: 4 | html_preview: false 5 | --- 6 | 7 | ```{r, echo = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "README-") 12 | ``` 13 | 14 | # esaj 15 | 16 | [![Made In Brazil](https://img.shields.io/badge/made%20in-brazil-green.svg)](http://www.abj.org.br) [![Travis-CI Build Status](https://travis-ci.org/courtsbr/esaj.svg?branch=master)](https://travis-ci.org/courtsbr/esaj) [![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/courtsbr/esaj?branch=master&svg=true)](https://ci.appveyor.com/project/courtsbr/esaj) 17 | 18 | ## Overview 19 | 20 | The `esaj` R package is a simple interface that allows you to download multiple 21 | kinds of files 
from Brazil's e-SAJ (Electronic Justice Automation System) 22 | portals. With this package you can save and parse lawsuits, queries, and 23 | decisions with very simple, tidyverse compliant functions. 24 | 25 | To install `esaj`, run the code below: 26 | 27 | ```{r, eval = FALSE} 28 | # install.packages("devtools") 29 | devtools::install_github("courtsbr/esaj") 30 | ``` 31 | 32 | ## Usage 33 | 34 | ### Lawsuits 35 | 36 | Before `esaj` if you wanted to gather information about lawsuits being 37 | processed by Brazil's state-level Judiciary, you would have to go to each 38 | state's e-SAJ portal, manually input each lawsuit's ID, break a captcha, and 39 | only then download an HTML with the information you wanted; now you can simply 40 | run `download_cpopg()` or `download_cposg()`, and spend your valuable time 41 | analysing the data. 42 | 43 | ```{r, message = FALSE} 44 | # Download first degree lawsuits from multiple states 45 | ids <- c( 46 | "0123479-07.2012.8.26.0100", 47 | "0552486-62.2015.8.05.0001", 48 | "0303349-44.2014.8.24.0020") 49 | esaj::download_cpopg(ids, "~/Desktop/") 50 | 51 | # Download second degree lawsuits from São Paulo 52 | ids <- c( 53 | "1001869-51.2017.8.26.0562", 54 | "1001214-07.2016.8.26.0565") 55 | esaj::download_cposg(ids, "~/Desktop/") 56 | ``` 57 | 58 | For more information on how to use these functions and which TJs are 59 | implemented, please see [Downloading Lawsuits](http://courtsbr.github.io/esaj/articles/download_lawsuit.html). 60 | 61 | ### Queries 62 | 63 | Besides downloading lawsuits (see the **Downloading Lawsuits** article), `esaj` 64 | also allows the user to download the results of a query on lawsuits. This kind 65 | of query is very useful for finding out what lawsuits contain certain words, 66 | were filed in a given period, were filed in a given court, etc. 
67 | 68 | 69 | ```{r} 70 | # Download results of a simple first degree query 71 | esaj::download_cjpg("recurso", "~/Desktop/") 72 | 73 | # Download results of a slightly more complex second degree query 74 | esaj::download_cjsg("recurso", "~/Desktop/", classes = c("1231", "1232")) 75 | ``` 76 | 77 | For more information on how to use these functions and all their auxiliary 78 | methods (like `peek_cj*g()` and `cj*g_table()`), please see 79 | [Downloading Queries](http://courtsbr.github.io/esaj/articles/download_query.html). 80 | 81 | ### Decisions 82 | 83 | Of all functions in the `esaj` package, `download_decision()` is probably the 84 | simplest: it downloads the PDF belonging to a decision and that's it. 85 | 86 | 87 | ```{r} 88 | # Download one decision 89 | esaj::download_decision("10000034", "~/Desktop/") 90 | 91 | # Download more than one decision 92 | esaj::download_decision(c("10800758", "10000034"), "~/Desktop/") 93 | ``` 94 | 95 | For more information on how to use this function, please see 96 | [Downloading Decisions](http://courtsbr.github.io/esaj/articles/download_decision.html). 
97 | -------------------------------------------------------------------------------- /R/parse_cjpg.R: -------------------------------------------------------------------------------- 1 | 2 | #' Parse one lawsuit from a CJSG page 3 | #' 4 | #' @param node A `.fundocinza1` node extracted from the page 5 | #' @return One row with the data concerning the lawsuit 6 | parse_cjpg_lawsuit <- function(node) { 7 | 8 | # Get complicated variables 9 | cd <- node %>% 10 | xml2::xml_find_first(".//a[@title='Visualizar Inteiro Teor']") %>% 11 | rvest::html_attr("name") %>% 12 | stringr::str_trim() 13 | id <- node %>% 14 | xml2::xml_find_first(".//a[@title='Visualizar Inteiro Teor']") %>% 15 | rvest::html_text() %>% 16 | stringr::str_trim() %>% 17 | stringr::str_replace_all("[^0-9]", "") 18 | tx <- node %>% 19 | rvest::html_node(xpath = ".//table//div[@style='display: none;']") %>% 20 | rvest::html_text() %>% 21 | stringr::str_trim() 22 | 23 | # Create table prototype 24 | keys <- node %>% 25 | rvest::html_nodes("table > tr > td > strong") %>% 26 | rvest::html_text() %>% 27 | stringr::str_trim() %>% 28 | stringr::str_to_lower() %>% 29 | rm_accent() %>% 30 | stringr::str_trim() %>% 31 | stringr::str_replace_all("[^0-9a-z]+", "_") %>% 32 | stringr::str_replace_all("^_|_$", "") 33 | vals <- node %>% 34 | xml2::xml_find_all(".//table/tr/td/strong/following-sibling::text()[1]") %>% 35 | rvest::html_text() %>% 36 | stringr::str_trim() 37 | infos <- tibble::tibble(key = keys, val = vals) %>% 38 | dplyr::mutate(id = 1) %>% 39 | tidyr::spread(key, val) %>% 40 | dplyr::select(-id) 41 | 42 | # Build final table 43 | tibble::tibble(id_lawsuit = id, code_lawsuit = cd) %>% 44 | dplyr::bind_cols(infos) %>% 45 | dplyr::mutate(summary = tx) 46 | } 47 | 48 | #' Parse a page of CJPG results 49 | #' 50 | #' @param file The path to the file to be parsed 51 | #' @param pb Progress bar created by [parse_cjpg()] 52 | #' @return A tibble with the parsed information 53 | parse_cjpg_ <- function(file, pb = 
NULL) { 54 | 55 | # Safely parse everything 56 | parse <- purrr::possibly(parse_cjpg_lawsuit, tibble::tibble(), quiet = FALSE) 57 | 58 | # Iterate over xml nodes to parse every lawsuit 59 | table <- file %>% 60 | xml2::read_html("UTF-8") %>% 61 | rvest::html_nodes(".fundocinza1") %>% 62 | purrr::map_dfr(parse) 63 | 64 | if (!is.null(pb)) { pb$tick() } 65 | return(table) 66 | } 67 | 68 | #' Parse lawsuits extracted from CJPG query 69 | #' 70 | #' @param file Character vector with the paths to one or more files 71 | #' @param cores Number of cores to use when parsing 72 | #' 73 | #' @return A tibble with the columns 74 | #' \itemize{ 75 | #' \item `file` Name of the file 76 | #' \item `id_lawsuit` Number of the lawsuit (doesn't have to be unique) 77 | #' \item `code_lawsuit` Unique code of the lawsuit 78 | #' \item `subject` Subject of the lawsuit 79 | #' \item `class` Class of the subject 80 | #' \item `district` Name of the district 81 | #' \item `date_available` Date when lawsuit was made available (\%d/\%m/\%Y) 82 | #' \item `jurisdiction` Name of the jurisdiction 83 | #' \item `judge` Name of the judge 84 | #' \item `court` Body responsible for the lawsuit 85 | #' \item `summary` Summary of the ruling 86 | #' } 87 | #' @export 88 | parse_cjpg <- function(file, cores = 1) { 89 | 90 | # Set names for .id 91 | names(file) <- file 92 | file <- file[file.size(file) > 0] 93 | 94 | # Run either with progress bar or on parallel 95 | if (cores == 1) { 96 | pb <- progress::progress_bar$new(total = length(file)) 97 | purrr::map_dfr(file, parse_cjpg_, pb, .id = "file") 98 | } else { 99 | file %>% 100 | parallel::mclapply(parse_cjpg_, mc.cores = cores) %>% 101 | dplyr::bind_rows(.id = "file") 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | esaj 3 | ===================================================== 4 | 5 | [![Made In 
Brazil](https://img.shields.io/badge/made%20in-brazil-green.svg)](http://www.abj.org.br) [![Travis-CI Build Status](https://travis-ci.org/courtsbr/esaj.svg?branch=master)](https://travis-ci.org/courtsbr/esaj) [![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/courtsbr/esaj?branch=master&svg=true)](https://ci.appveyor.com/project/courtsbr/esaj) 6 | 7 | Overview 8 | -------- 9 | 10 | The `esaj` R package is a simple interface that allows you to download multiple kinds of files from Brazil's e-SAJ (Electronic Justice Automation System) portals. With this package you can save and parse lawsuits, queries, and decisions with very simple, tidyverse compliant functions. 11 | 12 | To install `esaj`, run the code below: 13 | 14 | ``` r 15 | # install.packages("devtools") 16 | devtools::install_github("courtsbr/esaj") 17 | ``` 18 | 19 | Usage 20 | ----- 21 | 22 | ### Lawsuits 23 | 24 | Before `esaj` if you wanted to gather information about lawsuits being processed by Brazil's state-level Judiciary, you would have to go to each state's e-SAJ portal, manually input each lawsuit's ID, break a captcha, and only then download an HTML with the information you wanted; now you can simply run `download_cpopg()` or `download_cposg()`, and spend your valuable time analysing the data. 
25 | 26 | ``` r 27 | # Download first degree lawsuits from multiple states 28 | ids <- c( 29 | "0123479-07.2012.8.26.0100", 30 | "0552486-62.2015.8.05.0001", 31 | "0303349-44.2014.8.24.0020") 32 | esaj::download_cpopg(ids, "~/Desktop/") 33 | #> [1] "/Users/user/Desktop/01234790720128260100.html" 34 | #> [2] "/Users/user/Desktop/05524866220158050001.html" 35 | #> [3] "/Users/user/Desktop/03033494420148240020.html" 36 | 37 | # Download second degree lawsuits from São Paulo 38 | ids <- c( 39 | "1001869-51.2017.8.26.0562", 40 | "1001214-07.2016.8.26.0565") 41 | esaj::download_cposg(ids, "~/Desktop/") 42 | #> [1] "/Users/user/Desktop/10018695120178260562.html" 43 | #> [2] "/Users/user/Desktop/10012140720168260565.html" 44 | ``` 45 | 46 | For more information on how to use these functions and which TJs are implemented, please see [Downloading Lawsuits](http://courtsbr.github.io/esaj/articles/download_lawsuit.html). 47 | 48 | ### Queries 49 | 50 | Besides downloading lawsuits (see the **Downloading Lawsuits** article), `esaj` also allows the user to download the results of a query on lawsuits. This kind of query is very useful for finding out what lawsuits contain certain words, were filed in a given period, were filed in a given court, etc. 51 | 52 | ``` r 53 | # Download results of a simple first degree query 54 | esaj::download_cjpg("recurso", "~/Desktop/") 55 | #> [1] "/Users/user/Desktop/search.html" 56 | #> [2] "/Users/user/Desktop/page1.html" 57 | 58 | # Download results of a slightly more complex second degree query 59 | esaj::download_cjsg("recurso", "~/Desktop/", classes = c("1231", "1232")) 60 | #> [1] "/Users/user/Desktop/search.html" 61 | #> [2] "/Users/user/Desktop/page1.html" 62 | ``` 63 | 64 | For more information on how to use these functions and all their auxiliary methods (like `peek_cj*g()` and `cj*g_table()`), please see [Downloading Queries](http://courtsbr.github.io/esaj/articles/download_query.html). 
65 | 66 | ### Decisions 67 | 68 | Of all functions in the `esaj` package, `download_decision()` is probably the simplest: it downloads the PDF belonging to a decision and that's it. 69 | 70 | ``` r 71 | # Download one decision 72 | esaj::download_decision("10000034", "~/Desktop/") 73 | #> [1] "/Users/user/Desktop/10000034.pdf" 74 | 75 | # Download more than one decision 76 | esaj::download_decision(c("10800758", "10000034"), "~/Desktop/") 77 | #> [1] "/Users/user/Desktop/10800758.pdf" 78 | #> [2] "/Users/user/Desktop/10000034.pdf" 79 | ``` 80 | 81 | For more information on how to use this function, please see [Downloading Decisions](http://courtsbr.github.io/esaj/articles/download_decision.html). 82 | -------------------------------------------------------------------------------- /docs/reference/make_parser.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Makes a parser — make_parser • esaj 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 33 | 34 | 35 | 36 |
    37 |
    38 | 87 | 88 | 89 |
    90 | 91 |
    92 |
    93 | 96 | 97 | 98 |

    Makes a parser

    99 | 100 | 101 |
    make_parser()
    102 | 103 | 104 |
    105 | 111 |
    112 | 113 |
    114 |
    115 |

    Site built with pkgdown

    116 |
    117 |
    118 |
    119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /docs/reference/pipe.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Pipe operator — %>% • esaj 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 33 | 34 | 35 | 36 |
    37 |
    38 | 87 | 88 | 89 |
    90 | 91 |
    92 |
    93 | 96 | 97 | 98 |

    See %>% for more details.

    99 |

    See %<>% for more details.

    100 | 101 | 102 | 103 | 104 |
    105 | 111 |
    112 | 113 |
    114 |
    115 |

    Site built with pkgdown

    116 |
    117 |
    118 |
    119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /docs/reference/op-definition.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Definition operator — := • esaj 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 33 | 34 | 35 | 36 |
    37 |
    38 | 87 | 88 | 89 |
    90 | 91 |
    92 |
    93 | 96 | 97 | 98 |

    See := for more details.

    99 | 100 | 101 | 102 | 103 |
    104 | 110 |
    111 | 112 |
    113 |
    114 |

    Site built with pkgdown

    115 |
    116 |
    117 |
    118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /docs/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Articles • esaj 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 33 | 34 | 35 | 36 |
    37 |
    38 | 87 | 88 | 89 |
    90 | 91 | 94 | 95 |
    96 |
    97 |
    98 |

    All vignettes

    99 |

    100 | 101 | 106 |
    107 |
    108 |
    109 | 110 |
    111 |
    112 |

    Site built with pkgdown

    113 |
    114 |
    115 |
    116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /docs/authors.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Authors • esaj 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 33 | 34 | 35 | 36 |
    37 |
    38 | 87 | 88 | 89 |
    90 | 91 |
    92 |
    93 | 96 | 97 |
      98 |
    • 99 |

      Caio Lente. Author, maintainer. 100 |

      101 |
    • 102 |
    • 103 |

      Fernando Corrêa. Author. 104 |

      105 |
    • 106 |
    • 107 |

      Julio Trecenti. Author. 108 |

      109 |
    • 110 |
    • 111 |

      ABJ. Copyright holder, funder. 112 |

      113 |
    • 114 |
    115 | 116 |
    117 | 118 |
    119 | 120 | 121 |
    122 |
    123 |

    Site built with pkgdown

    124 |
    125 |
    126 |
    127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /vignettes/download_lawsuit.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Downloading Lawsuits" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Vignette Title} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r setup, include = FALSE} 11 | library(esaj) 12 | knitr::opts_chunk$set( 13 | collapse = TRUE, 14 | comment = "#>") 15 | ``` 16 | 17 | The `esaj` package has two very useful functions for downloading lawsuits 18 | filed in Brazilian Justice Courts (*Tribunais de Justiça*). The only 19 | arguments you have to provide to them are the lawsuit's unique number 20 | and the directory where to save the downloaded file. 21 | 22 | These two functions are called `download_cpopg()` and `download_cposg()`. 23 | The first one works for first degree lawsuits, while the second one works 24 | for second degree lawsuits; despite this superficial difference, both 25 | download lawsuits as HTMLs and return the paths to the downloaded files 26 | in the exact same way. 27 | 28 | It's important to note that you shouldn't provide a filename to the `path` 29 | argument. You can pass more than one lawsuit ID to these functions, meaning 30 | that `path` has to contain the directory where all the files should be 31 | downloaded to. 32 | 33 | ## Basic usage 34 | 35 | To download a first degree lawsuits, simply provide the lawsuit's unique number 36 | (ID) and the path to the directory where the file should be saved. The ID doesn't 37 | necessarily have to have all divisor marks, but I'll leave them here for clarity 38 | purposes anyway. 
39 | 40 | ```{r} 41 | download_cpopg("0123479-07.2012.8.26.0100", "~/Desktop/") 42 | ``` 43 | 44 | If you want to download more than one lawsuit, provide all IDs in a character 45 | vector and the function will take care of the rest. All lawsuits will be 46 | independently downloaded to the provided directory as if the function had been 47 | called once for each ID. 48 | 49 | ```{r, message = FALSE} 50 | ids <- c( 51 | "0123479-07.2012.8.26.0100", 52 | "0552486-62.2015.8.05.0001", 53 | "0303349-44.2014.8.24.0020") 54 | download_cpopg(ids, "~/Desktop/") 55 | ``` 56 | 57 | Note that these lawsuits all belong to different TJs. `download_cpopg()` is 58 | able to identify the Justice Court where the lawsuit was filed and 59 | automatically look for it in the correct website (for a list of all valid TJs 60 | see the **Implemented TJs** section below). 61 | 62 | `esaj` is able to download lawsuits from many TJs because it is capable of 63 | breaking multiple types of captchas (courtesy of the `decryptr` package). 64 | The functions used for this task are not exported, but you can check them 65 | out with `esaj:::download_noc_lawsuit()`, `esaj:::download_bw_lawsuit()`, 66 | and `esaj:::download_rgb_lawsuit()`. 67 | 68 | `download_cposg()` works in the exact same way as `download_cpopg()`, but, 69 | as of the day of this writing, it is only able to download lawsuits filed in 70 | São Paulo (TJSP). 71 | 72 | ```{r} 73 | ids <- c( 74 | "1001869-51.2017.8.26.0562", 75 | "1001214-07.2016.8.26.0565") 76 | download_cposg(ids, "~/Desktop/") 77 | ``` 78 | 79 | The only case where `download_cposg()` differs in behaviour from 80 | `download_cpopg()` is when the lawsuit turns out to be a list of "closed" 81 | lawsuits. If this ever happens, `download_cposg()` adds some information 82 | to the filenames. 
83 | 84 | ```{r} 85 | download_cposg("00000144420138260352", "~/Desktop/") 86 | ``` 87 | 88 | ## Implemented TJs 89 | 90 | Unfortunately we haven't yet implemented downloaders for all Justice Courts. 91 | These are the ones implemented at the moment: 92 | 93 | ```{r, echo = FALSE} 94 | tjs <- dplyr::tibble( 95 | Court = c( 96 | "Acre (AC)", "Alagoas (AL)", "Amapá (AP)", "Amazonas (AM)", "Bahia (BA)", 97 | "Ceará (CE)", "Distrito Federal (DF)", "Espírito Santo (ES)", "Goiás (GO)", 98 | "Maranhão (MA)", "Mato Grosso (MT)", "Mato Grosso do Sul (MS)", 99 | "Minas Gerais (MG)", "Pará (PA) ", "Paraíba (PB)", "Paraná (PR)", 100 | "Pernambuco (PE)", "Piauí (PI)", "Rio de Janeiro (RJ)", 101 | "Rio Grande do Norte (RN)", "Rio Grande do Sul (RS)", "Rondônia (RO)", 102 | "Roraima (RR)", "Santa Catarina (SC)", "São Paulo (SP)", "Sergipe (SE)", 103 | "Tocantins (TO)"), 104 | `download_cpopg()` = c( 105 | rep(FALSE, 3), rep(TRUE, 2), rep(FALSE, 18), rep(TRUE, 2), rep(FALSE, 2)), 106 | `download_cposg()` = c(rep(FALSE, 24), TRUE, rep(FALSE, 2))) 107 | knitr::kable(tjs) 108 | ``` 109 | -------------------------------------------------------------------------------- /R/aux_captcha.R: -------------------------------------------------------------------------------- 1 | 2 | # Remove captcha's background and image 3 | rm_bg_and_lines <- function(img) { 4 | img %>% 5 | dplyr::filter(y > 15) %>% 6 | dplyr::group_by(color) %>% 7 | dplyr::mutate(n = n()) %>% 8 | dplyr::ungroup() %>% 9 | dplyr::filter(n < max(n)) %>% 10 | dplyr::filter(n > sort(unique(n), decreasing = TRUE)[3]) 11 | } 12 | 13 | # Detect whether file has a captcha 14 | has_captcha <- function(file) { 15 | (httr::content(file, 'text', encoding = "ISO-8859-1") %>% 16 | xml2::read_html() %>% 17 | rvest::html_nodes('#captchaCodigo') %>% 18 | length()) > 0 19 | } 20 | has_captcha <- purrr::possibly(has_captcha, TRUE) 21 | 22 | # Get captcha's UUID 23 | captcha_uuid <- function(file) { 24 | jsonlite::fromJSON(file)$uuidCaptcha 25 | 
} 26 | captcha_uuid <- purrr::possibly(captcha_uuid, "xxxx") 27 | 28 | # Create query to download lawsuit 29 | cpopg_query <- function(id) { 30 | list( 31 | "conversationId" = "", 32 | "dadosConsulta.localPesquisa.cdLocal" = "-1", 33 | "cbPesquisa" = "NUMPROC", 34 | "dadosConsulta.tipoNuProcesso" = "UNIFICADO", 35 | "numeroDigitoAnoUnificado" = stringr::str_sub(id, 1, 13), 36 | "foroNumeroUnificado" = stringr::str_sub(id, -4, -1), 37 | "dadosConsulta.valorConsultaNuUnificado" = id, 38 | "dadosConsulta.valorConsulta" = "", 39 | "uuidCaptcha" = "", 40 | "vlCaptcha" = "", 41 | "novoVlCaptcha" = "") 42 | } 43 | 44 | # Create query to download 2nd degree lawsuit 45 | cposg_query <- function(id) { 46 | list( 47 | conversationId = "", 48 | paginaConsulta = 1, 49 | "localPesquisa.cdLocal" = -1, 50 | cbPesquisa = "NUMPROC", 51 | tipoNuProcesso = "UNIFICADO", 52 | numeroDigitoAnoUnificado = stringr::str_sub(id, 1, 11), 53 | foroNumeroUnificado = stringr::str_sub(id, -4, -1), 54 | dePesquisaNuUnificado = id, 55 | dePesquisaNuAntigo = "") 56 | } 57 | 58 | # Break RGB captcha 59 | break_rgb_captcha <- function(file) { 60 | 61 | # Require magick and tesseract 62 | require_pkg("magick") 63 | require_pkg("tesseract") 64 | 65 | # Get file's JSON 66 | json <- jsonlite::fromJSON(file) 67 | 68 | # Collect file's image 69 | image <- json %>% 70 | with(imagem) %>% 71 | stringr::str_split_fixed(",", 2) %>% 72 | magrittr::extract(TRUE, 2) %>% 73 | base64enc::base64decode() 74 | 75 | # Collect file's colors 76 | color_json <- json %>% 77 | with(labelValorCaptcha) %>% 78 | stringr::str_match("([A-Za-z]+)") %>% 79 | magrittr::extract(TRUE, 2) 80 | 81 | # Create image data frame 82 | img_png <- png::readPNG(image) 83 | img_dim <- dim(img_png) 84 | img_df <- tibble::tibble( 85 | x = rep(1:img_dim[2], each = img_dim[1]), 86 | y = rep(img_dim[1]:1, img_dim[2]), 87 | r = as.vector(img_png[,,1]), 88 | g = as.vector(img_png[,,2]), 89 | b = as.vector(img_png[,,3])) %>% 90 | dplyr::mutate(color = 
rgb(r, g, b), id = 1:n()) %>% 91 | rm_bg_and_lines() 92 | 93 | # Fill in data frame 94 | tmp <- tempfile(fileext = ".png") 95 | complete_df <- purrr::cross_df(list( 96 | x = min(img_df$x):max(img_df$x), 97 | y = min(img_df$y):max(img_df$y))) 98 | 99 | # Save image to temporary file 100 | img_df %>% 101 | filter_color(color_json) %>% 102 | dplyr::mutate(black = 0) %>% 103 | dplyr::arrange(x, y) %>% 104 | dplyr::right_join(complete_df, c("x", "y")) %>% 105 | tidyr::replace_na(list(black = 1)) %>% 106 | dplyr::select(x, y, black) %>% 107 | tidyr::spread(x, black, fill = 1) %>% 108 | dplyr::select(-y) %>% 109 | as.matrix() %>% 110 | magrittr::extract(nrow(.):1, TRUE) %>% 111 | png::writePNG(tmp) 112 | 113 | # Guess captcha's solution 114 | sol <- tmp %>% 115 | magick::image_read() %>% 116 | magick::image_trim() %>% 117 | magick::image_scale("x50") %>% 118 | tesseract::ocr() %>% 119 | stringr::str_trim() %>% 120 | stringr::str_to_lower() %>% 121 | stringr::str_replace_all("[^a-z]", "") 122 | 123 | file.remove(tmp) 124 | return(sol) 125 | } 126 | break_rgb_captcha <- purrr::possibly(break_rgb_captcha, "xxxx") 127 | 128 | # Download an RGB captcha to a temporary file 129 | download_rgb_captcha <- function(u_captcha, ts = "") { 130 | 131 | # Download captcha useing time stamp 132 | tmp <- tempfile() 133 | r <- httr::POST( 134 | u_captcha, 135 | body = list(timestamp = ts, uuidCaptcha = "", conversationId = ""), 136 | config = httr::config(ssl_verifypeer = FALSE), 137 | httr::write_disk(tmp, overwrite = TRUE) 138 | ) 139 | 140 | return(tmp) 141 | } 142 | -------------------------------------------------------------------------------- /docs/reference/parse_data.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Parses data — parse_data • esaj 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 33 | 34 | 35 | 36 |
    37 |
    38 | 87 | 88 | 89 |
    90 | 91 |
    92 |
    93 | 96 | 97 | 98 |

    Parses data

    99 | 100 | 101 |
    parse_data(parser)
    102 | 103 |

    Arguments

    104 | 105 | 106 | 107 | 108 | 109 | 110 |
    parser

    A parser returned by make_parser()

    111 | 112 | 113 |
    114 | 121 |
    122 | 123 |
    124 |
    125 |

    Site built with pkgdown

    126 |
    127 |
    128 |
    129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /docs/reference/parse_parts.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Parses parts — parse_parts • esaj 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 33 | 34 | 35 | 36 |
    37 |
    38 | 87 | 88 | 89 |
    90 | 91 |
    92 |
    93 | 96 | 97 | 98 |

    Parses parts

    99 | 100 | 101 |
    parse_parts(parser)
    102 | 103 |

    Arguments

    104 | 105 | 106 | 107 | 108 | 109 | 110 |
    parser

    A parser returned by make_parser()

    111 | 112 | 113 |
    114 | 121 |
    122 | 123 |
    124 |
    125 |

    Site built with pkgdown

    126 |
    127 |
    128 |
    129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /docs/reference/parse_movs.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Parses movements — parse_movs • esaj 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 33 | 34 | 35 | 36 |
    37 |
    38 | 87 | 88 | 89 |
    90 | 91 |
    92 |
    93 | 96 | 97 | 98 |

    Parses movements

    99 | 100 | 101 |
    parse_movs(parser)
    102 | 103 |

    Arguments

    104 | 105 | 106 | 107 | 108 | 109 | 110 |
    parser

    A parser returned by make_parser()

    111 | 112 | 113 |
    114 | 121 |
    122 | 123 |
    124 |
    125 |

    Site built with pkgdown

    126 |
    127 |
    128 |
    129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /docs/reference/parse_decisions.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Parses decisions — parse_decisions • esaj 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 33 | 34 | 35 | 36 |
    37 |
    38 | 87 | 88 | 89 |
    90 | 91 |
    92 |
    93 | 96 | 97 | 98 |

    Parses decisions

    99 | 100 | 101 |
    parse_decisions(parser)
    102 | 103 |

    Arguments

    104 | 105 | 106 | 107 | 108 | 109 | 110 |
    parser

    A parser returned by make_parser()

    111 | 112 | 113 |
    114 | 121 |
    122 | 123 |
    124 |
    125 |

    Site built with pkgdown

    126 |
    127 |
    128 |
    129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /R/parse_cposg.R: -------------------------------------------------------------------------------- 1 | 2 | #' Parses parts 3 | #' @param parser A parser returned by [make_parser()] 4 | #' @export 5 | parse_parts.cposg <- function(parser) { 6 | 7 | # Check class 8 | stopifnot("parser" %in% class(parser)) 9 | 10 | # Function for getting parts 11 | get_parts <- function(html) { 12 | html %>% 13 | xml2::xml_find_all("//*[@id='tablePartesPrincipais']") %>% 14 | rvest::html_table(fill = TRUE) %>% 15 | purrr::pluck(1) %>% 16 | dplyr::as_tibble() %>% 17 | dplyr::mutate( 18 | X2 = stringr::str_split(X2, " "), 19 | id = 1:nrow(.)) %>% 20 | tidyr::unnest(X2) %>% 21 | dplyr::mutate( 22 | part = str_replace_all(X1, "[^a-zA-Z]", ""), 23 | role = stringr::str_extract(dplyr::lag(X2), "\\t [a-zA-Z]+:"), 24 | role = str_replace_all(role, "[^a-zA-Z]", ""), 25 | role = ifelse(is.na(role), part, role), 26 | name = str_replace_all(X2, " ?\\n.+", "")) %>% 27 | dplyr::select(id, name, part, role) 28 | } 29 | 30 | # Add get_parts to getters 31 | purrr::list_merge(parser, name = "parts", getter = get_parts) 32 | } 33 | 34 | #' Parses data 35 | #' @param parser A parser returned by [make_parser()] 36 | #' @export 37 | parse_data.cposg <- function(parser) { 38 | 39 | # Check class 40 | stopifnot("parser" %in% class(parser)) 41 | 42 | # Function for getting data 43 | get_data <- function(html) { 44 | html %>% 45 | xml2::xml_find_all("//*[@class='secaoFormBody']") %>% 46 | rvest::html_table(fill = TRUE) %>% 47 | purrr::pluck(2) %>% 48 | dplyr::as_tibble() %>% 49 | dplyr::filter(!(is.na(X2) & is.na(X3))) %>% 50 | dplyr::select(-X3) %>% 51 | dplyr::add_row( 52 | X1 = "Situa\u00E7\u00E3o", 53 | X2 = stringr::str_extract(.[1, 2], "[A-Za-z]+$")) %>% 54 | dplyr::mutate( 55 | X1 = str_replace_all(X1, ":", ""), 56 | X2 = str_replace_all(X2, " ?[\\n\\t].+", ""), 57 | X2 = str_replace_all(X2, 
"\\n", "")) %>% 58 | purrr::set_names("data", "value") 59 | } 60 | 61 | # Add get_data to getters 62 | purrr::list_merge(parser, name = "data", getter = get_data) 63 | } 64 | 65 | #' Parses movements 66 | #' @param parser A parser returned by [make_parser()] 67 | #' @export 68 | parse_movs.cposg <- function(parser) { 69 | 70 | # Check class 71 | stopifnot("parser" %in% class(parser)) 72 | 73 | # Function for getting movements 74 | get_movs <- function(html) { 75 | xp0 <- "//*[@id='tabelaTodasMovimentacoes']" 76 | tab <- xml2::xml_find_all(html, paste0(xp0, "//parent::table")) 77 | tab %>% 78 | rvest::html_table(fill = TRUE) %>% 79 | purrr::pluck(1) %>% 80 | janitor::clean_names() %>% 81 | dplyr::as_tibble() %>% 82 | dplyr::select(movement = data, X3 = movimento) %>% 83 | dplyr::filter(movement != "") %>% 84 | tidyr::separate(X3, c("title", "txt"), sep = "\n\t", 85 | extra = "merge", fill = "right") %>% 86 | dplyr::mutate_all(stringr::str_squish) %>% 87 | dplyr::mutate(movement = lubridate::dmy(movement, quiet = TRUE)) 88 | } 89 | 90 | # Add get_movs to getters 91 | purrr::list_merge(parser, name = "movs", getter = get_movs) 92 | } 93 | 94 | #' Parses decisions 95 | #' @param parser A parser returned by [make_parser()] 96 | #' @export 97 | parse_decisions.cposg <- function(parser){ 98 | 99 | # Check class 100 | stopifnot("parser" %in% class(parser)) 101 | 102 | # Function for getting decisions 103 | get_decisions <- function(html) { 104 | 105 | #Gets all eligible tables 106 | tables <- html %>% 107 | xml2::xml_find_all("//table[@style='margin-left:15px; margin-top:1px;']") 108 | 109 | #Beginning of the table 110 | first_table <- tables %>% 111 | rvest::html_text() %>% 112 | stringr::str_which("Situa\u00e7\u00e3o do julgamento") %>% 113 | max() 114 | 115 | #Check if first_table is Inf 116 | if(is.infinite(first_table)){return(dplyr::data_frame(date = NA, decision = NA))} 117 | 118 | #End of the table 119 | last_table <- length(tables) 120 | 121 | 
tables[first_table:last_table] %>% 122 | rvest::html_table(fill = TRUE) %>% 123 | dplyr::bind_rows() %>% 124 | dplyr::as_tibble() %>% 125 | dplyr::mutate( 126 | X1 = lubridate::dmy(X1, quiet = TRUE), 127 | X2 = stringr::str_replace_all(X2, "[:space:]+"," "), 128 | X3 = stringr::str_replace_all(X3, "[:space:]+", " ")) %>% 129 | dplyr::select(-X2) %>% 130 | dplyr::filter(!is.na(X1)) %>% 131 | purrr::set_names("date", "decision") 132 | } 133 | 134 | # Add get_decisions to getters 135 | purrr::list_merge(parser, name = "decisions", getter = get_decisions) 136 | } 137 | -------------------------------------------------------------------------------- /R/parse_cjsg.R: -------------------------------------------------------------------------------- 1 | #' Parse lawsuits extracted from CJSG query 2 | #' 3 | #' @param file Character vector with the paths to one or more files 4 | #' @param cores Number of cores to use when parsing 5 | #' 6 | #' @return A tibble with the columns 7 | #' \itemize{ 8 | #' \item `file` Name of the file 9 | #' \item `id_page` ID found in the page 10 | #' \item `id_decision` Unique ID of the ruling 11 | #' \item `id_lawsuit` Number of the lawsuit (doesn't have to be unique) 12 | #' \item `class_subject` Class/subject, separated by slashes 13 | #' \item `district` Name of the district 14 | #' \item `court` Body responsible for the appeal 15 | #' \item `date_decision` Date of the judgement (\%d/\%m/\%Y) 16 | #' \item `date_publication` Date of the publication (\%d/\%m/\%Y) 17 | #' \item `date_registration` Date of registration in the system (\%d/\%m/\%Y) 18 | #' \item `rapporteur` Name of the rapporteur 19 | #' \item `summary` Summary of the ruling 20 | #' \item `txt_summary` Text of the summary with no formatting 21 | #' } 22 | #' @export 23 | parse_cjsg <- function(file, cores = 1) { 24 | 25 | # Set names for .id 26 | names(file) <- file 27 | file <- file[file.size(file) > 0] 28 | 29 | # Run either with progress bar or on parallel 30 | if (cores == 
1) {
    pb <- progress::progress_bar$new(total = length(file))
    purrr::map_dfr(file, parse_cjsg_, pb, .id = "file")
  } else {
    file %>%
      parallel::mclapply(parse_cjsg_, mc.cores = cores) %>%
      dplyr::bind_rows(.id = "file")
  }
}

#' Parse a page of CJSG results
#'
#' @param file The path to the file to be parsed
#' @param pb Progress bar created by [parse_cjsg()]
#' @return A tibble with the parsed information
parse_cjsg_ <- function(file, pb = NULL) {

  # Parse defensively: a malformed node yields an empty tibble
  # instead of aborting the whole page
  parse <- purrr::possibly(parse_cjsg_lawsuit, tibble::tibble(), quiet = FALSE)

  # Each ".fundocinza1" node holds one lawsuit
  table <- file %>%
    xml2::read_html("UTF-8") %>%
    rvest::html_nodes(".fundocinza1") %>%
    purrr::map_dfr(parse)

  if (!is.null(pb)) { pb$tick() }
  return(table)
}

#' Parse one lawsuit from a CJSG page
#'
#' @param node A `.fundocinza1` node extracted from the page
#' @return One row with the data concerning the lawsuit
parse_cjsg_lawsuit <- function(node) {

  # Add any of `cnames` that are missing from `data` as NA_character_
  # columns, so the final select() never fails on an absent key
  # (consolidates the two redundant helpers that used to coexist here:
  # `fill_in_columns` and `fncols` did the same job)
  fncols <- function(data, cnames) {
    add <- cnames[!cnames %in% names(data)]
    if (length(add) != 0) { data[add] <- NA_character_ }
    data
  }

  # Get information from lawsuit
  tmp <- rvest::html_node(node, ".downloadEmenta")
  infos <- tibble::tibble(
    id_lawsuit = stringr::str_trim(rvest::html_text(tmp)),
    id_decision = rvest::html_attr(tmp, "cdacordao"))

  # Get complicated variables
  id <- node %>%
    rvest::html_node(".ementaClass") %>%
    rvest::html_text() %>%
    stringr::str_trim() %>%
    stringr::str_replace_all("[^0-9]", "")
  cs <- node %>%
    rvest::html_node(".assuntoClasse") %>%
    rvest::html_text() %>%
    stringr::str_trim()
  ts <- node %>%
    rvest::html_node("textarea") %>%
    rvest::html_text()

  # Create final table: split "Key: value" rows, normalize the keys to
  # ASCII snake_case, spread them into columns, and rename to English
  node %>%
    rvest::html_nodes(".ementaClass2") %>%
    rvest::html_text() %>%
    stringr::str_split_fixed(":", 2) %>%
    tibble::as_tibble() %>%
    purrr::set_names(c("key", "val")) %>%
    dplyr::mutate_all(stringr::str_trim) %>%
    dplyr::mutate(
      key = key %>%
        rm_accent() %>%
        stringr::str_to_lower() %>%
        stringr::str_replace_all(" +", "_") %>%
        stringr::str_replace_all("[^a-z_]", "") %>%
        stringr::str_replace_all("_d[eo]_", "_")) %>%
    tidyr::spread(key, val) %>%
    dplyr::bind_cols(infos) %>%
    dplyr::mutate(id = id, cs = cs, ts = ts) %>%
    fncols(c("ementa", "data_publicacao", "data_julgamento")) %>%
    dplyr::select(
      id_page = id, id_decision, id_lawsuit, class_subject = cs,
      district = comarca, court = orgao_julgador, date_decision = data_julgamento,
      date_publication = data_publicacao, date_registration = data_registro,
      rapporteur = relatora, summary = ementa, txt_summary = ts)
}
-------------------------------------------------------------------------------- /R/utils.R: --------------------------------------------------------------------------------

#' Pipe operator
#'
#' See \code{\link[magrittr]{\%>\%}} for more details.
#'
#' @name %>%
#' @rdname pipe
#' @keywords internal
#' @importFrom magrittr %>%
NULL

# Symbols
"!!"
<- rlang::`!!`
":=" <- rlang::`:=`
"%||%" <- purrr::`%||%`

# Shortcuts for stringr functions
str_replace_all <- stringr::str_replace_all
str_detect <- stringr::str_detect

# Extract the elements of `x` selected by the elementwise OR of the
# logical vectors `e1` and `e2`
extract_or <- function(x, e1, e2) {
  magrittr::or(e1, e2) %>% magrittr::extract(x, .)
}

# Convert a Portuguese long date ("12 de maio de 2018") to "12-5-2018".
# BUGFIX: the old switch()-based version only worked for a single date
# (switch() is not vectorized) and errored on unknown month names
# (switch() returned NULL); a named lookup vector handles any number of
# dates and turns unknown months into NA instead of an error.
conv_month <- function(date) {

  # Convert date to more sensible format
  date <- stringr::str_replace_all(date, " de ", "-")

  # Lookup table from Portuguese month name to month number
  months <- c(
    "janeiro" = "1", "fevereiro" = "2", "mar\u00e7o" = "3",
    "abril" = "4", "maio" = "5", "junho" = "6",
    "julho" = "7", "agosto" = "8", "setembro" = "9",
    "outubro" = "10", "novembro" = "11", "dezembro" = "12")
  month <- unname(months[stringr::str_extract(date, "[:alpha:]+")])

  # Replace name with number
  stringr::str_replace(date, "[:alpha:]+", month)
}

# Convert a date to pt_BR format (dd/mm/yyyy)
date_pt <- function(date) {

  # Empty strings are passed through untouched
  if (stringr::str_length(date) == 0) { return(date) }

  # Zero-pad day and month and join with slashes
  date <- lubridate::as_date(date)
  stringr::str_c(
    stringr::str_pad(lubridate::day(date), 2, "left", "0"),
    stringr::str_pad(lubridate::month(date), 2, "left", "0"),
    lubridate::year(date),
    sep = "/")
}

# Return a duration (in seconds) in a human readable way
how_long <- function(x) {

  if (x < 60) {
    round(x, 1) %>% stringr::str_c(" seconds")
  } else if (x < 3600) {
    round(x/60, 1) %>% stringr::str_c(" minutes")
  } else if (x < 86400) {
    round(x/3600, 1) %>% stringr::str_c(" hours")
  } else if (x < 604800) {
    round(x/86400, 1) %>% stringr::str_c(" days")
  } else {
    round(x/604800, 1) %>% stringr::str_c(" weeks")
  }
}

# Transform an XML tree (as produced by xml2::as_list()) into a tibble
# with one (nameN, idN) column pair per nesting level
tree_to_tibble <- function(tree, n = 0) {

  # Extract category names
  names <- tree %>%
    purrr::map(purrr::pluck, 2, 1) %>%
    purrr::compact() %>%
    magrittr::extract(. != "") %>%
    purrr::flatten_chr()

  # Extract category codes
  ids <- tree %>%
    purrr::map(purrr::pluck, 2) %>%
    purrr::map(attr, "value") %>%
    purrr::compact() %>%
    magrittr::extract(. != "") %>%
    purrr::flatten_chr()

  # Iterate over every branch of tree
  purrr::imap_dfr(lengths(tree, FALSE), function(len, i) {

    # If the element is a leaf node, return its contents;
    # otherwise recur on its children
    if (len == 3) {
      dplyr::tibble(name5 = names[i], id5 = ids[i])
    }
    else {
      tree %>%
        purrr::pluck(i, 4) %>%
        magrittr::extract(names(.) == 'li') %>%
        tree_to_tibble(n + 1) %>%
        dplyr::mutate(
          !!stringr::str_c("name", n) := names[i],
          !!stringr::str_c("id", n) := ids[i])
    }
  })
}

# Remove diacritics (from abjutils)
rm_accent <- function(x) {
  if (.Platform$OS.type == 'unix') {
    stringr::str_replace_all(iconv(x, to = "ASCII//TRANSLIT"), "[`'\"^~]", "")
  } else {
    gsub("`", "", iconv(x, from = 'latin1', to="ASCII//TRANSLIT"))
  }
}

# Stop with an informative message if a suggested package is missing
require_pkg <- function(package) {

  if (!requireNamespace(package, quietly = TRUE)) {

    # Point the user at the GitHub source for packages not on CRAN
    if (package == "captchasaj") {
      package <- "jtrecenti/captchasaj"
    } else if (package == "decryptr") {
      package <- "decryptr/decryptr"
    }

    msg <- stringr::str_c(
      "What you're trying to do requires the ",
      package, " package. Please install it.")
    stop(msg, call.
= FALSE) 144 | } 145 | } 146 | 147 | globalVariables(c( 148 | ".", "Documento", "X1", "X2", "X3", "adv", "arq", "b", "booklet", 149 | "color", "date_link", "desc", "forma", "g", "head", "id", 150 | "info", "key", "link", "n", "n_processo", "nome", "r", "result", 151 | "rm_accent", "setNames", "value", "y", "cd_acordao", "id0", "id5", 152 | "id_processo", "item", "branch", "court", "txt_ementa", "val", "role", 153 | "name", "part", "name0", "name1", "name2", "name3", "name4", "name5", 154 | "class_subject", "comarca", "data_julgamento", "data_registro", 155 | "ementa", "id_decision", "id_lawsuit", "id_page", "orgao_julgador", 156 | "relatora", "txt_summary", "hidden", "data_publicacao", "Data", 157 | "Movimento", "movement", "description", "assunto", "classe", 158 | "data_de_disponibilizacao", "foro", "magistrado", "vara", "min_pag")) 159 | -------------------------------------------------------------------------------- /docs/reference/run_parser.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Runs a parser — run_parser • esaj 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 33 | 34 | 35 | 36 |
    37 |
    38 | 87 | 88 | 89 |
    90 | 91 |
    92 |
    93 | 96 | 97 | 98 |

    Runs a parser

    99 | 100 | 101 |
    run_parser(file, parser, path = ".", cores = 1)
    102 | 103 |

    Arguments

    104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 |
    file

A character vector with the paths to one or more files

    parser

    A parser returned by make_parser()

    path

    The path to a directory where to save RDSs

    cores

    The number of cores to be used when parsing

    123 | 124 | 125 |
    126 | 133 |
    134 | 135 |
    136 |
    137 |

    Site built with pkgdown

    138 |
    139 |
    140 |
    141 | 142 | 143 | 144 | -------------------------------------------------------------------------------- /docs/reference/peek_cjpg.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Check how long a call to <code><a href='download_cjpg.html'>download_cjpg()</a></code> will probably take — peek_cjpg • esaj 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 33 | 34 | 35 | 36 |
    37 |
    38 | 87 | 88 | 89 |
    90 | 91 |
    92 |
    93 | 96 | 97 | 98 |

    Check how long a call to download_cjpg() will probably take

    99 | 100 | 101 |
    peek_cjpg(...)
    102 | 103 |

    Arguments

    104 | 105 | 106 | 107 | 108 | 110 | 111 |
    ...

    Arguments passed on to download_cjpg() ( 109 | path will be ignored)

    112 | 113 |

    See also

    114 | 115 |

    download_cjpg(), cjpg_table()

    116 | 117 | 118 |
    119 | 128 |
    129 | 130 |
    131 |
    132 |

    Site built with pkgdown

    133 |
    134 |
    135 |
    136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /docs/reference/peek_cjsg.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Check how long a call to <code><a href='download_cjsg.html'>download_cjsg()</a></code> will probably take — peek_cjsg • esaj 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 33 | 34 | 35 | 36 |
    37 |
    38 | 87 | 88 | 89 |
    90 | 91 |
    92 |
    93 | 96 | 97 | 98 |

    Check how long a call to download_cjsg() will probably take

    99 | 100 | 101 |
    peek_cjsg(...)
    102 | 103 |

    Arguments

    104 | 105 | 106 | 107 | 108 | 110 | 111 |
    ...

    Arguments passed on to download_cjsg() ( 109 | path will be ignored)

    112 | 113 |

    See also

    114 | 115 |

download_cjsg(), cjsg_table()

    116 | 117 | 118 |
    119 | 128 |
    129 | 130 |
    131 |
    132 |

    Site built with pkgdown

    133 |
    134 |
    135 |
    136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /R/cjpg_table.R: -------------------------------------------------------------------------------- 1 | 2 | #' @title Download information about some of CJPG's structures 3 | #' 4 | #' @description Downloads a table with information about lawsuit's 5 | #' classes, subjects or courts to help with [download_cjpg()]. You 6 | #' can also browse some of these tables with [browse_table()]. 7 | #' 8 | #' @param type Either `"classes"` or `"subjects"` or `"courts"` 9 | #' @param tj TJ from which to get data (only works with TJSP for now) 10 | #' @return A tibble with either 12 columns (if `type` is `"classes"` 11 | #' or `"subjects"`) or 3 columns (if `type` is `"courts"`) 12 | #' 13 | #' @seealso [download_cjpg()], [browse_table()] 14 | #' @export 15 | cjpg_table <- function(type, tj = "tjsp") { 16 | 17 | # Stop if TJ isn't TJSP 18 | stopifnot(tj == "tjsp") 19 | 20 | # If type is courts, redirect 21 | if (type == "courts") { return(cjpg_courts()) } 22 | 23 | # Translate type 24 | type <- switch (type, 25 | classes = "classe", 26 | subjects = "assunto") 27 | 28 | # Fetch table with information 29 | stringr::str_c( 30 | "https://esaj.tjsp.jus.br/cjpg/", type, 31 | "TreeSelect.do?campoId=", type) %>% 32 | httr::GET(httr::config(ssl_verifypeer = FALSE)) %>% 33 | httr::content('text') %>% 34 | xml2::read_html() %>% 35 | xml2::xml_find_all("//div[@class='treeView']") %>% 36 | purrr::modify(xml2::as_list) %>% 37 | dplyr::first() %>% dplyr::nth(2) %>% 38 | purrr::keep(~is.list(.x)) %>% 39 | tree_to_tibble() %>% 40 | dplyr::mutate( 41 | name0 = ifelse(is.na(name0), name5, name0), 42 | id0 = ifelse(is.na(id0), id5, id0)) %>% 43 | dplyr::select( 44 | dplyr::ends_with('0'), dplyr::ends_with('1'), 45 | dplyr::ends_with('2'), dplyr::ends_with('3'), 46 | dplyr::ends_with('4'), dplyr::ends_with('5')) 47 | } 48 | 49 | # Download table with court information for [cjpg_table()] 50 | 
cjpg_courts <- function() {

  # Build the rows of the table for one branch of the courts tree
  create_row <- function(x) {

    # XPaths
    xp_parent <- ".//span[contains(@id, 'varas_tree')]"
    xp_child <- ".//li[@class='leafItem']//span[contains(@id, 'varas_tree')]"

    # The parent span names the branch; each leaf span is one court
    branch <- x %>%
      rvest::html_node(xpath = xp_parent) %>%
      rvest::html_text()
    x %>%
      rvest::html_nodes(xpath = xp_child) %>%
      purrr::map(~tibble::tibble(
        id = rvest::html_attr(.x, 'value'),
        court = rvest::html_text(.x))) %>%
      dplyr::bind_rows() %>%
      dplyr::mutate(branch = branch)
  }

  # Create courts table
  stringr::str_c(
    "https://esaj.tjsp.jus.br/cjpg/",
    "varasTreeSelect.do?campoId=varas") %>%
    httr::GET(httr::config(ssl_verifypeer = FALSE)) %>%
    httr::content('text') %>%
    xml2::read_html() %>%
    rvest::html_nodes('li.open') %>%
    rlang::as_list() %>%
    purrr::modify(create_row) %>%
    dplyr::bind_rows() %>%
    dplyr::select(branch, court, id)
}

#' @title Browse table returned by [cjpg_table()] or [cjsg_table()]
#'
#' @description This function uses a list of regex to filter CJPG
#' and CJSG tables (only if they are of type `"classes"` or
#' `"subjects"`) more easily than with `dplyr::filter()`. For
#' details on how the matching occurs, see **Matching**.
#'
#' @section Matching: For the matching to work properly, `patterns`
#' should be a list of at most 6 character vectors, each one
#' containing either one or a vector of regular expressions to
#' be applied from left to right on columns `name0` to `name5`
#' (note that vectors are ORed and different elements are ANDed).
#' Example: If `patterns` looks something like
#' `list(c("ADM", "CRIMINAL"), "", "", "", "", "Recurso")`,
#' then we'll get back the rows where `name0` contains "ADM"
#' **or** "CRIMINAL" **and** where `name5` contains "Recurso".
#'
#' @param table Table returned by [cjpg_table()] or [cjsg_table()]
#' (only valid for `"classes"` or `"subjects"` types)
#' @param patterns A list containing (at most) 6 character vectors
#' of one or more regular expressions (applied from left to right
#' on `name0` to `name5`), e.g.,
#' `list(c("ADM", "CRIMINAL"), "", "", "", "", "Recurso")`
#' @return The original table filtered according to `patterns`
#'
#' @seealso [cjpg_table()], [cjsg_table()]
#' @export
browse_table <- function(table, patterns) {

  # Collapse each vector of regexes into one ORed pattern; an empty
  # vector collapses to "" (which matches everything)
  patterns <- purrr::modify(patterns, function(pat) {
    pat %>%
      stringr::str_c(collapse = "|") %>%
      stringr::str_c("(?:", ., ")") %>%
      stringr::str_replace("\\(\\?\\:\\)", "")
  })

  # Vectorized detection that also treats NAs as matches when the
  # pattern is empty. BUGFIX: the old version used the scalar `&&` on a
  # vector condition, which at best only consulted the first element
  # and is a hard error for length > 1 vectors in R >= 4.3; the
  # elementwise `&` is what the logic requires.
  str_detect <- function(string, pattern) {
    stringr::str_detect(string, pattern) %>%
      magrittr::inset(is.na(.) & pattern == "", TRUE)
  }

  # Apply filters
  table %>%
    dplyr::filter(
      str_detect(name0, patterns[[1]]),
      str_detect(name1, patterns[[2]]),
      str_detect(name2, patterns[[3]]),
      str_detect(name3, patterns[[4]]),
      str_detect(name4, patterns[[5]]),
      str_detect(name5, patterns[[6]]))
}
-------------------------------------------------------------------------------- /docs/reference/download_decision.html: --------------------------------------------------------------------------------
Download PDFs corresponding to judicial decisions — download_decision • esaj
    37 |
    38 | 87 | 88 | 89 |
    90 | 91 |
    92 |
    93 | 96 | 97 | 98 |

    Given the unique identifier of a judicial decision 99 | (an 8 digit number), this function downloads the corresponding 100 | PDF.

    101 | 102 | 103 |
    download_decision(decision, path = ".", tj = "tjsp")
    104 | 105 |

    Arguments

    106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 |
    decision

    A character vector with decision IDs

    path

    Path to directory where to save PDF

    tj

    TJ from which to get data (only works with TJSP for now)

    121 | 122 |

    Value

    123 | 124 |

    A character vector with the path to the downloaded file

    125 | 126 | 127 |
    128 | 137 |
    138 | 139 |
    140 |
    141 |

    Site built with pkgdown

    142 |
    143 |
    144 |
    145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /docs/pkgdown.css: -------------------------------------------------------------------------------- 1 | img.icon { 2 | float: left; 3 | margin-left: -35px; 4 | width: 30px; 5 | height: 30px; 6 | } 7 | 8 | body {font-size: 16px;} 9 | h1 {font-size: 40px;} 10 | h2 {font-size: 30px;} 11 | 12 | /* Fixes for fixed navbar --------------------------*/ 13 | 14 | body { 15 | position: relative; 16 | padding-top: 65px; 17 | } 18 | 19 | .contents h1, .contents h2, .contents h3, .contents h4 { 20 | padding-top: 65px; 21 | margin-top: -45px; 22 | } 23 | 24 | .page-header { 25 | margin-top: 0; 26 | margin-bottom: 10px; 27 | padding-bottom: 0px; 28 | } 29 | 30 | /* Static header placement on mobile devices */ 31 | @media (max-width: 767px) { 32 | .navbar-fixed-top { 33 | position: absolute; 34 | } 35 | } 36 | 37 | .navbar-toggle { 38 | margin-top: 8px; 39 | margin-bottom: 5px; 40 | } 41 | 42 | .navbar-nav li a { 43 | padding-bottom: 10px; 44 | } 45 | .navbar-default .navbar-nav > .active > a, 46 | .navbar-default .navbar-nav > .active > a:hover, 47 | .navbar-default .navbar-nav > .active > a:focus { 48 | background-color: #eee; 49 | } 50 | 51 | /* Table of contents --------------------------*/ 52 | 53 | #sidebar { 54 | /* 55 | Needed to avoid bug in sticky-kit: 56 | https://github.com/leafo/sticky-kit/issues/169 57 | */ 58 | position:static; 59 | } 60 | 61 | #sidebar h2 { 62 | font-size: 1.6em; 63 | margin-top: 1em; 64 | margin-bottom: 0.25em; 65 | } 66 | 67 | #sidebar .list-unstyled li { 68 | margin-bottom: 0.5em; 69 | line-height: 1.4; 70 | } 71 | 72 | #sidebar small { 73 | color: #777; 74 | } 75 | 76 | /* Syntax highlighting ---------------------------------------------------- */ 77 | 78 | pre { 79 | word-wrap: normal; 80 | word-break: normal; 81 | border: none; 82 | } 83 | 84 | pre, code { 85 | background-color: #fafafa; 86 | color: #000000; 87 | 
-webkit-font-smoothing: subpixel-antialiased; 88 | } 89 | 90 | pre img { 91 | background-color: #fff; 92 | display: block; 93 | } 94 | 95 | code a, pre a { 96 | color: #375f84; 97 | } 98 | 99 | .fl {color: #1514b5;} 100 | .fu {color: #000000;} /* function */ 101 | .ch,.st {color: #036a07;} /* string */ 102 | .kw {color: #264D66;} /* keyword */ 103 | .co {color: #777777;} /* comment */ 104 | 105 | .message { color: black; font-weight: bolder;} 106 | .error { color: orange; font-weight: bolder;} 107 | .warning { color: orange; font-weight: normal;} 108 | 109 | 110 | /* Status --------------------------- */ 111 | 112 | .status-container { 113 | padding-top:32px; 114 | } 115 | 116 | .status-container a { 117 | display: block; 118 | margin-bottom: 5px; 119 | } 120 | 121 | /* For shrinking navbar ------------------ */ 122 | 123 | /* For big header 124 | 125 | &-brand { 126 | font-family: $font-family-monospace; 127 | font-weight: normal; 128 | font-size: 48px; 129 | padding: 35px 15px; 130 | 131 | padding-left: 84px; 132 | background-image:url(../logo.png); 133 | background-size: 60px auto; 134 | background-repeat: no-repeat; 135 | background-position: 15px center; 136 | 137 | } 138 | */ 139 | 140 | /* Reference index & topics ----------------------------------------------- */ 141 | 142 | .ref-index th {font-weight: normal;} 143 | .ref-index h2 {font-size: 20px;} 144 | 145 | .ref-index td {vertical-align: top;} 146 | .ref-index .alias {width: 40%;} 147 | .ref-index .title {width: 60%;} 148 | 149 | .ref-index .alias {width: 40%;} 150 | .ref-index .title {width: 60%;} 151 | 152 | .ref-arguments th {text-align: right; padding-right: 10px;} 153 | .ref-arguments th, .ref-arguments td {vertical-align: top;} 154 | .ref-arguments .name {width: 10%;} 155 | .ref-arguments .desc {width: 90%;} 156 | 157 | /* For the rstudio footer ------- */ 158 | 159 | footer { 160 | margin-top: 45px; 161 | padding: 35px 0 36px; 162 | border-top: 1px solid #e5e5e5; 163 | 164 | display: flex; 165 | 
color: #666; 166 | } 167 | footer p { 168 | margin-bottom: 0; 169 | } 170 | footer .tidyverse { 171 | flex: 1; 172 | } 173 | footer .author { 174 | flex: 1; 175 | text-align: right; 176 | } 177 | 178 | /* ---------------------- hover anchor tags */ 179 | 180 | .hasAnchor { 181 | margin-left: -30px; 182 | } 183 | 184 | a.anchor { 185 | display:inline-block; 186 | width: 30px; 187 | height: 30px; 188 | visibility: hidden; 189 | 190 | background-image: url(./link.svg); 191 | background-repeat: no-repeat; 192 | background-size: 20px 20px; 193 | background-position: center center; 194 | } 195 | 196 | .hasAnchor:hover a.anchor { 197 | visibility: visible; 198 | } 199 | 200 | @media (max-width: 767px) { 201 | .hasAnchor {margin-left: 0;} 202 | a.anchor {float: right;} 203 | } 204 | 205 | /* Tweak appearance of navigation in sidebar ---------------------- */ 206 | 207 | #sidebar .nav { 208 | padding-left: 0px; 209 | list-style-type: none; 210 | color: #5a9ddb; 211 | } 212 | 213 | #sidebar .nav > li { 214 | padding: 10px 0 0px 20px; 215 | display: list-item; 216 | line-height: 20px; 217 | background-image: url(./tocBullet.svg); 218 | background-repeat: no-repeat; 219 | background-size: 16px 280px; 220 | background-position: left 0px; 221 | } 222 | 223 | #sidebar .nav > li.active { 224 | background-position: left -240px; 225 | } 226 | 227 | #sidebar a { 228 | padding: 0px; 229 | color: #5a9ddb; 230 | background-color: transparent; 231 | } 232 | 233 | #sidebar a:hover { 234 | background-color: transparent; 235 | text-decoration: underline; 236 | } 237 | -------------------------------------------------------------------------------- /docs/reference/download_2deg_lawsuit.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Download second degree lawsuits filed in Brazilian Justice Courts — download_2deg_lawsuit • esaj 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 
28 | 29 | 33 | 34 | 35 | 36 |
    37 |
    38 | 69 | 70 | 71 |
    72 | 73 |
    74 |
    75 | 78 | 79 | 80 |

    This function downloads lawsuits as PDFs. Given a lawsuit ID, and 81 | the path to a directory it will collect the lawsuit, and save it to 82 | the provided directory.

    83 | 84 | 85 |
    download_2deg_lawsuit(id, path = ".")
    86 | 87 |

    Arguments

    88 | 89 | 90 | 91 | 92 | 94 | 95 | 96 | 97 | 98 | 99 |
    id

    A character vector of one or more lawsuit IDs (only works with 93 | TJSP for now)

    path

    Path to the directory where the lawsuit should be saved

    100 | 101 |

    Value

    102 | 103 |

    A character vector with the path to the downloaded lawsuit

    104 | 105 |

    About lawsuits

    106 | 107 | 108 |

The lawsuits contemplated by this function have to be filed in a 109 | Brazilian Tribunal de Justica (Justice Court). download_2deg_lawsuit() 110 | finds the lawsuit in its state's online Sistema de Automacao de Justica 111 | (Justice Automation System), solves the captcha withholding the 112 | information, and collects the PDF.

    113 | 114 |

    Implemented TJs

    115 | 116 | 117 |

Unfortunately download_2deg_lawsuit() doesn't yet work with all 27 118 | TJs in Brazil. Here are the ones already implemented:

      119 |
    • TJSP (Sao Paulo)

    • 120 |
    121 | 122 | 123 |
    124 | 137 |
    138 | 139 |
    140 |
    141 |

    Site built with pkgdown

    142 |
    143 |
    144 |
    145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /docs/reference/download_lawsuit.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Download first degree lawsuits filed in Brazilian Justice Courts — download_lawsuit • esaj 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 33 | 34 | 35 | 36 |
    37 |
    38 | 69 | 70 | 71 |
    72 | 73 |
    74 |
    75 | 78 | 79 | 80 |

    This function downloads lawsuits as PDFs. Given a lawsuit ID, and 81 | the path to a directory it will collect the lawsuit, and save it to 82 | the provided directory.

    83 | 84 | 85 |
    download_lawsuit(id, path = ".")
    86 | 87 |

    Arguments

    88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 |
    id

    A character vector of one or more lawsuit IDs

    path

    Path to the directory where the lawsuit should be saved

    99 | 100 |

    Value

    101 | 102 |

    A character vector with the path to the downloaded lawsuit

    103 | 104 |

    About lawsuits

    105 | 106 | 107 |

The lawsuits contemplated by this function have to be filed in a 108 | Brazilian Tribunal de Justica (Justice Court). download_lawsuit() 109 | finds the lawsuit in its state's online Sistema de Automacao de Justica 110 | (Justice Automation System), solves the captcha withholding the 111 | information, and collects the PDF.

    112 | 113 |

    Implemented TJs

    114 | 115 | 116 |

Unfortunately download_lawsuit() doesn't yet work with all 27 TJs in 117 | Brazil. Here are the ones already implemented:

      118 |
    • TJAM (Amazonas)

    • 119 |
    • TJBA (Bahia)

    • 120 |
    • TJSC (Santa Catarina)

    • 121 |
    • TJSP (Sao Paulo)

    • 122 |
    123 | 124 | 125 |
    126 | 139 |
    140 | 141 |
    142 |
    143 |

    Site built with pkgdown

    144 |
    145 |
    146 |
    147 | 148 | 149 | 150 | -------------------------------------------------------------------------------- /docs/reference/cjpg_table.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Download information about some of CJPG's structures — cjpg_table • esaj 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 33 | 34 | 35 | 36 |
    37 |
    38 | 87 | 88 | 89 |
    90 | 91 |
    92 |
    93 | 96 | 97 | 98 |

    Downloads a table with information about lawsuit's 99 | classes, subjects or courts to help with download_cjpg(). You 100 | can also browse some of these tables with browse_table().

    101 | 102 | 103 |
    cjpg_table(type, tj = "tjsp")
    104 | 105 |

    Arguments

    106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 |
    type

    Either "classes" or "subjects" or "courts"

    tj

    TJ from which to get data (only works with TJSP for now)

    117 | 118 |

    Value

    119 | 120 |

    A tibble with either 12 columns (if type is "classes" 121 | or "subjects") or 3 columns (if type is "courts")

    122 | 123 |

    See also

    124 | 125 |

    download_cjpg(), browse_table()

    126 | 127 | 128 |
    129 | 140 |
    141 | 142 |
    143 |
    144 |

    Site built with pkgdown

    145 |
    146 |
    147 |
    148 | 149 | 150 | 151 | -------------------------------------------------------------------------------- /docs/reference/cjsg_table.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Download information about some of CJSG's structures — cjsg_table • esaj 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 33 | 34 | 35 | 36 |
    37 |
    38 | 87 | 88 | 89 |
    90 | 91 |
    92 |
    93 | 96 | 97 | 98 |

    Downloads a table with information about lawsuit's 99 | classes, subjects or courts to help with download_cjsg(). You 100 | can also browse some of these tables with browse_table().

    101 | 102 | 103 |
    cjsg_table(type, tj = "tjsp")
    104 | 105 |

    Arguments

    106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 |
    type

    Either "classes" or "subjects" or "courts"

    tj

    TJ from which to get data (only works with TJSP for now)

    117 | 118 |

    Value

    119 | 120 |

    A tibble with either 12 columns (if type is "classes" 121 | or "subjects") or 3 columns (if type is "courts")

    122 | 123 |

    See also

    124 | 125 |

download_cjsg(), browse_table()

    126 | 127 | 128 |
    129 | 140 |
    141 | 142 |
    143 |
    144 |

    Site built with pkgdown

    145 |
    146 |
    147 |
    148 | 149 | 150 | 151 | -------------------------------------------------------------------------------- /docs/reference/parse_cjpg.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | CJPG parser — parse_cjpg • esaj 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 33 | 34 | 35 | 36 |
    37 |
    38 | 87 | 88 | 89 |
    90 | 91 |
    92 |
    93 | 96 | 97 | 98 |

    Parser for files downloaded by download_cjpg().

    99 | 100 | 101 |
    parse_cjpg(arqs)
    102 | 103 |

    Arguments

    104 | 105 | 106 | 107 | 108 | 109 | 110 |
    arqs

    Character vector with the paths of the files to be parsed

    111 | 112 |

    Value

    113 | 114 |

    A tibble with the columns

      115 |
    • arq Name of the file

    • 116 |
    • id ID found in the page read

    • 117 |
    • cd_acordao Unique ID of the ruling

    • 118 |
    • n_processo Number of the lawsuit (doesn't have to be unique)

    • 119 |
    • comarca Name of the district

    • 120 |
    • data_julgamento Date of the judgement (%d/%m/%Y)

    • 121 |
    • data_registro Date of registration in the system (%d/%m/%Y)

    • 122 |
    • ementa Summary of the ruling

    • 123 |
    • orgao_julgador Body responsible for the appeal

    • 124 |
    • outros_numeros Old/additional IDs

    • 125 |
    • relatora Name of the rapporteur

    • 126 |
    • classe_assunto Class/subject, separated by slashes

    • 127 |
    • txt_ementa Text of the summary with no formatting

    • 128 |
    129 | 130 | 131 | 132 |
    133 | 142 |
    143 | 144 |
    145 |
    146 |

    Site built with pkgdown

    147 |
    148 |
    149 |
    150 | 151 | 152 | 153 | -------------------------------------------------------------------------------- /docs/reference/parse_cjsg.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | CJSG parser — parse_cjsg • esaj 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 33 | 34 | 35 | 36 |
    37 |
    38 | 87 | 88 | 89 |
    90 | 91 |
    92 |
    93 | 96 | 97 | 98 |

    Parser for files downloaded by download_cjsg().

    99 | 100 | 101 |
    parse_cjsg(arqs)
    102 | 103 |

    Arguments

    104 | 105 | 106 | 107 | 108 | 109 | 110 |
    arqs

    Character vector with the paths of the files to be parsed

    111 | 112 |

    Value

    113 | 114 |

    A tibble with the columns

      115 |
    • arq Name of the file

    • 116 |
    • id ID found in the page read

    • 117 |
    • cd_acordao Unique ID of the ruling

    • 118 |
    • n_processo Number of the lawsuit (doesn't have to be unique)

    • 119 |
    • comarca Name of the district

    • 120 |
    • data_julgamento Date of the judgement (%d/%m/%Y)

    • 121 |
    • data_registro Date of registration in the system (%d/%m/%Y)

    • 122 |
    • ementa Summary of the ruling

    • 123 |
    • orgao_julgador Body responsible for the appeal

    • 124 |
    • outros_numeros Old/additional IDs

    • 125 |
    • relatora Name of the rapporteur

    • 126 |
    • classe_assunto Class/subject, separated by slashes

    • 127 |
    • txt_ementa Text of the summary with no formatting

    • 128 |
    129 | 130 | 131 | 132 |
    133 | 142 |
    143 | 144 | 149 |
    150 | 151 | 152 | 153 | --------------------------------------------------------------------------------