├── .Rproj.user ├── 6400D2F9 │ ├── cpp-definition-cache │ ├── sdb │ │ └── prop │ │ │ ├── 1EBC0BC2 │ │ │ ├── 3881D3BE │ │ │ ├── 5BEA6FD4 │ │ │ ├── 67865F02 │ │ │ ├── 741CD66D │ │ │ ├── 8DEFE4DD │ │ │ ├── AEC4A068 │ │ │ ├── DB4BBF72 │ │ │ ├── 1B3FF799 │ │ │ ├── 60D253CC │ │ │ ├── 7D846616 │ │ │ ├── AB15AAE │ │ │ ├── B821A3B8 │ │ │ ├── FA247785 │ │ │ └── INDEX │ ├── saved_source_markers │ ├── pcs │ │ ├── source-pane.pper │ │ ├── workbench-pane.pper │ │ ├── debug-breakpoints.pper │ │ ├── files-pane.pper │ │ └── windowlayoutstate.pper │ ├── session-persistent-state │ ├── rmd-outputs │ └── persistent-state └── shared │ └── notebooks │ └── paths ├── .Rbuildignore ├── .RData ├── .gitignore ├── .DS_Store ├── docs ├── untangle.pdf ├── images │ ├── csvlist.png │ ├── excellist.png │ ├── tweet_images.png │ └── memeprovider833888807959289856.png ├── index.Rmd ├── bulk_csv.Rmd ├── bulk_excel.Rmd ├── tweet_cap.Rmd └── untangle.Rmd ├── NAMESPACE ├── R ├── setup_folders.R ├── dl_file.R ├── packagr.R ├── untangle.R ├── tweet_cap.R ├── bulk_excel.R └── bulk_csv.R ├── man ├── setup_folders.Rd ├── dl_file.Rd ├── tweet_cap.Rd ├── packagr.Rd ├── bulk_excel.Rd ├── untangle.Rd └── bulk_csv.Rd ├── muckrakr.Rproj ├── DESCRIPTION ├── readme.MD └── .Rhistory /.Rproj.user/6400D2F9/cpp-definition-cache: -------------------------------------------------------------------------------- 1 | [ 2 | ] -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/sdb/prop/1EBC0BC2: -------------------------------------------------------------------------------- 1 | { 2 | } -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/sdb/prop/3881D3BE: -------------------------------------------------------------------------------- 1 | { 2 | } -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/sdb/prop/5BEA6FD4: 
-------------------------------------------------------------------------------- 1 | { 2 | } -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/sdb/prop/67865F02: -------------------------------------------------------------------------------- 1 | { 2 | } -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/sdb/prop/741CD66D: -------------------------------------------------------------------------------- 1 | { 2 | } -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/sdb/prop/8DEFE4DD: -------------------------------------------------------------------------------- 1 | { 2 | } -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/sdb/prop/AEC4A068: -------------------------------------------------------------------------------- 1 | { 2 | } -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/sdb/prop/DB4BBF72: -------------------------------------------------------------------------------- 1 | { 2 | } -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/.RData -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/saved_source_markers: -------------------------------------------------------------------------------- 1 | {"active_set":"","sets":[]} -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/pcs/source-pane.pper: -------------------------------------------------------------------------------- 1 | { 2 | "activeTab" : 2 3 | } -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/.DS_Store -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/sdb/prop/1B3FF799: -------------------------------------------------------------------------------- 1 | { 2 | "tempName" : "Untitled2" 3 | } -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/sdb/prop/60D253CC: -------------------------------------------------------------------------------- 1 | { 2 | "tempName" : "Untitled1" 3 | } -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/sdb/prop/7D846616: -------------------------------------------------------------------------------- 1 | { 2 | "tempName" : "Untitled1" 3 | } -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/sdb/prop/AB15AAE: -------------------------------------------------------------------------------- 1 | { 2 | "tempName" : "Untitled1" 3 | } -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/sdb/prop/B821A3B8: -------------------------------------------------------------------------------- 1 | { 2 | "tempName" : "Untitled1" 3 | } -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/session-persistent-state: 
-------------------------------------------------------------------------------- 1 | virtual-session-id="5DBF5373" 2 | -------------------------------------------------------------------------------- /docs/untangle.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/docs/untangle.pdf -------------------------------------------------------------------------------- /docs/images/csvlist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/docs/images/csvlist.png -------------------------------------------------------------------------------- /docs/images/excellist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/docs/images/excellist.png -------------------------------------------------------------------------------- /docs/images/tweet_images.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/docs/images/tweet_images.png -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/sdb/prop/FA247785: -------------------------------------------------------------------------------- 1 | { 2 | "last_setup_crc32" : "DAF4EF54bb338d19", 3 | "tempName" : "Untitled1" 4 | } -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/pcs/workbench-pane.pper: -------------------------------------------------------------------------------- 1 | { 2 | "TabSet1" : 2, 3 | "TabSet2" : 0, 4 | "TabZoom" : { 5 | } 6 | } -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/pcs/debug-breakpoints.pper: -------------------------------------------------------------------------------- 1 | { 
2 | "debugBreakpointsState" : { 3 | "breakpoints" : [ 4 | ] 5 | } 6 | } -------------------------------------------------------------------------------- /docs/images/memeprovider833888807959289856.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/docs/images/memeprovider833888807959289856.png -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(bulk_csv) 4 | export(bulk_excel) 5 | export(dl_file) 6 | export(packagr) 7 | export(setup_folders) 8 | export(tweet_cap) 9 | export(untangle) 10 | -------------------------------------------------------------------------------- /.Rproj.user/shared/notebooks/paths: -------------------------------------------------------------------------------- 1 | /Users/andrewtran/Projects/muckrakr/R/dl_file.R="9577CFAE" 2 | /Users/andrewtran/Projects/muckrakr/R/packagr.R="E8A46326" 3 | /Users/andrewtran/Projects/muckrakr/R/setup_folders.R="F50121FC" 4 | -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/rmd-outputs: -------------------------------------------------------------------------------- 1 | ~/Documents/Github/muckrakr/docs/tweet_cap.html 2 | ~/Documents/Github/muckrakr/tweet_cap.html 3 | ~/Documents/Github/muckrakr/tweet_cap.html 4 | ~/Documents/Github/muckrakr/docs/tweet_cap.html 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/pcs/files-pane.pper: -------------------------------------------------------------------------------- 1 | { 2 | "path" : "~/Documents/Github/muckrakr/R", 3 | "sortOrder" : [ 4 | { 5 | "ascending" : false, 6 | "columnIndex" : 2 7 | }, 8 | { 9 | "ascending" : false, 10 | "columnIndex" : 4 11 | } 12 | ] 
13 | } -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/pcs/windowlayoutstate.pper: -------------------------------------------------------------------------------- 1 | { 2 | "left" : { 3 | "panelheight" : 813, 4 | "splitterpos" : 339, 5 | "topwindowstate" : "NORMAL", 6 | "windowheight" : 851 7 | }, 8 | "right" : { 9 | "panelheight" : 813, 10 | "splitterpos" : 511, 11 | "topwindowstate" : "NORMAL", 12 | "windowheight" : 851 13 | } 14 | } -------------------------------------------------------------------------------- /R/setup_folders.R: --------------------------------------------------------------------------------
1 | #' Command to set up an optimized folder structure for your project
2 | #'
3 | #' Creates the folders `raw_data`, `output_data`, `rmd`, `docs` and `scripts`
4 | #' inside the current working directory. Folders that already exist are left
5 | #' untouched.
6 | #'
7 | #' @return Invisibly, a named logical vector (one element per folder) that is
8 | #'   `TRUE` when the folder exists after the call and `FALSE` when it could
9 | #'   not be created.
10 | #' @export
11 | #' @examples
12 | #' setup_folders()
13 |
14 | setup_folders <- function() {
15 |   folder_names <- c("raw_data", "output_data", "rmd", "docs", "scripts")
16 |
17 |   # An existing folder counts as success, so calling this twice is harmless
18 |   # and does not emit "already exists" warnings.
19 |   created <- vapply(
20 |     folder_names,
21 |     function(folder) {
22 |       dir.exists(folder) || dir.create(folder, showWarnings = FALSE)
23 |     },
24 |     logical(1)
25 |   )
26 |
27 |   invisible(created)
28 | }
29 |
-------------------------------------------------------------------------------- /man/setup_folders.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/setup_folders.R 3 | \name{setup_folders} 4 | \alias{setup_folders} 5 | \title{Command to set up an optimized folder structure for your project} 6 | \usage{ 7 | setup_folders() 8 | } 9 | \description{ 10 | Folders that will be generated: `raw_data`, `output_data`, `rmd`, `docs`, `scripts` 11 | } 12 | \examples{ 13 | setup_folders() 14 | } 15 | -------------------------------------------------------------------------------- /muckrakr.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory:
Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /docs/index.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "muckrakr" 3 | output: html_document 4 | --- 5 | 6 | ---- 7 | 8 | A growing collection of R functions and relationship files for data journalists. 9 | 10 | Functions so far: 11 | 12 | * `untangle` - Disentangling a complex variable [[Documentation](http://andrewbtran.github.com/muckrakr/untangle.html)] 13 | 14 | 15 | ## Installation 16 | 17 | ---- 18 | 19 | ``` 20 | #install.packages("devtools") 21 | devtools::install_github("andrewbtran/muckrakr") 22 | ``` 23 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: muckrakr 2 | Type: Package 3 | Title: R tools for journalists 4 | Version: 0.1.0 5 | Authors@R: c( 6 | person("Andrew", "Tran", email = "abtran@gmail.com", role = "cre"), 7 | person("Robert", "Kabacoff", email = "rkabacoff@wesleyan.edu", role = "aut")) 8 | Description: A collection of tools and relationship files for data journalists. 9 | URL: http://andrewbtran.github.io/muckrakr/ 10 | BugReports: https://github.com/andrewbtran/muckrakr/issues 11 | License: What license is it under?
12 | Encoding: UTF-8 13 | LazyData: true 14 | RoxygenNote: 6.0.1 15 | -------------------------------------------------------------------------------- /man/dl_file.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dl_file.R 3 | \name{dl_file} 4 | \alias{dl_file} 5 | \title{Downloads file from URL if file doesn't exist in set folder} 6 | \usage{ 7 | dl_file(folder = "data", link = "filenamedefaultbulkcsv2017.csv") 8 | } 9 | \arguments{ 10 | \item{folder}{The folder in relation to your working directory where the file should exist. Default folder is `data`.} 11 | 12 | \item{link}{The link to where the file exists} 13 | } 14 | \description{ 15 | Downloads file from URL if file doesn't exist in set folder 16 | } 17 | \examples{ 18 | dl_file(folder="data", link="https://website.com/data/bostonpayroll2013.csv") 19 | } 20 | \keyword{and} 21 | \keyword{checking} 22 | \keyword{downloading} 23 | \keyword{file} 24 | -------------------------------------------------------------------------------- /man/tweet_cap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tweet_cap.R 3 | \name{tweet_cap} 4 | \alias{tweet_cap} 5 | \title{Generate a screenshot of a tweet} 6 | \usage{ 7 | tweet_cap(link = "", filename = "NOTHINGTWEET_CAP", 8 | folder = "DEFAULTTWEETCAP") 9 | } 10 | \arguments{ 11 | \item{link}{the URL of the tweet} 12 | 13 | \item{filename}{the base name of the image you want to save. 
Default will be user's twitter handle.} 14 | 15 | \item{folder}{the folder you want to save the screenshot to} 16 | } 17 | \description{ 18 | Quick screenshot of a tweet based on a link 19 | } 20 | \examples{ 21 | tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images") 22 | } 23 | \keyword{screenshot} 24 | \keyword{twitter} 25 | -------------------------------------------------------------------------------- /man/packagr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/packagr.R 3 | \name{packagr} 4 | \alias{packagr} 5 | \title{Downloading packages that are needed but aren't yet on the system} 6 | \usage{ 7 | packagr(packages = c("tidyverse"), gh_packages = c("abtran/muckrakr")) 8 | } 9 | \arguments{ 10 | \item{packages}{Array of packages found on CRAN} 11 | 12 | \item{gh_packages}{Array of packages found on Github} 13 | } 14 | \description{ 15 | Downloading packages that are needed but aren't yet on the system 16 | } 17 | \examples{ 18 | packagr(packages=c("tidyverse", "rtweet"), gh_packages=c("abtran/muckrakr")) 19 | } 20 | \keyword{Downloading} 21 | \keyword{are} 22 | \keyword{aren't} 23 | \keyword{but} 24 | \keyword{needed} 25 | \keyword{on} 26 | \keyword{packages} 27 | \keyword{system} 28 | \keyword{that} 29 | \keyword{the} 30 | \keyword{yet} 31 | -------------------------------------------------------------------------------- /R/dl_file.R: --------------------------------------------------------------------------------
1 | #' Downloads file from URL if file doesn't exist in set folder
2 | #'
3 | #' @param folder The folder in relation to your working directory where the file should exist. Default folder is `data`.
4 | #' @param link The link to where the file exists
5 | #' @return Invisibly, the path of the file inside `folder`.
6 | #' @keywords file
7 | #' @export
8 | #' @examples
9 | #' dl_file(folder="data", link="https://website.com/data/bostonpayroll2013.csv")
10 |
11 | dl_file <- function(folder = "data", link = "filenamedefaultbulkcsv2017.csv") {
12 |   stopifnot(
13 |     is.character(folder), length(folder) == 1L,
14 |     is.character(link), length(link) == 1L
15 |   )
16 |
17 |   # The file name is whatever follows the last "/" of the link.
18 |   file_name <- gsub(".*\\/", "", link)
19 |   file_path <- file.path(folder, file_name)
20 |
21 |   if (file.exists(file_path)) {
22 |     message("File already exists")
23 |     return(invisible(file_path))
24 |   }
25 |
26 |   # recursive = TRUE so that nested folders such as "data/raw" work too.
27 |   dir.create(folder, showWarnings = FALSE, recursive = TRUE)
28 |
29 |   # mode = "wb" keeps binary files (xlsx, pdf, ...) intact on Windows.
30 |   # A failed download must not leave a partial file behind, otherwise the
31 |   # next call would report "File already exists".
32 |   status <- tryCatch(
33 |     download.file(link, file_path, mode = "wb"),
34 |     error = function(e) {
35 |       unlink(file_path)
36 |       stop("Download failed for ", link, ": ", conditionMessage(e),
37 |            call. = FALSE)
38 |     }
39 |   )
40 |   if (!identical(as.integer(status), 0L)) {
41 |     unlink(file_path)
42 |     stop("Download failed for ", link, " (status ", status, ")",
43 |          call. = FALSE)
44 |   }
45 |
46 |   message("File download complete")
47 |   invisible(file_path)
48 | }
49 |
-------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/sdb/prop/INDEX: -------------------------------------------------------------------------------- 1 | ~%2FDocuments%2FGithub%2Fmuckrakr%2F.gitignore="5BEA6FD4" 2 | ~%2FDocuments%2FGithub%2Fmuckrakr%2FDESCRIPTION="8DEFE4DD" 3 | ~%2FDocuments%2FGithub%2Fmuckrakr%2FNAMESPACE="1EBC0BC2" 4 | ~%2FDocuments%2FGithub%2Fmuckrakr%2FR%2Fbulk_csv.R="7D846616" 5 | ~%2FDocuments%2FGithub%2Fmuckrakr%2FR%2Fbulk_excel.R="1B3FF799" 6 | ~%2FDocuments%2FGithub%2Fmuckrakr%2FR%2Ftweet_cap.R="B821A3B8" 7 | ~%2FDocuments%2FGithub%2Fmuckrakr%2FR%2Funtangle.R="67865F02" 8 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Fdocs%2Fbulk_csv.Rmd="60D253CC" 9 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Fdocs%2Fbulk_excel.Rmd="AB15AAE" 10 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Fdocs%2Findex.Rmd="741CD66D" 11 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Fdocs%2Ftweet_cap.Rmd="3881D3BE" 12 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Fdocs%2Funtangle.Rmd="DB4BBF72" 13 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Freadme.MD="AEC4A068" 14 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Ftweet_cap.Rmd="FA247785" 15 | -------------------------------------------------------------------------------- /man/bulk_excel.Rd: -------------------------------------------------------------------------------- 1 | %
Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bulk_excel.R 3 | \name{bulk_excel} 4 | \alias{bulk_excel} 5 | \title{Creating a single data frame from a folder of similarly structured Excel spreadsheets} 6 | \usage{ 7 | bulk_excel(folder = "DEFAULTBULKCSV2017", 8 | export = "filenamedefaultbulkcsv2017.csv", sheet = 1, col_names = TRUE, 9 | col_types = NULL, na = "", skip = 0) 10 | } 11 | \arguments{ 12 | \item{folder}{The folder in relation to your working directory where the Excel files exist. Default folder is the current working directory.} 13 | 14 | \item{export}{File name to export csv file as, if wanted.} 15 | } 16 | \description{ 17 | Like Voltron-ing your Excel data. This package is based on readxl, so passing on variables from read_excel will work in bulk_excel. These variables will apply to the import of each Excel spreadsheet. 18 | } 19 | \examples{ 20 | bulk_excel(folder="data", export="combined_data.csv") 21 | } 22 | \keyword{appending} 23 | \keyword{data} 24 | -------------------------------------------------------------------------------- /man/untangle.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/untangle.R 3 | \name{untangle} 4 | \alias{untangle} 5 | \title{Disentangling a complex variable} 6 | \usage{ 7 | untangle(data, x, pattern, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{data}{The data frame that contains the column to untangle} 11 | 12 | \item{x}{Column to untangle} 13 | 14 | \item{pattern}{Special characters that separate the variables in the column} 15 | 16 | \item{verbose}{TRUE or FALSE} 17 | } 18 | \description{ 19 | We are occasionally faced with a data set variable that contains multiple pieces of information. Multiple pieces of information are stored in this variable. In this case these pieces are delineated by a comma or colon. As is, the variable is not useful.
20 | The *untangle* function can be used to create a set of dummy codes from this variable that will be more useful. This results in a new dataset with dummy codes 21 | } 22 | \examples{ 23 | untangle(data=test_data, x="charges", pattern="[,:]", verbose=TRUE) 24 | } 25 | \keyword{data} 26 | \keyword{wrangling} 27 | -------------------------------------------------------------------------------- /man/bulk_csv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bulk_csv.R 3 | \name{bulk_csv} 4 | \alias{bulk_csv} 5 | \title{Creating a single data frame from a folder of similarly structure CSV spreadsheets} 6 | \usage{ 7 | bulk_csv(folder = "DEFAULTBULKCSV2017", 8 | export = "filenamedefaultbulkcsv2017.csv", col_names = TRUE, 9 | col_types = NULL, na = c("", "NA"), quoted_na = TRUE, comment = "", 10 | trim_ws = TRUE, skip = 0, n_max = Inf, guess_max = min(1000, n_max), 11 | progress = interactive()) 12 | } 13 | \arguments{ 14 | \item{folder}{The folder in relation to your working directory where the csv files exist. Default folder is the current working directory.} 15 | 16 | \item{export}{File name to export csv file as, if wanted.} 17 | } 18 | \description{ 19 | Like Voltron-ing your data. This package is based on readr, so passing on variables from read_csv will work in bulk_csv. These variables will apply to the import of each CSV sheet. 20 | } 21 | \examples{ 22 | bulk_csv(folder="data", export="combined_data.csv") 23 | } 24 | \keyword{appending} 25 | \keyword{data} 26 | -------------------------------------------------------------------------------- /readme.MD: -------------------------------------------------------------------------------- 1 | # muckrakr 2 | 3 | ---- 4 | 5 | A growing collection of R functions and relationship files for data journalists. 
6 | 7 | Functions so far: 8 | 9 | * `untangle` - Disentangling a complex variable [[Documentation](http://andrewbtran.github.com/muckrakr/untangle.html)] 10 | * `bulk_csv` - Creating a large data frame by appending a folder of CSV files [[Documentation](http://andrewbtran.github.com/muckrakr/bulk_csv.html)] 11 | * `bulk_excel` - Creating a large data frame by appending a folder of Excel files [[Documentation](http://andrewbtran.github.com/muckrakr/bulk_excel.html)] 12 | * `tweet_cap` - Output a screenshot of a tweet (or list of tweets) by URL [[Documentation](http://andrewbtran.github.com/muckrakr/tweet_cap.html)] 13 | * `setup_folders` - Command to set up an optimized folder structure for your project [Documentation to come] 14 | * `dl_file` - Downloads file from URL if file doesn't exist in set folder [Documentation to come] 15 | * `packagr` - Downloading packages that are needed but aren't yet on the system [Documentation to come] 16 | 17 | ## Installation 18 | 19 | ---- 20 | 21 | ``` 22 | #install.packages("devtools") 23 | devtools::install_github("andrewbtran/muckrakr") 24 | ``` 25 | -------------------------------------------------------------------------------- /.Rproj.user/6400D2F9/persistent-state: -------------------------------------------------------------------------------- 1 | build-last-errors="[]" 2 | build-last-errors-base-dir="~/Documents/Github/muckrakr/" 3 | build-last-outputs="[{\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source muckrakr\\n\\n\",\"type\":0},{\"output\":\"* installing to library ‘/Library/Frameworks/R.framework/Versions/3.3/Resources/library’\\n\",\"type\":1},{\"output\":\"* installing *source* package ‘muckrakr’ ...\\n\",\"type\":1},{\"output\":\"** R\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** preparing package for lazy loading\\n\",\"type\":1},{\"output\":\"** help\\n\",\"type\":1},{\"output\":\"*** installing help indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** building 
package indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package can be loaded\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"* DONE (muckrakr)\\n\",\"type\":1},{\"output\":\"\",\"type\":1}]" 4 | compile_pdf_state="{\"errors\":[],\"output\":\"\",\"running\":false,\"tab_visible\":false,\"target_file\":\"\"}" 5 | console_procs="[]" 6 | files.monitored-path="" 7 | find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":false,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOff\":[],\"matchOn\":[]},\"running\":false}" 8 | imageDirtyState="0" 9 | saveActionState="0" 10 | -------------------------------------------------------------------------------- /R/packagr.R: --------------------------------------------------------------------------------
1 | #' Downloading packages that are needed but aren't yet on the system
2 | #'
3 | #' Attaches every requested package, first installing the ones that are not
4 | #' yet available: CRAN packages with `install.packages()`, GitHub packages
5 | #' with `devtools::install_github()`.
6 | #'
7 | #' @param packages Array of packages found on CRAN
8 | #' @param gh_packages Array of packages found on Github, written as
9 | #'   `"user/repo"`. The default value means "no GitHub packages".
10 | #' @return Invisibly, a named logical vector telling, for each requested
11 | #'   package, whether it could be attached.
12 | #' @keywords utilities
13 | #' @export
14 | #' @examples
15 | #' packagr(packages=c("tidyverse", "rtweet"), gh_packages=c("abtran/muckrakr"))
16 |
17 | packagr <- function(packages = c("tidyverse"),
18 |                     gh_packages = c("abtran/muckrakr")) {
19 |   # require() is used on purpose here: it attaches the package and reports
20 |   # a missing one as FALSE instead of stopping.
21 |   attach_pkgs <- function(pkgs) {
22 |     vapply(
23 |       pkgs,
24 |       function(pkg) {
25 |         require(pkg, warn.conflicts = TRUE, character.only = TRUE)
26 |       },
27 |       logical(1)
28 |     )
29 |   }
30 |
31 |   attached <- attach_pkgs(packages)
32 |   to_install <- !attached
33 |   if (any(to_install)) {
34 |     install.packages(packages[to_install])
35 |     attached[to_install] <- attach_pkgs(packages[to_install])
36 |   }
37 |
38 |   # identical() keeps the sentinel check scalar-safe: the previous `!=`
39 |   # comparison fails inside if() when several repositories are passed.
40 |   if (!identical(gh_packages, "abtran/muckrakr") && length(gh_packages) > 0) {
41 |     # "user/repo" -> "repo", the name the package is attached under
42 |     gh_names <- gsub(".*\\/", "", gh_packages)
43 |     gh_attached <- attach_pkgs(gh_names)
44 |     to_install <- !gh_attached
45 |     if (any(to_install)) {
46 |       if (!requireNamespace("devtools", quietly = TRUE)) {
47 |         install.packages("devtools")
48 |       }
49 |       # Select the repositories by position rather than by regex matching
50 |       # on the package name, so that e.g. "x/ab" is not picked for "b".
51 |       devtools::install_github(gh_packages[to_install])
52 |       gh_attached[to_install] <- attach_pkgs(gh_names[to_install])
53 |     }
54 |     attached <- c(attached, gh_attached)
55 |   }
56 |
57 |   invisible(attached)
58 | }
59 |
-------------------------------------------------------------------------------- /R/untangle.R: --------------------------------------------------------------------------------
1 | #' Disentangling a complex variable
2 | #'
3 | #' We are occasionally faced with a data set variable that contains multiple pieces of information. In this case these pieces are delineated by a comma or colon. As is, the variable is not useful.
4 | #' The *untangle* function can be used to create a set of dummy codes from this variable that will be more useful. This results in a new dataset with dummy codes.
5 | #' @param data The data frame that contains the column to untangle
6 | #' @param x Column to untangle
7 | #' @param pattern Special characters that separate the variables in the column
8 | #' @param verbose TRUE or FALSE. If TRUE, the unique codes are printed.
9 | #' @return `data` with one extra 1/0 column per unique code (spaces in a code
10 | #'   are replaced by `_` in the column name). Rows whose value of `x` is
11 | #'   `NA` get `NA` in every new column.
12 | #' @keywords data wrangling
13 | #' @export
14 | #' @examples
15 | #' untangle(data=test_data, x="charges", pattern="[,:]", verbose=TRUE)
16 |
17 | untangle <- function(data, x, pattern, verbose = FALSE) {
18 |   if (!requireNamespace("stringr", quietly = TRUE)) {
19 |     stop("Package 'stringr' is required by untangle()", call. = FALSE)
20 |   }
21 |   if (is.null(data[[x]])) {
22 |     stop("Column '", x, "' not found in `data`", call. = FALSE)
23 |   }
24 |
25 |   variable <- stringr::str_to_lower(data[[x]])
26 |
27 |   # one row per observation, one cell per (trimmed) code
28 |   code_matrix <- stringr::str_split(variable, pattern, simplify = TRUE)
29 |   code_matrix[] <- stringr::str_trim(code_matrix)
30 |
31 |   # obtain list of unique codes, ignoring padding cells and missing values
32 |   code_vector <- as.character(code_matrix)
33 |   code_vector <- unique(code_vector[!is.na(code_vector) & code_vector != ""])
34 |   if (verbose) cat("[Unique Codes] ", code_vector, "\n", sep = "\n")
35 |
36 |   # create dummy codes matrix
37 |   dummy_codes <- matrix(0, nrow = nrow(data), ncol = length(code_vector))
38 |
39 |   # add 1/0 codes. Codes are compared as whole tokens, not as regular
40 |   # expressions: a code such as "theft (petty)" matches itself, and
41 |   # "assault" no longer matches a row that only holds "aggravated assault".
42 |   for (i in seq_along(code_vector)) {
43 |     has_code <- rowSums(code_matrix == code_vector[i], na.rm = TRUE) > 0
44 |     dummy_codes[, i] <- as.numeric(has_code)
45 |   }
46 |   dummy_codes[is.na(variable), ] <- NA
47 |
48 |   # add dummy code names
49 |   dummy_codes <- as.data.frame(dummy_codes)
50 |   names(dummy_codes) <- stringr::str_replace_all(code_vector, " ", "_")
51 |
52 |   # add to data frame; returned visibly (the old trailing assignment made
53 |   # the result invisible)
54 |   cbind(data, dummy_codes)
55 | }
56 |
-------------------------------------------------------------------------------- /docs/bulk_csv.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "bulk_csv()" 3 | author: "Andrew Ba Tran" 4 | date: "March 30, 2017" 5 | output: html_document 6 | --- 7 | 8 | Creating a single data frame from a folder of similarly structure CSV spreadsheets. 9 | 10 | ## Description 11 | 12 | Like Voltron-ing your data 13 | 14 | ## Usage 15 | 16 | bulk_csv(folder = "DEFAULTBULKCSV2017", 17 | export = "filenamedefaultbulkcsv2017.csv") 18 | 19 | ## Arguments 20 | 21 | * `folder` - The folder in relation to your working directory where the csv files exist. Default folder is the current working directory. 22 | * `export` - File name to export csv file as, if wanted. 23 | 24 | ## Note 25 | 26 | This package is based on [**readr**](https://blog.rstudio.org/2015/04/09/readr-0-1-0/), so passing on variables from `read_csv` will work in `bulk_csv`. These variables will apply to the import of each CSV sheet. 27 | 28 | 29 | ### Documentation 30 | 31 | In this example, these CSVs reside in a folder called **state_data**. 32 | 33 | ```{r setup, include=FALSE} 34 | knitr::opts_chunk$set(echo = FALSE, message = FALSE) 35 | ``` 36 | 37 |
38 |
![Folder full of CSVs](images/csvlist.png)
39 | 40 | ---- 41 | 42 | The data structure **must** be similar for each data set, as in column names and data types within the columns are consistent. 43 | 44 | Or the bulk join will not work. 45 | 46 | To create a new dataframe with all the data sets combined, the command would be: 47 | 48 | `combined_data <- bulk_csv(folder="state_data")` 49 | 50 | To simply export the data as a new combined CSV file, the command would be: 51 | 52 | `bulk_csv(folder="state_data", export="combined_data.csv")` 53 | 54 | This would save the file to the current working directory. 55 | 56 | You can specify the location to download in `export` as long as you put the folder structure before the CSV file name, like: 57 | 58 | `bulk_csv(folder="state_data", export="data_export/combined_data.csv")` 59 | 60 | -------------------------------------------------------------------------------- /docs/bulk_excel.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "bulk_excel()" 3 | author: "Andrew Ba Tran" 4 | date: "March 30, 2017" 5 | output: html_document 6 | --- 7 | 8 | Creating a single data frame from a folder of similarly structure Excel spreadsheets. 9 | 10 | ## Description 11 | 12 | Like Voltron-ing your data 13 | 14 | ## Usage 15 | 16 | bulk_excel(folder="DEFAULTBULKCSV2017", export="filenamedefaultbulkcsv2017.xls") 17 | 18 | ## Arguments 19 | 20 | * `folder` - The folder in relation to your working directory where the Excel files exist. Default folder is the current working directory. 21 | * `export` - File name to export CSV file (Not as an Excel file, sorry) as, if wanted. 22 | 23 | ## Note 24 | 25 | This package is based on [**readxl**](https://github.com/tidyverse/readxl), so passing on variables from `read_excel` will work in `bulk_excel`. These variables will apply to the import of each Excel spreadsheet. 26 | 27 | ### Documentation 28 | 29 | In this example, these Excel spreadsheets reside in a folder called **state_data**. 
30 | 31 | ```{r setup, include=FALSE} 32 | knitr::opts_chunk$set(echo = FALSE, message = FALSE) 33 | ``` 34 | 35 |
36 |
![Folder full of Excel sheets](images/excellist.png)
37 | 38 | ---- 39 | 40 | The data structure **must** be similar for each data set, as in column names and data types within the columns are consistent. 41 | 42 | Or the bulk join will not work. 43 | 44 | To create a new dataframe with all the data sets combined, the command would be: 45 | 46 | `combined_data <- bulk_excel(folder="state_data")` 47 | 48 | To simply export the data as a new combined CSV file, the command would be: 49 | 50 | `bulk_excel(folder="state_data", export="combined_data.csv")` 51 | 52 | This would save the file to the current working directory. 53 | 54 | You can specify the location to download in `export` as long as you put the folder structure before the CSV file name, like: 55 | 56 | `bulk_excel(folder="state_data", export="data_export/combined_data.csv")` 57 | 58 | -------------------------------------------------------------------------------- /R/tweet_cap.R: -------------------------------------------------------------------------------- 1 | #' Generate a screenshot of a tweet 2 | #' 3 | #' Quick screenshot of a tweet based on a link 4 | #' @param link the URL of the tweet 5 | #' @param filename the base name of the image you want to save. Default will be user's twitter handle. 
#' @param folder the folder (relative to the working directory) you want to save the screenshot to. If left at its default, the working directory is used. The folder is created when it does not exist yet.
#' @return Invisibly, a character vector holding the path of every screenshot written.
#' @keywords twitter screenshot
#' @export
#' @examples
#' \dontrun{
#' tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images")
#' }
tweet_cap <- function(link = "", filename = "NOTHINGTWEET_CAP",
                      folder = "DEFAULTTWEETCAP") {
  # Fail early with a readable message: the default link = "" (or an NA entry)
  # would otherwise be handed to webshot as a URL.
  link <- as.character(link)
  if (length(link) == 0 || anyNA(link) || !all(nzchar(link))) {
    stop("`link` must contain at least one tweet URL and no empty/NA entries.",
         call. = FALSE)
  }
  if (!length(filename) %in% c(1L, length(link))) {
    stop("`filename` must have length 1 or the same length as `link`.",
         call. = FALSE)
  }

  # Same install-on-demand behaviour as before, but without attaching the
  # packages to the caller's search path.
  if (!requireNamespace("webshot", quietly = TRUE)) {
    if (!requireNamespace("devtools", quietly = TRUE)) {
      install.packages("devtools")
    }
    devtools::install_github("wch/webshot")
  }

  if (identical(folder, "DEFAULTTWEETCAP")) {
    out_dir <- getwd()
  } else {
    out_dir <- file.path(getwd(), folder)
  }
  # webshot cannot write into a directory that does not exist.
  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE)
  }

  # Build every output name in one vectorised pass. Query strings, fragments
  # and trailing slashes are stripped first so that the last path segment is
  # really the status id, and any scheme/host prefix is accepted.
  tidy_link <- sub("/+$", "", sub("[?#].*$", "", link))
  username <- sub("/.*$", "", sub("^https?://[^/]+/", "", tidy_link))
  id_num <- sub(".*/", "", tidy_link)
  if (identical(filename, "NOTHINGTWEET_CAP")) {
    pre_name <- username
  } else {
    pre_name <- filename
  }
  out_path <- file.path(out_dir, paste0(pre_name, id_num, ".png"))

  # A progress bar is only shown for more than one link, as before.
  show_progress <- length(link) > 1
  if (show_progress) {
    pb <- txtProgressBar(min = 0, max = length(link), style = 3)
    on.exit(close(pb), add = TRUE)
  }

  for (i in seq_along(link)) {
    webshot::webshot(
      link[i],
      out_path[i],
      selector = c(".permalink-inner", ".permalink-tweet-container")
    )
    if (show_progress) {
      setTxtProgressBar(pb, i)
    }
  }

  invisible(out_path)
}

--------------------------------------------------------------------------------
/docs/tweet_cap.Rmd:
-------------------------------------------------------------------------------- 1 | --- 2 | title: "tweet_cap()" 3 | author: "Andrew" 4 | date: "3/31/2017" 5 | output: html_document 6 | --- 7 | 8 | Generate a screenshot of a tweet 9 | 10 | ## Description 11 | 12 | Quick screenshot of a tweet based on a link or vector of links. The image will represent how the tweet looks on a browser. 13 | 14 | ## Usage 15 | 16 | tweet_cap(link = "", filename = "NOTHINGTWEET_CAP", folder = "DEFAULTTWEETCAP") 17 | 18 | ## Arguments 19 | 20 | 21 | * `link` - the URL of the tweet 22 | * `filename` - the base name of the image you want to save. Default will be user's twitter handle. 23 | * `folder` - the folder you want to save the screenshot to. If ignored, the folder will be the working directory. 24 | 25 | ### Documentation 26 | 27 | #### Single tweet 28 | 29 | To grab one tweet, just put in the URL address. 30 | 31 | `tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images")` 32 | 33 | This image will be saved to the **images** folder. 34 | 35 |
36 |
![Single image](images/memeprovider833888807959289856.png)
37 | 38 | ---- 39 | 40 | #### Multiple tweets 41 | 42 | If you had a data frame of links: 43 | 44 | ```{r tweets_list, warning=F, message=F} 45 | library(tidyverse) 46 | library(knitr) 47 | ``` 48 | 49 | ```{r loading_data} 50 | tweets <- tribble( 51 | ~Link, ~Account, 52 | "https://twitter.com/realDonaldTrump/status/847766558520856578", "realDonaldTrump", 53 | "https://twitter.com/realDonaldTrump/status/847056211006631936", "realDonaldTrump", 54 | "https://twitter.com/realDonaldTrump/status/846854703183020032", "realDonaldTrump", 55 | "https://twitter.com/byrdinator/status/847600835148128258", "byrdinator", 56 | "https://twitter.com/Gizmodo/status/847532891127074823", "Gizmodo" 57 | ) 58 | 59 | kable(tweets) 60 | ``` 61 | 62 | To generate a folder of images of the list of tweets, use: 63 | 64 | ```{r, eval=F} 65 | tweet_cap(tweets$Link, folder="images") 66 | ``` 67 | 68 | And you'll get: 69 | 70 |
71 |
![Folder of images](images/tweet_images.png)
--------------------------------------------------------------------------------
/docs/untangle.Rmd:
--------------------------------------------------------------------------------
---
title: "Disentangling a complex variable"
author: "Rob Kabacoff, Ph.D."
date: "March 9, 2017"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, message = FALSE)
```

## Untangling a complex variable

We are occasionally faced with a data set variable that contains multiple pieces of information. Consider the variable **charges** in the following dataset **test_data**.

```{r dataset, warning=FALSE, message=FALSE}
test_data <- data.frame(id=1:4,
                        charges = c("Murder,Kidnapping,Burglary",
                                    "burglary: assault and battery",
                                    "murder",
                                    ""))
knitr::kable(test_data, caption="test_data")
```

Multiple pieces of information are stored in this variable. In this case these pieces are delineated by a comma or colon. As is, the variable is not useful.

The **untangle** function can be used to create a set of dummy codes from this variable that will be more useful. This results in a new dataset with dummy codes:

```{r function, warning=FALSE}
# Split a delimited text column into one 0/1 dummy column per distinct code.
#
# data    - data frame holding the column to untangle
# x       - name of that column (character scalar)
# pattern - regular expression separating the codes inside a cell
# verbose - if TRUE, print the unique codes that were found
#
# Returns `data` with one extra numeric column per code; a column holds 1 when
# the (lower-cased) cell contains that code, 0 when it does not and NA when
# the cell itself is NA.
untangle <- function(data, x, pattern, verbose=FALSE){
  if (!x %in% names(data)) {
    stop("Column `", x, "` not found in `data`.", call. = FALSE)
  }

  variable <- stringr::str_to_lower(data[[x]])

  # obtain list of unique codes; NA cells must not turn into an "NA" code
  code_matrix <- stringr::str_split(variable, pattern, simplify=TRUE)
  code_vector <- stringr::str_trim(as.character(code_matrix))
  code_vector <- unique(code_vector[!is.na(code_vector) & code_vector != ""])
  if(verbose) cat("[Unique Codes] ", code_vector, "\n", sep="\n")

  # create dummy codes matrix
  nobs <- nrow(data)
  ncodes <- length(code_vector)
  dummy_codes <- matrix(NA_real_, nrow=nobs, ncol=ncodes)

  # add 1/0 codes; seq_len() skips the loop when there are no codes, and
  # fixed() matches each code literally, so codes containing regex
  # metacharacters such as "(" or "." match themselves
  for(i in seq_len(ncodes)){
    hit <- stringr::str_detect(variable, stringr::fixed(code_vector[i]))
    dummy_codes[,i] <- as.numeric(hit)
  }

  # add dummy code names
  dummy_codes <- as.data.frame(dummy_codes)
  names(dummy_codes) <- stringr::str_replace_all(code_vector, " ", "_")

  # add to data frame (returned visibly)
  cbind(data, dummy_codes)
}

```

```{r, echo=TRUE, warning=FALSE}
test_data <- untangle(data=test_data, x="charges", pattern="[,:]", verbose=TRUE)

```



```{r results}
knitr::kable(test_data, caption="New test_data")

```


--------------------------------------------------------------------------------
/R/bulk_excel.R:
--------------------------------------------------------------------------------
#' Creating a single data frame from a folder of similarly structured Excel spreadsheets
#'
#' Like Voltron-ing your Excel data. This package is based on readxl, so passing on variables from read_excel will work in bulk_excel. These variables will apply to the import of each Excel spreadsheet.
#'
#' @param folder The folder in relation to your working directory where the Excel files exist. Default folder is the current working directory.
#' @param export File name to export csv file as, if wanted.
#' @param sheet,col_names,col_types,na,skip Passed unchanged to \code{readxl::read_excel} for every spreadsheet.
#' @return A data frame with the rows of every spreadsheet stacked in file-name order.
#' @keywords data appending
#' @export
#' @examples
#' \dontrun{
#' bulk_excel(folder="data", export="combined_data.csv")
#' }
bulk_excel <- function(folder = "DEFAULTBULKCSV2017",
                       export = "filenamedefaultbulkcsv2017.csv",
                       sheet = 1, col_names = TRUE, col_types = NULL, na = "",
                       skip = 0) {
  if (folder == "DEFAULTBULKCSV2017" || folder == "") {
    folder <- getwd()
  } else {
    folder <- file.path(getwd(), folder)
  }

  # list.files() takes ONE regular expression; anchor it on the extension so
  # only .xls/.xlsx files are picked up.
  files <- list.files(folder, pattern = "\\.xlsx?$", ignore.case = TRUE)
  # Skip the "~$..." lock files Excel leaves next to an open workbook.
  files <- files[!startsWith(files, "~")]
  if (length(files) == 0) {
    stop("No Excel files (.xls/.xlsx) found in ", folder, call. = FALSE)
  }

  pb <- txtProgressBar(min = 0, max = length(files), style = 3)
  on.exit(close(pb), add = TRUE)

  # Collect the sheets first and bind once, instead of growing a data frame
  # inside the loop.
  sheets <- vector("list", length(files))
  for (i in seq_along(files)) {
    file_x <- readxl::read_excel(
      file.path(folder, files[i]),
      sheet = sheet, col_names = col_names, col_types = col_types,
      na = na, skip = skip
    )
    colnames(file_x) <- ifelse(is.na(colnames(file_x)), "", colnames(file_x))
    sheets[[i]] <- file_x
    setTxtProgressBar(pb, i)
  }
  # rbind keeps the original contract: column names must agree across files.
  all_files <- do.call(rbind, sheets)

  if (export != "filenamedefaultbulkcsv2017.csv" && export != "") {
    readr::write_csv(all_files, export)
  }
  all_files
}

--------------------------------------------------------------------------------
/R/bulk_csv.R:
--------------------------------------------------------------------------------
#' Creating a single data frame from a folder of similarly structured CSV spreadsheets
#'
#' Like Voltron-ing your data. This package is based on readr, so passing on variables from read_csv will work in bulk_csv. These variables will apply to the import of each CSV sheet.
#' @param folder The folder in relation to your working directory where the csv files exist. Default folder is the current working directory.
#' @param export File name to export csv file as, if wanted.
#' @param col_names,col_types,na,quoted_na,comment,trim_ws,skip,n_max,guess_max,progress Passed unchanged to \code{readr::read_csv} for every file.
#' @return A data frame with the rows of every CSV file stacked in file-name order.
#' @keywords data appending
#' @export
#' @examples
#' \dontrun{
#' bulk_csv(folder="data", export="combined_data.csv")
#' }
bulk_csv <- function(folder = "DEFAULTBULKCSV2017",
                     export = "filenamedefaultbulkcsv2017.csv",
                     col_names = TRUE, col_types = NULL,
                     na = c("", "NA"), quoted_na = TRUE,
                     comment = "", trim_ws = TRUE, skip = 0, n_max = Inf,
                     guess_max = min(1000, n_max), progress = interactive()) {
  if (folder == "DEFAULTBULKCSV2017" || folder == "") {
    folder <- getwd()
  } else {
    folder <- file.path(getwd(), folder)
  }

  # `pattern` is a regular expression, not a glob: anchor it on the extension.
  files <- list.files(folder, pattern = "\\.csv$", ignore.case = TRUE)

  # When the export file lives in the input folder, a second run would read
  # the previous combined export back in and duplicate every row.
  wants_export <- export != "filenamedefaultbulkcsv2017.csv" && export != ""
  if (wants_export && length(files) > 0) {
    is_export <- normalizePath(file.path(folder, files), mustWork = FALSE) ==
      normalizePath(export, mustWork = FALSE)
    files <- files[!is_export]
  }
  if (length(files) == 0) {
    stop("No CSV files found in ", folder, call. = FALSE)
  }

  pb <- txtProgressBar(min = 0, max = length(files), style = 3)
  on.exit(close(pb), add = TRUE)

  # Collect the tables first and bind once, instead of growing a data frame
  # inside the loop.
  tables <- vector("list", length(files))
  for (i in seq_along(files)) {
    tables[[i]] <- readr::read_csv(
      file.path(folder, files[i]),
      col_names = col_names, col_types = col_types,
      na = na, quoted_na = quoted_na,
      comment = comment, trim_ws = trim_ws, skip = skip, n_max = n_max,
      guess_max = guess_max, progress = progress
    )
    setTxtProgressBar(pb, i)
  }
  # rbind keeps the original contract: column names must agree across files.
  all_files <- do.call(rbind, tables)

  if (wants_export) {
    readr::write_csv(all_files, export)
  }
  all_files
}

--------------------------------------------------------------------------------
/.Rhistory:
--------------------------------------------------------------------------------
install.packages("muckrock")
library(muckrock)
?muckrock
ag <- agency_metadata
View(ag)
ju <- jurisdiction_metadata
View(ju)
re <- request_metadata
View(re)
library(muckraker)
?untangle
library(roxygen2)
roxygenise()
library(muckraker)
?untangle
roxygenise()
library(muckraker)
?untangle
roxygenise()
library(muckraker)
?muckraker
library(muckraker)
library(muckrakr)
library(muckrakr)
library(readr)
?read_csv
?list.files
library(roxygen2)
roxygenise()
roxygenise()
library(muckrakr)
library(muckrakr)
roxygenise()
library(muckrakr)
roxygenise()
library(muckrakr)
library(muckrakr) 38 | library(readxl) 39 | ?list.files 40 | ?read_excel 41 | ?write_excel 42 | ?readxl 43 | files = list.files(folder, pattern=c("*.xls", "*.xlsx")) 44 | link="https://twitter.com/memeprovider/status/833888807959289856" 45 | length(link) 46 | link="https://twitter.com/memeprovider/status/833888807959289856" 47 | gsub("https://twitter.com/", "", link) 48 | username <- gsub("https://twitter.com/", "", link) 49 | ?gsub 50 | username <- gsub("https://twitter.com/", "", link) 51 | gsub("/.*","" fixed=T) 52 | gsub("/.*","", fixed=T) 53 | gsub("/.*","",username, fixed=T) 54 | gsub("/*.","",username, fixed=T) 55 | gsub("///*.","",username, fixed=T) 56 | gsub("\\/*.","",username, fixed=T) 57 | gsub("\\/.*","",username, fixed=T) 58 | gsub("status.*","",username, fixed=T) 59 | gsub("status*.","",username, fixed=T) 60 | gsub(".*status","",username, fixed=T) 61 | gsub(".*status","",username, fixed=F) 62 | gsub("/.*","",username, fixed=F) 63 | id_num <- gsub(".*/", "", link) 64 | id_num 65 | tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){ 66 | if(!require(devtools)){ 67 | install.packages("devtools") 68 | library(devtools) 69 | } 70 | if(!require(webshot)){ 71 | devtools::install_github("wch/webshot") 72 | library(webshot) 73 | } 74 | if (folder=="DEFAULTTWEETCAP") { 75 | folder=getwd() 76 | } else { 77 | folder <- paste0(getwd(), "/", folder) 78 | } 79 | if (length(link)==1) { 80 | link = link 81 | username <- gsub("https://twitter.com/", "", link) 82 | username <- gsub("/.*","",username, fixed=F) 83 | if (name=="NOTHINGTWEET_CAP") { 84 | pre_name <- username 85 | } else { 86 | pre_name <- filename 87 | } 88 | id_num <- gsub(".*/", "", link) 89 | image_name <- paste0(pre_name, id_num, .png) 90 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container")) 91 | } 92 | } 93 | tweet_cap 94 | tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", 
folder="images") 95 | tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){ 96 | if(!require(devtools)){ 97 | install.packages("devtools") 98 | library(devtools) 99 | } 100 | if(!require(webshot)){ 101 | devtools::install_github("wch/webshot") 102 | library(webshot) 103 | } 104 | if (folder=="DEFAULTTWEETCAP") { 105 | folder=getwd() 106 | } else { 107 | folder <- paste0(getwd(), "/", folder) 108 | } 109 | if (length(link)==1) { 110 | link = link 111 | username <- gsub("https://twitter.com/", "", link) 112 | username <- gsub("/.*","",username, fixed=F) 113 | if (filename=="NOTHINGTWEET_CAP") { 114 | pre_name <- username 115 | } else { 116 | pre_name <- filename 117 | } 118 | id_num <- gsub(".*/", "", link) 119 | image_name <- paste0(pre_name, id_num, .png) 120 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container")) 121 | } 122 | } 123 | tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images") 124 | tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){ 125 | if(!require(devtools)){ 126 | install.packages("devtools") 127 | library(devtools) 128 | } 129 | if(!require(webshot)){ 130 | devtools::install_github("wch/webshot") 131 | library(webshot) 132 | } 133 | if (folder=="DEFAULTTWEETCAP") { 134 | folder=getwd() 135 | } else { 136 | folder <- paste0(getwd(), "/", folder) 137 | } 138 | if (length(link)==1) { 139 | link = link 140 | username <- gsub("https://twitter.com/", "", link) 141 | username <- gsub("/.*","",username, fixed=F) 142 | if (filename=="NOTHINGTWEET_CAP") { 143 | pre_name <- username 144 | } else { 145 | pre_name <- filename 146 | } 147 | id_num <- gsub(".*/", "", link) 148 | image_name <- paste0(pre_name, id_num, ".png") 149 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container")) 150 | } 151 | } 152 | 
tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images") 153 | roxygenise() 154 | library(muckrakr) 155 | ?bulk_csv 156 | ?bulk_csv 157 | ?read_excel 158 | ?read_csv 159 | roxygenise() 160 | roxygenise() 161 | library(muckrakr) 162 | knitr::opts_chunk$set(echo = TRUE) 163 | tribble( 164 | ~Link, ~Account, 165 | "https://twitter.com/realDonaldTrump/status/847766558520856578", "realDonaldTrump", 166 | "https://twitter.com/realDonaldTrump/status/847056211006631936", "realDonaldTrump", 167 | "https://twitter.com/realDonaldTrump/status/846854703183020032", "realDonaldTrump", 168 | "https://twitter.com/byrdinator/status/847600835148128258", "byrdinator", 169 | "https://twitter.com/Gizmodo/status/847532891127074823", "Gizmodo" 170 | ) 171 | library(tidyverse) 172 | tribble( 173 | ~Link, ~Account, 174 | "https://twitter.com/realDonaldTrump/status/847766558520856578", "realDonaldTrump", 175 | "https://twitter.com/realDonaldTrump/status/847056211006631936", "realDonaldTrump", 176 | "https://twitter.com/realDonaldTrump/status/846854703183020032", "realDonaldTrump", 177 | "https://twitter.com/byrdinator/status/847600835148128258", "byrdinator", 178 | "https://twitter.com/Gizmodo/status/847532891127074823", "Gizmodo" 179 | ) 180 | tribble( 181 | ~Link, ~Account, 182 | "https://twitter.com/realDonaldTrump/status/847766558520856578", "realDonaldTrump", 183 | "https://twitter.com/realDonaldTrump/status/847056211006631936", "realDonaldTrump", 184 | "https://twitter.com/realDonaldTrump/status/846854703183020032", "realDonaldTrump", 185 | "https://twitter.com/byrdinator/status/847600835148128258", "byrdinator", 186 | "https://twitter.com/Gizmodo/status/847532891127074823", "Gizmodo" 187 | ) 188 | tweets <- tribble( 189 | ~Link, ~Account, 190 | "https://twitter.com/realDonaldTrump/status/847766558520856578", "realDonaldTrump", 191 | "https://twitter.com/realDonaldTrump/status/847056211006631936", "realDonaldTrump", 192 | 
"https://twitter.com/realDonaldTrump/status/846854703183020032", "realDonaldTrump", 193 | "https://twitter.com/byrdinator/status/847600835148128258", "byrdinator", 194 | "https://twitter.com/Gizmodo/status/847532891127074823", "Gizmodo" 195 | ) 196 | View(tweets) 197 | tweet_cap(link=tweets$Link, folder="images") 198 | tweets$Link 199 | link=tweets$Link 200 | pb <- txtProgressBar(min = 0, max = length(link), style = 3) 201 | for (i in length(link)){ 202 | link_i = link[i] 203 | username <- gsub("https://twitter.com/", "", link_i) 204 | username <- gsub("/.*","",username, fixed=F) 205 | if (filename=="NOTHINGTWEET_CAP") { 206 | pre_name <- username 207 | } else { 208 | pre_name <- filename 209 | } 210 | id_num <- gsub(".*/", "", link_i) 211 | image_name <- paste0(pre_name, id_num, ".png") 212 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container")) 213 | setTxtProgressBar(pb, i) 214 | } 215 | filename="NOTHINGTWEET_CAP" 216 | pb <- txtProgressBar(min = 0, max = length(link), style = 3) 217 | for (i in length(link)){ 218 | link_i = link[i] 219 | username <- gsub("https://twitter.com/", "", link_i) 220 | username <- gsub("/.*","",username, fixed=F) 221 | if (filename=="NOTHINGTWEET_CAP") { 222 | pre_name <- username 223 | } else { 224 | pre_name <- filename 225 | } 226 | id_num <- gsub(".*/", "", link_i) 227 | image_name <- paste0(pre_name, id_num, ".png") 228 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container")) 229 | setTxtProgressBar(pb, i) 230 | } 231 | folder="DEFAULTTWEETCAP" 232 | pb <- txtProgressBar(min = 0, max = length(link), style = 3) 233 | for (i in length(link)){ 234 | link_i = link[i] 235 | username <- gsub("https://twitter.com/", "", link_i) 236 | username <- gsub("/.*","",username, fixed=F) 237 | if (filename=="NOTHINGTWEET_CAP") { 238 | pre_name <- username 239 | } else { 240 | pre_name <- filename 241 | } 242 | id_num <- gsub(".*/", "", 
link_i) 243 | image_name <- paste0(pre_name, id_num, ".png") 244 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container")) 245 | setTxtProgressBar(pb, i) 246 | } 247 | getwd() 248 | roxygenise() 249 | library(muckrakr) 250 | tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images") 251 | tweet_cap(link=tweets$Link, folder="images") 252 | tweet_cap(tweets$Link, folder="images") 253 | tweets$Link 254 | str(tweets$Link) 255 | str("https://twitter.com/memeprovider/status/833888807959289856") 256 | tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){ 257 | if(!require(devtools)){ 258 | install.packages("devtools") 259 | library(devtools) 260 | } 261 | if(!require(webshot)){ 262 | devtools::install_github("wch/webshot") 263 | library(webshot) 264 | } 265 | if (folder=="DEFAULTTWEETCAP") { 266 | folder=getwd() 267 | } else { 268 | folder <- paste0(getwd(), "/", folder) 269 | } 270 | if (length(link)==1) { 271 | link = link 272 | username <- gsub("https://twitter.com/", "", link) 273 | username <- gsub("/.*","",username, fixed=F) 274 | if (filename=="NOTHINGTWEET_CAP") { 275 | pre_name <- username 276 | } else { 277 | pre_name <- filename 278 | } 279 | id_num <- gsub(".*/", "", link) 280 | image_name <- paste0(pre_name, id_num, ".png") 281 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container")) 282 | } 283 | if (length(link)>1) { 284 | pb <- txtProgressBar(min = 0, max = length(link), style = 3) 285 | for (i in length(link)){ 286 | link_i = link[i] 287 | username <- gsub("https://twitter.com/", "", link_i) 288 | username <- gsub("/.*","",username, fixed=F) 289 | if (filename=="NOTHINGTWEET_CAP") { 290 | pre_name <- username 291 | } else { 292 | pre_name <- filename 293 | } 294 | id_num <- gsub(".*/", "", link_i) 295 | image_name <- paste0(pre_name, id_num, ".png") 296 | webshot(link, paste0(folder, 
"/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container")) 297 | setTxtProgressBar(pb, i) 298 | } 299 | } 300 | } 301 | tweet_cap(tweets$Link) 302 | tweet_cap(tweets$Link, folder="images") 303 | tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){ 304 | if(!require(devtools)){ 305 | install.packages("devtools") 306 | library(devtools) 307 | } 308 | if(!require(webshot)){ 309 | devtools::install_github("wch/webshot") 310 | library(webshot) 311 | } 312 | if (folder=="DEFAULTTWEETCAP") { 313 | folder=getwd() 314 | } else { 315 | folder <- paste0(getwd(), "/", folder) 316 | } 317 | if (length(link)==1) { 318 | link = link 319 | username <- gsub("https://twitter.com/", "", link) 320 | username <- gsub("/.*","",username, fixed=F) 321 | if (filename=="NOTHINGTWEET_CAP") { 322 | pre_name <- username 323 | } else { 324 | pre_name <- filename 325 | } 326 | id_num <- gsub(".*/", "", link) 327 | image_name <- paste0(pre_name, id_num, ".png") 328 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container")) 329 | } 330 | if (length(link)>1) { 331 | pb <- txtProgressBar(min = 0, max = length(link), style = 3) 332 | for (i in length(link)){ 333 | link_i = link[i] 334 | username <- gsub("https://twitter.com/", "", link_i) 335 | username <- gsub("/.*","",username, fixed=F) 336 | if (filename=="NOTHINGTWEET_CAP") { 337 | pre_name <- username 338 | } else { 339 | pre_name <- filename 340 | } 341 | id_num <- gsub(".*/", "", link_i) 342 | image_name <- paste0(pre_name, id_num, ".png") 343 | webshot(link_i, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container")) 344 | setTxtProgressBar(pb, i) 345 | } 346 | } 347 | } 348 | tweet_cap(tweets$Link, folder="images") 349 | link <- tweets$Link 350 | filename="NOTHINGTWEET_CAP" 351 | folder <- "test" 352 | length(link)>1 353 | pb <- txtProgressBar(min = 0, max = length(link), style = 3) 354 | pb 355 | 
tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){ 356 | if(!require(devtools)){ 357 | install.packages("devtools") 358 | library(devtools) 359 | } 360 | if(!require(webshot)){ 361 | devtools::install_github("wch/webshot") 362 | library(webshot) 363 | } 364 | if (folder=="DEFAULTTWEETCAP") { 365 | folder=getwd() 366 | } else { 367 | folder <- paste0(getwd(), "/", folder) 368 | } 369 | if (length(link)==1) { 370 | link = link 371 | username <- gsub("https://twitter.com/", "", link) 372 | username <- gsub("/.*","",username, fixed=F) 373 | if (filename=="NOTHINGTWEET_CAP") { 374 | pre_name <- username 375 | } else { 376 | pre_name <- filename 377 | } 378 | id_num <- gsub(".*/", "", link) 379 | image_name <- paste0(pre_name, id_num, ".png") 380 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container")) 381 | } 382 | if (length(link)>1) { 383 | pb <- txtProgressBar(min = 0, max = length(link), style = 3) 384 | for (i in 1:length(link)){ 385 | link_i = link[i] 386 | username <- gsub("https://twitter.com/", "", link_i) 387 | username <- gsub("/.*","",username, fixed=F) 388 | if (filename=="NOTHINGTWEET_CAP") { 389 | pre_name <- username 390 | } else { 391 | pre_name <- filename 392 | } 393 | id_num <- gsub(".*/", "", link_i) 394 | image_name <- paste0(pre_name, id_num, ".png") 395 | webshot(link_i, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container")) 396 | setTxtProgressBar(pb, i) 397 | } 398 | } 399 | } 400 | tweet_cap(tweets$Link, folder="images") 401 | ?tweet_cap 402 | tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images") 403 | tweet_cap(tweets$Link, folder="images") 404 | roxygenize() 405 | library(muckrakr) 406 | --------------------------------------------------------------------------------