├── .Rproj.user
    ├── 6400D2F9
    │   ├── cpp-definition-cache
    │   ├── sdb
    │   │   └── prop
    │   │   │   ├── 1EBC0BC2
    │   │   │   ├── 3881D3BE
    │   │   │   ├── 5BEA6FD4
    │   │   │   ├── 67865F02
    │   │   │   ├── 741CD66D
    │   │   │   ├── 8DEFE4DD
    │   │   │   ├── AEC4A068
    │   │   │   ├── DB4BBF72
    │   │   │   ├── 1B3FF799
    │   │   │   ├── 60D253CC
    │   │   │   ├── 7D846616
    │   │   │   ├── AB15AAE
    │   │   │   ├── B821A3B8
    │   │   │   ├── FA247785
    │   │   │   └── INDEX
    │   ├── saved_source_markers
    │   ├── pcs
    │   │   ├── source-pane.pper
    │   │   ├── workbench-pane.pper
    │   │   ├── debug-breakpoints.pper
    │   │   ├── files-pane.pper
    │   │   └── windowlayoutstate.pper
    │   ├── session-persistent-state
    │   ├── rmd-outputs
    │   └── persistent-state
    └── shared
    │   └── notebooks
    │       └── paths
├── .Rbuildignore
├── .RData
├── .gitignore
├── .DS_Store
├── docs
    ├── untangle.pdf
    ├── images
    │   ├── csvlist.png
    │   ├── excellist.png
    │   ├── tweet_images.png
    │   └── memeprovider833888807959289856.png
    ├── index.Rmd
    ├── bulk_csv.Rmd
    ├── bulk_excel.Rmd
    ├── tweet_cap.Rmd
    └── untangle.Rmd
├── NAMESPACE
├── R
    ├── setup_folders.R
    ├── dl_file.R
    ├── packagr.R
    ├── untangle.R
    ├── tweet_cap.R
    ├── bulk_excel.R
    └── bulk_csv.R
├── man
    ├── setup_folders.Rd
    ├── dl_file.Rd
    ├── tweet_cap.Rd
    ├── packagr.Rd
    ├── bulk_excel.Rd
    ├── untangle.Rd
    └── bulk_csv.Rd
├── muckrakr.Rproj
├── DESCRIPTION
├── readme.MD
└── .Rhistory


/.Rproj.user/6400D2F9/cpp-definition-cache:
--------------------------------------------------------------------------------
1 | [
2 | ]


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/1EBC0BC2:
--------------------------------------------------------------------------------
1 | {
2 | }


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/3881D3BE:
--------------------------------------------------------------------------------
1 | {
2 | }


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/5BEA6FD4:
--------------------------------------------------------------------------------
1 | {
2 | }


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/67865F02:
--------------------------------------------------------------------------------
1 | {
2 | }


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/741CD66D:
--------------------------------------------------------------------------------
1 | {
2 | }


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/8DEFE4DD:
--------------------------------------------------------------------------------
1 | {
2 | }


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/AEC4A068:
--------------------------------------------------------------------------------
1 | {
2 | }


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/DB4BBF72:
--------------------------------------------------------------------------------
1 | {
2 | }


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/.RData


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/saved_source_markers:
--------------------------------------------------------------------------------
1 | {"active_set":"","sets":[]}


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | 


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/pcs/source-pane.pper:
--------------------------------------------------------------------------------
1 | {
2 |     "activeTab" : 2
3 | }


--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/.DS_Store


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/1B3FF799:
--------------------------------------------------------------------------------
1 | {
2 |     "tempName" : "Untitled2"
3 | }


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/60D253CC:
--------------------------------------------------------------------------------
1 | {
2 |     "tempName" : "Untitled1"
3 | }


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/7D846616:
--------------------------------------------------------------------------------
1 | {
2 |     "tempName" : "Untitled1"
3 | }


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/AB15AAE:
--------------------------------------------------------------------------------
1 | {
2 |     "tempName" : "Untitled1"
3 | }


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/B821A3B8:
--------------------------------------------------------------------------------
1 | {
2 |     "tempName" : "Untitled1"
3 | }


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/session-persistent-state:
--------------------------------------------------------------------------------
1 | virtual-session-id="5DBF5373"
2 | 


--------------------------------------------------------------------------------
/docs/untangle.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/docs/untangle.pdf


--------------------------------------------------------------------------------
/docs/images/csvlist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/docs/images/csvlist.png


--------------------------------------------------------------------------------
/docs/images/excellist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/docs/images/excellist.png


--------------------------------------------------------------------------------
/docs/images/tweet_images.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/docs/images/tweet_images.png


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/FA247785:
--------------------------------------------------------------------------------
1 | {
2 |     "last_setup_crc32" : "DAF4EF54bb338d19",
3 |     "tempName" : "Untitled1"
4 | }


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/pcs/workbench-pane.pper:
--------------------------------------------------------------------------------
1 | {
2 |     "TabSet1" : 2,
3 |     "TabSet2" : 0,
4 |     "TabZoom" : {
5 |     }
6 | }


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/pcs/debug-breakpoints.pper:
--------------------------------------------------------------------------------
1 | {
2 |     "debugBreakpointsState" : {
3 |         "breakpoints" : [
4 |         ]
5 |     }
6 | }


--------------------------------------------------------------------------------
/docs/images/memeprovider833888807959289856.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/docs/images/memeprovider833888807959289856.png


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(bulk_csv)
 4 | export(bulk_excel)
 5 | export(dl_file)
 6 | export(packagr)
 7 | export(setup_folders)
 8 | export(tweet_cap)
 9 | export(untangle)
10 | 


--------------------------------------------------------------------------------
/.Rproj.user/shared/notebooks/paths:
--------------------------------------------------------------------------------
1 | /Users/andrewtran/Projects/muckrakr/R/dl_file.R="9577CFAE"
2 | /Users/andrewtran/Projects/muckrakr/R/packagr.R="E8A46326"
3 | /Users/andrewtran/Projects/muckrakr/R/setup_folders.R="F50121FC"
4 | 


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/rmd-outputs:
--------------------------------------------------------------------------------
 1 | ~/Documents/Github/muckrakr/docs/tweet_cap.html
 2 | ~/Documents/Github/muckrakr/tweet_cap.html
 3 | ~/Documents/Github/muckrakr/tweet_cap.html
 4 | ~/Documents/Github/muckrakr/docs/tweet_cap.html
 5 | 
 6 | 
 7 | 
 8 | 
 9 | 
10 | 


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/pcs/files-pane.pper:
--------------------------------------------------------------------------------
 1 | {
 2 |     "path" : "~/Documents/Github/muckrakr/R",
 3 |     "sortOrder" : [
 4 |         {
 5 |             "ascending" : false,
 6 |             "columnIndex" : 2
 7 |         },
 8 |         {
 9 |             "ascending" : false,
10 |             "columnIndex" : 4
11 |         }
12 |     ]
13 | }


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/pcs/windowlayoutstate.pper:
--------------------------------------------------------------------------------
 1 | {
 2 |     "left" : {
 3 |         "panelheight" : 813,
 4 |         "splitterpos" : 339,
 5 |         "topwindowstate" : "NORMAL",
 6 |         "windowheight" : 851
 7 |     },
 8 |     "right" : {
 9 |         "panelheight" : 813,
10 |         "splitterpos" : 511,
11 |         "topwindowstate" : "NORMAL",
12 |         "windowheight" : 851
13 |     }
14 | }


--------------------------------------------------------------------------------
/R/setup_folders.R:
--------------------------------------------------------------------------------
 1 | #' Command to set up an optimized folder structure for your project
 2 | #'
 3 | #' Folders that will be generated in the current working directory:
 4 | #' `raw_data`, `output_data`, `rmd`, `docs`, `scripts`. Folders that already
 5 | #' exist are left untouched.
 6 | #'
 7 | #' @return A named logical vector with one element per folder: `TRUE` if the
 8 | #'   folder was created by this call, `FALSE` otherwise (for example because
 9 | #'   it already existed).
10 | #' @export
11 | #' @examples
12 | #' setup_folders()
13 | 
14 | 
15 | setup_folders <- function(){
16 | 
17 |   folder_names <- c("raw_data", "output_data", "rmd", "docs", "scripts")
18 | 
19 |   # vapply() pins the result to a logical vector (named after the folders);
20 |   # showWarnings = FALSE keeps a re-run quiet when folders are already there.
21 |   vapply(folder_names, dir.create, logical(1), showWarnings = FALSE)
22 | 
23 | }
24 | 


--------------------------------------------------------------------------------
/man/setup_folders.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/setup_folders.R
 3 | \name{setup_folders}
 4 | \alias{setup_folders}
 5 | \title{Command to set up an optimized folder structure for your project}
 6 | \usage{
 7 | setup_folders()
 8 | }
 9 | \description{
10 | Folders that will be generated: `raw_data`, `output_data`, `rmd`, `docs`, `scripts`
11 | }
12 | \examples{
13 | setup_folders()
14 | }
15 | 


--------------------------------------------------------------------------------
/muckrakr.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | 


--------------------------------------------------------------------------------
/docs/index.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "muckrakr"
 3 | output: html_document
 4 | ---
 5 | 
 6 | ----
 7 | 
 8 | A growing collection of R functions and relationship files for data journalists.
 9 | 
10 | Functions so far:
11 | 
12 | * `untangle` - Disentangling a complex variable [[Documentation](http://andrewbtran.github.com/muckrakr/untangle.html)]
13 | 
14 | 
15 | ## Installation
16 | 
17 | ----
18 | 
19 | ```
20 | #install.packages("devtools")
21 | devtools::install_github("andrewbtran/muckrakr")
22 | ```
23 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: muckrakr
 2 | Type: Package
 3 | Title: R tools for journalists
 4 | Version: 0.1.0
 5 | Authors@R: c(
 6 |     person("Andrew", "Tran", email = "abtran@gmail.com", role = "cre"),
 7 |     person("Robert", "Kabacoff", email = "rkabacoff@wesleyan.edu", role = "aut"))
 8 | Description: A collection of tools and relationship files for data journalists.
 9 | URL: http://andrewbtran.github.io/muckrakr/
10 | BugReports: https://github.com/andrewbtran/muckrakr/issues
11 | License: What license is it under?
12 | Encoding: UTF-8
13 | LazyData: true
14 | RoxygenNote: 6.0.1
15 | 


--------------------------------------------------------------------------------
/man/dl_file.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dl_file.R
 3 | \name{dl_file}
 4 | \alias{dl_file}
 5 | \title{Downloads file from URL if file doesn't exist in set folder}
 6 | \usage{
 7 | dl_file(folder = "data", link = "filenamedefaultbulkcsv2017.csv")
 8 | }
 9 | \arguments{
10 | \item{folder}{The folder in relation to your working directory where the file should exist. Default folder is `data`.}
11 | 
12 | \item{link}{The link to where the file exists}
13 | }
14 | \description{
15 | Downloads file from URL if file doesn't exist in set folder
16 | }
17 | \examples{
18 | dl_file(folder="data", link="https://website.com/data/bostonpayroll2013.csv")
19 | }
20 | \keyword{and}
21 | \keyword{checking}
22 | \keyword{downloading}
23 | \keyword{file}
24 | 


--------------------------------------------------------------------------------
/man/tweet_cap.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/tweet_cap.R
 3 | \name{tweet_cap}
 4 | \alias{tweet_cap}
 5 | \title{Generate a screenshot of a tweet}
 6 | \usage{
 7 | tweet_cap(link = "", filename = "NOTHINGTWEET_CAP",
 8 |   folder = "DEFAULTTWEETCAP")
 9 | }
10 | \arguments{
11 | \item{link}{the URL of the tweet}
12 | 
13 | \item{filename}{the base name of the image you want to save. Default will be user's twitter handle.}
14 | 
15 | \item{folder}{the folder you want to save the screenshot to}
16 | }
17 | \description{
18 | Quick screenshot of a tweet based on a link
19 | }
20 | \examples{
21 | tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images")
22 | }
23 | \keyword{screenshot}
24 | \keyword{twitter}
25 | 


--------------------------------------------------------------------------------
/man/packagr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/packagr.R
 3 | \name{packagr}
 4 | \alias{packagr}
 5 | \title{Downloading packages that are needed but aren't yet on the system}
 6 | \usage{
 7 | packagr(packages = c("tidyverse"), gh_packages = c("abtran/muckrakr"))
 8 | }
 9 | \arguments{
10 | \item{packages}{Array of packages found on CRAN}
11 | 
12 | \item{gh_packages}{Array of packages found on Github}
13 | }
14 | \description{
15 | Downloading packages that are needed but aren't yet on the system
16 | }
17 | \examples{
18 | packagr(packages=c("tidyverse", "rtweet"), gh_packages=c("abtran/muckrakr"))
19 | }
20 | \keyword{Downloading}
21 | \keyword{are}
22 | \keyword{aren't}
23 | \keyword{but}
24 | \keyword{needed}
25 | \keyword{on}
26 | \keyword{packages}
27 | \keyword{system}
28 | \keyword{that}
29 | \keyword{the}
30 | \keyword{yet}
31 | 


--------------------------------------------------------------------------------
/R/dl_file.R:
--------------------------------------------------------------------------------
 1 | #' Downloads file from URL if file doesn't exist in set folder
 2 | #'
 3 | #' The file name is the part of `link` after its last `/`. When a file with
 4 | #' that name is already present in `folder`, nothing is downloaded.
 5 | #'
 6 | #' @param folder The folder in relation to your working directory where the file should exist. Default folder is `data`.
 7 | #' @param link The link to where the file exists
 8 | #' @return The printed status message, invisibly.
 9 | #' @keywords file checking and downloading
10 | #' @export
11 | #' @examples
12 | #' dl_file(folder="data", link="https://website.com/data/bostonpayroll2013.csv")
13 | 
14 | dl_file <- function(folder="data", link="filenamedefaultbulkcsv2017.csv"){
15 | 
16 |   # Keep only what follows the last "/" of the link.
17 |   file_name <- gsub(".*\\/", "", link)
18 |   if (!nzchar(file_name)) {
19 |     stop("`link` does not end in a file name: ", link, call. = FALSE)
20 |   }
21 |   file_folder <- file.path(folder, file_name)
22 | 
23 |   if (!file.exists(file_folder)) {
24 | 
25 |     # recursive = TRUE lets nested folders such as "data/raw" be created.
26 |     dir.create(folder, showWarnings = FALSE, recursive = TRUE)
27 | 
28 |     # mode = "wb" writes the bytes unchanged, so binary files (xlsx, zip, ...)
29 |     # are not altered on Windows.
30 |     status <- tryCatch(
31 |       download.file(link, file_folder, mode = "wb"),
32 |       error = function(e) {
33 |         # Remove a partial download; otherwise the next call would find the
34 |         # broken file and report "File already exists".
35 |         unlink(file_folder)
36 |         stop(e)
37 |       }
38 |     )
39 |     # download.file() signals success with a status of 0.
40 |     if (!identical(as.integer(status), 0L)) {
41 |       unlink(file_folder)
42 |       stop("Download failed with status ", status, ": ", link, call. = FALSE)
43 |     }
44 |     print("File download complete")
45 |   } else {
46 |     print("File already exists")
47 |   }
48 | 
49 | }
50 | 


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/INDEX:
--------------------------------------------------------------------------------
 1 | ~%2FDocuments%2FGithub%2Fmuckrakr%2F.gitignore="5BEA6FD4"
 2 | ~%2FDocuments%2FGithub%2Fmuckrakr%2FDESCRIPTION="8DEFE4DD"
 3 | ~%2FDocuments%2FGithub%2Fmuckrakr%2FNAMESPACE="1EBC0BC2"
 4 | ~%2FDocuments%2FGithub%2Fmuckrakr%2FR%2Fbulk_csv.R="7D846616"
 5 | ~%2FDocuments%2FGithub%2Fmuckrakr%2FR%2Fbulk_excel.R="1B3FF799"
 6 | ~%2FDocuments%2FGithub%2Fmuckrakr%2FR%2Ftweet_cap.R="B821A3B8"
 7 | ~%2FDocuments%2FGithub%2Fmuckrakr%2FR%2Funtangle.R="67865F02"
 8 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Fdocs%2Fbulk_csv.Rmd="60D253CC"
 9 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Fdocs%2Fbulk_excel.Rmd="AB15AAE"
10 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Fdocs%2Findex.Rmd="741CD66D"
11 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Fdocs%2Ftweet_cap.Rmd="3881D3BE"
12 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Fdocs%2Funtangle.Rmd="DB4BBF72"
13 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Freadme.MD="AEC4A068"
14 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Ftweet_cap.Rmd="FA247785"
15 | 


--------------------------------------------------------------------------------
/man/bulk_excel.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/bulk_excel.R
 3 | \name{bulk_excel}
 4 | \alias{bulk_excel}
 5 | \title{Creating a single data frame from a folder of similarly structured Excel spreadsheets}
 6 | \usage{
 7 | bulk_excel(folder = "DEFAULTBULKCSV2017",
 8 |   export = "filenamedefaultbulkcsv2017.csv", sheet = 1, col_names = TRUE,
 9 |   col_types = NULL, na = "", skip = 0)
10 | }
11 | \arguments{
12 | \item{folder}{The folder in relation to your working directory where the Excel files exist. Default folder is the current working directory.}
13 | 
14 | \item{export}{File name to export csv file as, if wanted.}
15 | }
16 | \description{
17 | Like Voltron-ing your Excel data. This package is based on readxl, so passing on variables from read_excel will work in bulk_excel. These variables will apply to the import of each Excel spreadsheet.
18 | }
19 | \examples{
20 | bulk_excel(folder="data", export="combined_data.csv")
21 | }
22 | \keyword{appending}
23 | \keyword{data}
24 | 


--------------------------------------------------------------------------------
/man/untangle.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/untangle.R
 3 | \name{untangle}
 4 | \alias{untangle}
 5 | \title{Disentangling a complex variable}
 6 | \usage{
 7 | untangle(data, x, pattern, verbose = FALSE)
 8 | }
 9 | \arguments{
10 | \item{data}{The data frame that contains the column to untangle}
11 | 
12 | \item{x}{Column to untangle}
13 | 
14 | \item{pattern}{Special characters that separate the variables in the column}
15 | 
16 | \item{verbose}{TRUE or FALSE}
17 | }
18 | \description{
19 | We are occasionally faced with a data set variable that contains multiple pieces of information. Multiple pieces of information are stored in this variable. In this case these pieces are delineated by a comma or colon. As is, the variable is not useful.
20 | The *untangle* function can be used to create a set of dummy codes from this variable that will be more useful. This results in a new dataset with dummy codes
21 | }
22 | \examples{
23 | untangle(data=test_data, x="charges", pattern="[,:]", verbose=TRUE)
24 | }
25 | \keyword{data}
26 | \keyword{wrangling}
27 | 


--------------------------------------------------------------------------------
/man/bulk_csv.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/bulk_csv.R
 3 | \name{bulk_csv}
 4 | \alias{bulk_csv}
 5 | \title{Creating a single data frame from a folder of similarly structured CSV spreadsheets}
 6 | \usage{
 7 | bulk_csv(folder = "DEFAULTBULKCSV2017",
 8 |   export = "filenamedefaultbulkcsv2017.csv", col_names = TRUE,
 9 |   col_types = NULL, na = c("", "NA"), quoted_na = TRUE, comment = "",
10 |   trim_ws = TRUE, skip = 0, n_max = Inf, guess_max = min(1000, n_max),
11 |   progress = interactive())
12 | }
13 | \arguments{
14 | \item{folder}{The folder in relation to your working directory where the csv files exist. Default folder is the current working directory.}
15 | 
16 | \item{export}{File name to export csv file as, if wanted.}
17 | }
18 | \description{
19 | Like Voltron-ing your data. This package is based on readr, so passing on variables from read_csv will work in bulk_csv. These variables will apply to the import of each CSV sheet.
20 | }
21 | \examples{
22 | bulk_csv(folder="data", export="combined_data.csv")
23 | }
24 | \keyword{appending}
25 | \keyword{data}
26 | 


--------------------------------------------------------------------------------
/readme.MD:
--------------------------------------------------------------------------------
 1 | # muckrakr
 2 | 
 3 | ----
 4 | 
 5 | A growing collection of R functions and relationship files for data journalists.
 6 | 
 7 | Functions so far:
 8 | 
 9 | * `untangle` - Disentangling a complex variable [[Documentation](http://andrewbtran.github.com/muckrakr/untangle.html)]
10 | * `bulk_csv` - Creating a large data frame by appending a folder of CSV files [[Documentation](http://andrewbtran.github.com/muckrakr/bulk_csv.html)]
11 | * `bulk_excel` - Creating a large data frame by appending a folder of Excel files [[Documentation](http://andrewbtran.github.com/muckrakr/bulk_excel.html)]
12 | * `tweet_cap` - Output a screenshot of a tweet (or list of tweets) by URL [[Documentation](http://andrewbtran.github.com/muckrakr/tweet_cap.html)]
13 | * `setup_folders` - Command to set up an optimized folder structure for your project [Documentation to come]
14 | * `dl_file` - Downloads file from URL if file doesn't exist in set folder [Documentation to come]
15 | * `packagr` - Downloading packages that are needed but aren't yet on the system [Documentation to come]
16 | 
17 | ## Installation
18 | 
19 | ----
20 | 
21 | ```
22 | #install.packages("devtools")
23 | devtools::install_github("andrewbtran/muckrakr")
24 | ```
25 | 


--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/persistent-state:
--------------------------------------------------------------------------------
 1 | build-last-errors="[]"
 2 | build-last-errors-base-dir="~/Documents/Github/muckrakr/"
 3 | build-last-outputs="[{\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source muckrakr\\n\\n\",\"type\":0},{\"output\":\"* installing to library ‘/Library/Frameworks/R.framework/Versions/3.3/Resources/library’\\n\",\"type\":1},{\"output\":\"* installing *source* package ‘muckrakr’ ...\\n\",\"type\":1},{\"output\":\"** R\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** preparing package for lazy loading\\n\",\"type\":1},{\"output\":\"** help\\n\",\"type\":1},{\"output\":\"*** installing help indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** building package indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package can be loaded\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"* DONE (muckrakr)\\n\",\"type\":1},{\"output\":\"\",\"type\":1}]"
 4 | compile_pdf_state="{\"errors\":[],\"output\":\"\",\"running\":false,\"tab_visible\":false,\"target_file\":\"\"}"
 5 | console_procs="[]"
 6 | files.monitored-path=""
 7 | find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":false,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOff\":[],\"matchOn\":[]},\"running\":false}"
 8 | imageDirtyState="0"
 9 | saveActionState="0"
10 | 


--------------------------------------------------------------------------------
/R/packagr.R:
--------------------------------------------------------------------------------
 1 | #' Downloading packages that are needed but aren't yet on the system
 2 | #'
 3 | #' Tries to attach every requested package. Packages that cannot be attached
 4 | #' are installed (from CRAN or from GitHub) and then attached.
 5 | #'
 6 | #' @param packages Array of packages found on CRAN
 7 | #' @param gh_packages Array of packages found on Github, written as
 8 | #'   `"user/repo"`. The default value `"abtran/muckrakr"` is treated as
 9 | #'   "no GitHub packages" and is skipped.
10 | #' @return Invisibly, a named logical vector: `TRUE` for each package that
11 | #'   could be attached, `FALSE` otherwise.
12 | #' @keywords Downloading packages that are needed but aren't yet on the system
13 | #' @export
14 | #' @examples
15 | #' packagr(packages=c("tidyverse", "rtweet"), gh_packages=c("abtran/muckrakr"))
16 | 
17 | packagr <- function(packages=c("tidyverse"), gh_packages=c("abtran/muckrakr")){
18 | 
19 |   # Attach each package and report success per package. require() is used on
20 |   # purpose: its FALSE return value is how a missing package is detected.
21 |   attach_pkgs <- function(pkgs) {
22 |     vapply(pkgs, require, logical(1),
23 |            warn.conflicts = TRUE, character.only = TRUE)
24 |   }
25 | 
26 |   # CRAN packages: install whatever could not be attached, then retry.
27 |   packages <- as.character(packages)
28 |   check <- attach_pkgs(packages)
29 |   if (any(!check)) {
30 |     pkgs.missing <- packages[!check]
31 |     install.packages(pkgs.missing)
32 |     check[pkgs.missing] <- attach_pkgs(pkgs.missing)
33 |   }
34 | 
35 |   # GitHub packages: drop the placeholder default first, so the test below is
36 |   # a scalar length check and any number of repositories can be passed.
37 |   gh_packages <- setdiff(as.character(gh_packages), "abtran/muckrakr")
38 |   gh_check <- logical(0)
39 |   if (length(gh_packages) > 0) {
40 |     # The package name is taken to be the part after the last "/".
41 |     gh_names <- gsub(".*\\/", "", gh_packages)
42 |     gh_check <- attach_pkgs(gh_names)
43 |     if (any(!gh_check)) {
44 |       if (!requireNamespace("devtools", quietly = TRUE)) {
45 |         stop("Package 'devtools' is needed to install packages from GitHub",
46 |              call. = FALSE)
47 |       }
48 |       # Select the missing repositories by position, not by pattern matching,
49 |       # so one name cannot accidentally match another repository.
50 |       gh_missing <- !gh_check
51 |       devtools::install_github(gh_packages[gh_missing])
52 |       gh_check[gh_missing] <- attach_pkgs(gh_names[gh_missing])
53 |     }
54 |   }
55 | 
56 |   invisible(c(check, gh_check))
57 | }
58 | 


--------------------------------------------------------------------------------
/R/untangle.R:
--------------------------------------------------------------------------------
 1 | #' Disentangling a complex variable
 2 | #'
 3 | #' We are occasionally faced with a data set variable that contains multiple pieces of information. Multiple pieces of information are stored in this variable. In this case these pieces are delineated by a comma or colon. As is, the variable is not useful.
 4 | #' The *untangle* function can be used to create a set of dummy codes from this variable that will be more useful. This results in a new dataset with dummy codes
 5 | #' @param data The data frame that contains the column to untangle
 6 | #' @param x Column to untangle
 7 | #' @param pattern Special characters that separate the variables in the column
 8 | #' @param verbose TRUE or FALSE. If TRUE, the unique codes that were found are printed.
 9 | #' @return `data` with one extra column per unique code (lower-cased, spaces
10 | #'   replaced by `_`). A column holds 1 where the row contains that code, 0
11 | #'   where it does not, and `NA` where the untangled value itself is missing.
12 | #' @keywords data wrangling
13 | #' @export
14 | #' @examples
15 | #' untangle(data=test_data, x="charges", pattern="[,:]", verbose=TRUE)
16 | 
17 | untangle <- function(data, x, pattern, verbose=FALSE){
18 |   if (!requireNamespace("stringr", quietly = TRUE)) {
19 |     stop("Package 'stringr' is required by untangle()", call. = FALSE)
20 |   }
21 | 
22 |   column <- data[[x]]
23 |   if (is.null(column)) {
24 |     stop("Column `x` was not found in `data`", call. = FALSE)
25 |   }
26 |   variable <- stringr::str_to_lower(column)
27 | 
28 |   # split every value into its pieces: one row per observation, one column
29 |   # per piece; `[]<-` trims the pieces while keeping the matrix shape
30 |   code_matrix <- stringr::str_split(variable, pattern, simplify=TRUE)
31 |   code_matrix[] <- stringr::str_trim(code_matrix)
32 | 
33 |   # obtain list of unique codes (empty and missing pieces are not codes)
34 |   code_vector <- as.character(code_matrix)
35 |   code_vector <- unique(code_vector[!is.na(code_vector) & code_vector != ""])
36 |   if(verbose) cat("[Unique Codes] ", code_vector, "\n", sep="\n")
37 | 
38 |   # create dummy codes matrix
39 |   nobs <- nrow(data)
40 |   ncodes <- length(code_vector)
41 |   dummy_codes <- matrix(NA_real_, nrow=nobs, ncol=ncodes)
42 | 
43 |   # add 1/0 codes; a code counts only when it equals a whole piece, so
44 |   # "theft" does not also flag rows that only contain "grand theft"
45 |   missing_rows <- is.na(variable)
46 |   for(i in seq_len(ncodes)){
47 |     has_code <- rowSums(code_matrix == code_vector[i], na.rm=TRUE) > 0
48 |     dummy_codes[,i] <- ifelse(missing_rows, NA, as.numeric(has_code))
49 |   }
50 | 
51 |   # add dummy code names
52 |   dummy_codes <- as.data.frame(dummy_codes)
53 |   codenames <- stringr::str_replace_all(code_vector, " ", "_")
54 |   names(dummy_codes) <- codenames
55 | 
56 |   # add to data frame
57 |   cbind(data, dummy_codes)
58 | 
59 | }
60 | 


--------------------------------------------------------------------------------
/docs/bulk_csv.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "bulk_csv()"
 3 | author: "Andrew Ba Tran"
 4 | date: "March 30, 2017"
 5 | output: html_document
 6 | ---
 7 | 
 8 | Creating a single data frame from a folder of similarly structured CSV spreadsheets.
 9 | 
10 | ## Description
11 | 
12 | Like Voltron-ing your data
13 | 
14 | ## Usage
15 | 
16 | bulk_csv(folder = "DEFAULTBULKCSV2017",
17 |   export = "filenamedefaultbulkcsv2017.csv")
18 | 
19 | ## Arguments
20 | 
21 | * `folder` - The folder in relation to your working directory where the csv files exist. Default folder is the current working directory.
22 | * `export` - File name to export csv file as, if wanted.
23 | 
24 | ## Note
25 | 
26 | This package is based on [**readr**](https://blog.rstudio.org/2015/04/09/readr-0-1-0/), so passing on variables from `read_csv` will work in `bulk_csv`. These variables will apply to the import of each CSV sheet.
27 | 
28 | 
29 | ### Documentation
30 | 
31 | In this example, these CSVs reside in a folder called **state_data**.
32 | 
33 | ```{r setup, include=FALSE}
34 | knitr::opts_chunk$set(echo = FALSE, message = FALSE)
35 | ```
36 | 
37 | <div style="max-width:400px">
38 | <center>![Folder full of CSVs](images/csvlist.png)</center></div>
39 | 
40 | ----
41 | 
42 | The data structure **must** be similar for each data set, as in column names and data types within the columns are consistent.
43 | 
44 | Or the bulk join will not work.
45 | 
46 | To create a new dataframe with all the data sets combined, the command would be:
47 | 
48 | `combined_data <- bulk_csv(folder="state_data")`
49 | 
50 | To simply export the data as a new combined CSV file, the command would be:
51 | 
52 | `bulk_csv(folder="state_data", export="combined_data.csv")`
53 | 
54 | This would save the file to the current working directory.
55 | 
56 | You can specify the location to download in `export` as long as you put the folder structure before the CSV file name, like:
57 | 
58 | `bulk_csv(folder="state_data", export="data_export/combined_data.csv")`
59 | 
60 | 


--------------------------------------------------------------------------------
/docs/bulk_excel.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "bulk_excel()"
 3 | author: "Andrew Ba Tran"
 4 | date: "March 30, 2017"
 5 | output: html_document
 6 | ---
 7 | 
 8 | Creating a single data frame from a folder of similarly structured Excel spreadsheets.
 9 | 
10 | ## Description
11 | 
12 | Like Voltron-ing your data
13 | 
14 | ## Usage
15 | 
16 | bulk_excel(folder="DEFAULTBULKCSV2017", export="filenamedefaultbulkcsv2017.csv")
17 | 
18 | ## Arguments
19 | 
20 | * `folder` - The folder in relation to your working directory where the Excel files exist. Default folder is the current working directory.
21 | * `export` - File name to export CSV file (Not as an Excel file, sorry) as, if wanted.
22 | 
23 | ## Note
24 | 
25 | This package is based on [**readxl**](https://github.com/tidyverse/readxl), so passing on variables from `read_excel` will work in `bulk_excel`. These variables will apply to the import of each Excel spreadsheet.
26 | 
27 | ### Documentation
28 | 
29 | In this example, these Excel spreadsheets reside in a folder called **state_data**.
30 | 
31 | ```{r setup, include=FALSE}
32 | knitr::opts_chunk$set(echo = FALSE, message = FALSE)
33 | ```
34 | 
35 | <div style="max-width:400px">
36 | <center>![Folder full of Excel sheets](images/excellist.png)</center></div>
37 | 
38 | ----
39 | 
40 | The data structure **must** be similar for each data set, as in column names and data types within the columns are consistent.
41 | 
42 | Or the bulk join will not work.
43 | 
44 | To create a new dataframe with all the data sets combined, the command would be:
45 | 
46 | `combined_data <- bulk_excel(folder="state_data")`
47 | 
48 | To simply export the data as a new combined CSV file, the command would be:
49 | 
50 | `bulk_excel(folder="state_data", export="combined_data.csv")`
51 | 
52 | This would save the file to the current working directory.
53 | 
54 | You can specify the location to download in `export` as long as you put the folder structure before the CSV file name, like:
55 | 
56 | `bulk_excel(folder="state_data", export="data_export/combined_data.csv")`
57 | 
58 | 


--------------------------------------------------------------------------------
/R/tweet_cap.R:
--------------------------------------------------------------------------------
 1 | #' Generate a screenshot of a tweet
 2 | #'
 3 | #' Quick screenshot of a tweet based on a link
 4 | #' @param link the URL of the tweet
 5 | #' @param filename the base name of the image you want to save. Default will be user's twitter handle.
 6 | #' @param folder the folder you want to save the screenshot to
 7 | #' @keywords twitter screenshot
 8 | #' @export
 9 | #' @examples
10 | #' tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images")
11 | 
12 | 
13 | tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){
14 | 
15 |   if(!require(devtools)){
16 |     install.packages("devtools")
17 |     library(devtools)
18 |   }
19 |   if(!require(webshot)){
20 |     devtools::install_github("wch/webshot")
21 |     library(webshot)
22 |   }
23 | 
24 |   if (folder=="DEFAULTTWEETCAP") {
25 |     folder=getwd()
26 |   } else {
27 |     folder <- paste0(getwd(), "/", folder)
28 |   }
29 | 
30 |   if (length(link)==1) {
31 |     link = link
32 |     username <- gsub("https://twitter.com/", "", link)
33 |     username <- gsub("/.*","",username, fixed=F)
34 |     if (filename=="NOTHINGTWEET_CAP") {
35 |       pre_name <- username
36 |     } else {
37 |       pre_name <- filename
38 |     }
39 |     id_num <- gsub(".*/", "", link)
40 |     image_name <- paste0(pre_name, id_num, ".png")
41 |     webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
42 | 
43 |   }
44 | 
45 |   if (length(link)>1) {
46 |     pb <- txtProgressBar(min = 0, max = length(link), style = 3)
47 | 
48 |     for (i in 1:length(link)){
49 | 
50 |       link_i = link[i]
51 |       username <- gsub("https://twitter.com/", "", link_i)
52 |       username <- gsub("/.*","",username, fixed=F)
53 | 
54 |       if (filename=="NOTHINGTWEET_CAP") {
55 |         pre_name <- username
56 |       } else {
57 |         pre_name <- filename
58 |       }
59 |       id_num <- gsub(".*/", "", link_i)
60 |       image_name <- paste0(pre_name, id_num, ".png")
61 |       webshot(link_i, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
62 |       setTxtProgressBar(pb, i)
63 |     }
64 |   }
65 | 
66 | 
67 | }
68 | 
69 | 


--------------------------------------------------------------------------------
/docs/tweet_cap.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "tweet_cap()"
 3 | author: "Andrew"
 4 | date: "3/31/2017"
 5 | output: html_document
 6 | ---
 7 | 
 8 | Generate a screenshot of a tweet
 9 | 
10 | ## Description
11 | 
12 | Quick screenshot of a tweet based on a link or array of links. The image will represent how the tweet looks on a browser.
13 |   
14 | ## Usage
15 | 
16 | tweet_cap(link = "", filename = "NOTHINGTWEET_CAP", folder = "DEFAULTTWEETCAP")
17 | 
18 | ## Arguments
19 | 
20 | 
21 | * `link` - the URL of the tweet
22 | * `filename` - the base name of the image you want to save. Default will be user's twitter handle.
23 | * `folder` - the folder you want to save the screenshot to. If ignored, the folder will be the working directory.
24 | 
25 | ### Documentation
26 | 
27 | #### Single tweet
28 | 
29 | To grab one tweet, just pass in its URL.
30 | 
31 | `tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images")`
32 | 
33 | This image will be generated to the **images** folder.
34 | 
35 | <div style="max-width:500px">
36 | <center>![Single image](images/memeprovider833888807959289856.png)</center></div>
37 | 
38 | ----
39 | 
40 | #### Multiple tweets
41 | 
42 | If you had a data frame of links:
43 | 
44 | ```{r tweets_list, warning=F, message=F}
45 | library(tidyverse)
46 | library(knitr)
47 | ```
48 | 
49 | ```{r loading_data}
# Example links, one row per tweet, with the account that posted each one
tweet_links <- c(
  "https://twitter.com/realDonaldTrump/status/847766558520856578",
  "https://twitter.com/realDonaldTrump/status/847056211006631936",
  "https://twitter.com/realDonaldTrump/status/846854703183020032",
  "https://twitter.com/byrdinator/status/847600835148128258",
  "https://twitter.com/Gizmodo/status/847532891127074823"
)
tweet_accounts <- c(
  "realDonaldTrump",
  "realDonaldTrump",
  "realDonaldTrump",
  "byrdinator",
  "Gizmodo"
)
tweets <- tibble(Link = tweet_links, Account = tweet_accounts)

# render the data frame as a table in the knitted document
kable(tweets)
60 | ```
61 | 
62 | To generate a folder of images of the list of tweets, use:
63 | 
64 | ```{r, eval=F}
65 | tweet_cap(tweets$Link, folder="images")
66 | ```
67 | 
68 | And you'll get: 
69 | 
70 | <div style="max-width:500px">
71 | <center>![Folder of images](images/tweet_images.png)</center></div>
72 | 


--------------------------------------------------------------------------------
/docs/untangle.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Disentangling a complex variable"
 3 | author: "Rob Kabacoff, Ph.D."
 4 | date: "March 9, 2017"
 5 | output: html_document
 6 | ---
 7 | 
 8 | ```{r setup, include=FALSE}
 9 | knitr::opts_chunk$set(echo = FALSE, message = FALSE)
10 | ```
11 | 
12 | ## Untangling a complex variable
13 | 
14 | We are occasionally faced with a data set variable that contains multiple pieces of information. Consider the variable **charges** in the following dataset **test_data**.
15 | 
16 | ```{r dataset, warning=F, message=F}
# Example data: `charges` packs several offences into one string, separated
# by commas or colons, with inconsistent case and one empty value
test_data <- data.frame(id=1:4,
                        charges = c("Murder,Kidnapping,Burglary",
                                    "burglary: assault and battery",
                                    "murder",
                                    ""))
# `caption` was misspelled as `cpation`, so the table had no caption
knitr::kable(test_data, caption="test_data")
23 | ```
24 | 
25 | Multiple pieces of information are stored in this variable. In this case these pieces are delineated by a comma or colon. As is, the variable is not useful.
26 | 
27 | The **untangle** function can be used to create a set of dummy codes from this variable that will be more useful. This results in a new dataset with dummy codes:
28 | 
29 | ```{r function, warning=F}
# Create 1/0 dummy-code columns from a column that packs several codes into
# one string.
#
# data    - data frame holding the column to untangle
# x       - the column to untangle (name as a string, or position)
# pattern - separator between codes, as understood by stringr::str_split()
# verbose - if TRUE, print the unique codes that were found
#
# Returns `data` with one extra column per unique code. Codes are lower-cased
# and trimmed; spaces become underscores in the column names. A cell is 1 when
# the row contains exactly that code, 0 when it does not, and NA when the
# source value itself is missing.
untangle <- function(data, x, pattern, verbose=FALSE){
  if (!requireNamespace("stringr", quietly = TRUE)) {
    stop("untangle() needs the 'stringr' package", call. = FALSE)
  }

  column <- data[[x]]
  if (is.null(column)) {
    stop("column `", x, "` not found in `data`", call. = FALSE)
  }
  variable <- stringr::str_to_lower(as.character(column))

  # one row per observation, one column per code position; trimmed in place
  # so the same matrix serves for code discovery and for matching
  code_matrix <- stringr::str_split(variable, pattern, simplify=TRUE)
  code_matrix[] <- stringr::str_trim(code_matrix)

  # obtain list of unique codes (empty padding and missing values dropped)
  code_vector <- as.character(code_matrix)
  code_vector <- unique(code_vector[!is.na(code_vector) & code_vector != ""])
  if(verbose) cat("[Unique Codes] ", code_vector, "\n", sep="\n")

  # create dummy codes matrix
  nobs <- nrow(data)
  dummy_codes <- matrix(0, nrow=nobs, ncol=length(code_vector))

  # add 1/0 codes: a row gets 1 only when one of its own codes equals the
  # code exactly, so "assault" no longer flags "aggravated assault" and codes
  # containing regex characters such as "(" are handled literally
  for(i in seq_along(code_vector)){
    has_code <- rowSums(code_matrix == code_vector[i], na.rm=TRUE) > 0
    dummy_codes[,i] <- ifelse(has_code, 1, 0)
  }
  # a missing source value means the codes are unknown, not absent
  dummy_codes[is.na(variable), ] <- NA

  # add dummy code names
  dummy_codes <- as.data.frame(dummy_codes)
  names(dummy_codes) <- gsub(" ", "_", code_vector, fixed=TRUE)

  # add to data frame
  cbind(data, dummy_codes)
}
61 | 
62 | ```
63 | 
64 | ```{r, echo=TRUE, warning=F}
65 | test_data <- untangle(data=test_data, x="charges", pattern="[,:]", verbose=TRUE)
66 | 
67 | ```
68 | 
69 | 
70 | 
71 | ```{r results}
72 | knitr::kable(test_data, caption="New test_data")
73 | 
74 | ```
75 | 
76 | 
77 | 


--------------------------------------------------------------------------------
/R/bulk_excel.R:
--------------------------------------------------------------------------------
 1 | #' Creating a single data frame from a folder of similarly structured Excel spreadsheets
 2 | #'
 3 | #' Like Voltron-ing your Excel data. This package is based on readxl, so passing on variables from read_excel will work in bulk_excel. These variables will apply to the import of each Excel spreadsheet.
 4 | 
 5 | #' @param folder The folder in relation to your working directory where the Excel files exist. Default folder is the current working directory.
 6 | #' @param export File name to export csv file as, if wanted.
 7 | #' @keywords data appending
 8 | #' @export
 9 | #' @examples
10 | #' bulk_excel(folder="data", export="combined_data.csv")
11 | 
12 | # This is the read.csv method using lapply
13 | 
14 | #bulk_excel <- function(folder="DEFAULTBULKCSV2017", export="filenamedefaultbulkcsv2017.xls"){
15 | #  require(dplyr)
16 | #
17 | #  if (folder=="DEFAULTBULKCSV2017" | folder=="") {
18 | #    folder <- getwd()
19 | #  } else {
20 | #    folder <- paste0(getwd(), "/", folder)
21 | #  }
22 | #  files = list.files(folder, pattern="*.csv")
23 | #  tbl <- lapply(paste0(folder, "/", files), read.csv) %>% bind_rows()
24 | #  return(tbl)
25 | #  if (export!="filenamedefaultbulkcsv2017.csv" & export!="") {
26 | #    write_csv(tbl, export)
27 | #  }
28 | #}
29 | 
30 | 
31 | 
bulk_excel <- function(folder="DEFAULTBULKCSV2017", export="filenamedefaultbulkcsv2017.csv",  sheet = 1, col_names = TRUE, col_types = NULL, na = "",
                       skip = 0){
  # Reads every .xls/.xlsx file in `folder` with readxl::read_excel(), stacks
  # the sheets with rbind() and returns the combined data frame. When `export`
  # is set, the result is also written as a CSV file with readr::write_csv().
  # `sheet`, `col_names`, `col_types`, `na` and `skip` are forwarded unchanged
  # to read_excel() for every file.

  # resolve the input folder; absolute paths are used untouched
  if (folder == "DEFAULTBULKCSV2017" || folder == "") {
    folder <- getwd()
  } else if (!grepl("^(/|~|[A-Za-z]:[/\\\\])", folder)) {
    folder <- file.path(getwd(), folder)
  }

  # `pattern` is a single regular expression: match the extension only
  files <- list.files(folder, pattern = "\\.xlsx?$", ignore.case = TRUE)

  # skip the "~$..." lock files Excel leaves next to open workbooks
  files <- files[substr(files, 1, 1) != "~"]

  if (length(files) == 0) {
    stop("No Excel files (.xls/.xlsx) found in ", folder, call. = FALSE)
  }

  pb <- utils::txtProgressBar(min = 0, max = length(files), style = 3)
  on.exit(close(pb), add = TRUE)

  # collect the sheets first and bind once, instead of growing a data frame
  sheets <- vector("list", length(files))
  for (i in seq_along(files)) {
    file_x <- readxl::read_excel(file.path(folder, files[i]), sheet = sheet,
                                 col_names = col_names, col_types = col_types,
                                 na = na, skip = skip)
    # unnamed columns come back as NA; give them an empty name instead
    colnames(file_x) <- ifelse(is.na(colnames(file_x)), "", colnames(file_x))
    sheets[[i]] <- file_x
    utils::setTxtProgressBar(pb, i)
  }
  all_files <- do.call(rbind, sheets)

  if (export != "filenamedefaultbulkcsv2017.csv" && export != "") {
    readr::write_csv(all_files, export)
  }
  all_files

}
76 | 
77 | 
78 | 


--------------------------------------------------------------------------------
/R/bulk_csv.R:
--------------------------------------------------------------------------------
 1 | #' Creating a single data frame from a folder of similarly structured CSV spreadsheets
 2 | #'
 3 | #' Like Voltron-ing your data. This package is based on readr, so passing on variables from read_csv will work in bulk_csv. These variables will apply to the import of each CSV sheet.
 4 | #' @param folder The folder in relation to your working directory where the csv files exist. Default folder is the current working directory.
 5 | #' @param export File name to export csv file as, if wanted.
 6 | #' @keywords data appending
 7 | #' @export
 8 | #' @examples
 9 | #' bulk_csv(folder="data", export="combined_data.csv")
10 | 
11 | # This is the read.csv method using lapply
12 | 
13 | #bulk_csv <- function(folder="DEFAULTBULKCSV2017", export="filenamedefaultbulkcsv2017.csv", ){
14 | #  require(dplyr)
15 | #
16 | #  if (folder=="DEFAULTBULKCSV2017" | folder=="") {
17 | #    folder <- getwd()
18 | #  } else {
19 | #    folder <- paste0(getwd(), "/", folder)
20 | #  }
21 | #  files = list.files(folder, pattern="*.csv")
22 | #  tbl <- lapply(paste0(folder, "/", files), read.csv) %>% bind_rows()
23 | #  return(tbl)
24 | #  if (export!="filenamedefaultbulkcsv2017.csv" & export!="") {
25 | #    write_csv(tbl, export)
26 | #  }
27 | #}
28 | 
29 | # this is the readr version but it loops and has a progress bar
30 | 
bulk_csv <- function(folder="DEFAULTBULKCSV2017", export="filenamedefaultbulkcsv2017.csv", col_names = TRUE, col_types = NULL,
                     na = c("", "NA"), quoted_na = TRUE,
                     comment = "", trim_ws = TRUE, skip = 0, n_max = Inf,
                     guess_max = min(1000, n_max), progress = interactive()){
  # Reads every .csv file in `folder` with readr::read_csv(), stacks the
  # results with rbind() and returns the combined data frame. When `export`
  # is set, the result is also written with readr::write_csv().
  # All arguments after `export` are forwarded unchanged to read_csv() for
  # every file.

  # resolve the input folder; absolute paths are used untouched
  if (folder == "DEFAULTBULKCSV2017" || folder == "") {
    folder <- getwd()
  } else if (!grepl("^(/|~|[A-Za-z]:[/\\\\])", folder)) {
    folder <- file.path(getwd(), folder)
  }

  # `pattern` is a regular expression: match the extension only, so names
  # that merely contain "csv" (e.g. "data.csv.bak") are not picked up
  files <- list.files(folder, pattern = "\\.csv$", ignore.case = TRUE)

  if (length(files) == 0) {
    stop("No CSV files found in ", folder, call. = FALSE)
  }

  pb <- utils::txtProgressBar(min = 0, max = length(files), style = 3)
  on.exit(close(pb), add = TRUE)

  # collect the tables first and bind once, instead of growing a data frame
  tables <- vector("list", length(files))
  for (i in seq_along(files)) {
    tables[[i]] <- readr::read_csv(file.path(folder, files[i]),
                                   col_names = col_names, col_types = col_types,
                                   na = na, quoted_na = quoted_na,
                                   comment = comment, trim_ws = trim_ws,
                                   skip = skip, n_max = n_max,
                                   guess_max = guess_max, progress = progress)
    utils::setTxtProgressBar(pb, i)
  }
  all_files <- do.call(rbind, tables)

  if (export != "filenamedefaultbulkcsv2017.csv" && export != "") {
    readr::write_csv(all_files, export)
  }
  all_files

}
79 | 
80 | 


--------------------------------------------------------------------------------
/.Rhistory:
--------------------------------------------------------------------------------
  1 | install.packages("muckrock")
  2 | library(muckrock)
  3 | ?muckrock
  4 | ag <- agency_metadata
  5 | View(ag)
  6 | ju <- jurisdiction_metadata
  7 | View(ju)
  8 | re <- request_metadata
  9 | View(re)
 10 | library(muckraker)
 11 | ?untangle
 12 | library(roxygen2)
 13 | roxygenise()
 14 | library(muckraker)
 15 | ?untangle
 16 | roxygenise()
 17 | library(muckraker)
 18 | ?untangle
 19 | roxygenise()
 20 | library(muckraker)
 21 | ?muckraker
 22 | library(muckraker)
 23 | library(muckrakr)
 24 | library(muckrakr)
 25 | library(readr)
 26 | ?read_csv
 27 | ?list.files
 28 | library(roxygen2)
 29 | roxygenise()
 30 | roxygenise()
 31 | library(muckrakr)
 32 | library(muckrakr)
 33 | roxygenise()
 34 | library(muckrakr)
 35 | roxygenise()
 36 | library(muckrakr)
 37 | library(muckrakr)
 38 | library(readxl)
 39 | ?list.files
 40 | ?read_excel
 41 | ?write_excel
 42 | ?readxl
 43 | files = list.files(folder, pattern=c("*.xls", "*.xlsx"))
 44 | link="https://twitter.com/memeprovider/status/833888807959289856"
 45 | length(link)
 46 | link="https://twitter.com/memeprovider/status/833888807959289856"
 47 | gsub("https://twitter.com/", "", link)
 48 | username <- gsub("https://twitter.com/", "", link)
 49 | ?gsub
 50 | username <- gsub("https://twitter.com/", "", link)
 51 | gsub("/.*","" fixed=T)
 52 | gsub("/.*","", fixed=T)
 53 | gsub("/.*","",username, fixed=T)
 54 | gsub("/*.","",username, fixed=T)
 55 | gsub("///*.","",username, fixed=T)
 56 | gsub("\\/*.","",username, fixed=T)
 57 | gsub("\\/.*","",username, fixed=T)
 58 | gsub("status.*","",username, fixed=T)
 59 | gsub("status*.","",username, fixed=T)
 60 | gsub(".*status","",username, fixed=T)
 61 | gsub(".*status","",username, fixed=F)
 62 | gsub("/.*","",username, fixed=F)
 63 | id_num <- gsub(".*/", "", link)
 64 | id_num
 65 | tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){
 66 | if(!require(devtools)){
 67 | install.packages("devtools")
 68 | library(devtools)
 69 | }
 70 | if(!require(webshot)){
 71 | devtools::install_github("wch/webshot")
 72 | library(webshot)
 73 | }
 74 | if (folder=="DEFAULTTWEETCAP") {
 75 | folder=getwd()
 76 | } else {
 77 | folder <- paste0(getwd(), "/", folder)
 78 | }
 79 | if (length(link)==1) {
 80 | link = link
 81 | username <- gsub("https://twitter.com/", "", link)
 82 | username <- gsub("/.*","",username, fixed=F)
 83 | if (name=="NOTHINGTWEET_CAP") {
 84 | pre_name <- username
 85 | } else {
 86 | pre_name <- filename
 87 | }
 88 | id_num <- gsub(".*/", "", link)
 89 | image_name <- paste0(pre_name, id_num, .png)
 90 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
 91 | }
 92 | }
 93 | tweet_cap
 94 | tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images")
 95 | tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){
 96 | if(!require(devtools)){
 97 | install.packages("devtools")
 98 | library(devtools)
 99 | }
100 | if(!require(webshot)){
101 | devtools::install_github("wch/webshot")
102 | library(webshot)
103 | }
104 | if (folder=="DEFAULTTWEETCAP") {
105 | folder=getwd()
106 | } else {
107 | folder <- paste0(getwd(), "/", folder)
108 | }
109 | if (length(link)==1) {
110 | link = link
111 | username <- gsub("https://twitter.com/", "", link)
112 | username <- gsub("/.*","",username, fixed=F)
113 | if (filename=="NOTHINGTWEET_CAP") {
114 | pre_name <- username
115 | } else {
116 | pre_name <- filename
117 | }
118 | id_num <- gsub(".*/", "", link)
119 | image_name <- paste0(pre_name, id_num, .png)
120 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
121 | }
122 | }
123 | tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images")
124 | tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){
125 | if(!require(devtools)){
126 | install.packages("devtools")
127 | library(devtools)
128 | }
129 | if(!require(webshot)){
130 | devtools::install_github("wch/webshot")
131 | library(webshot)
132 | }
133 | if (folder=="DEFAULTTWEETCAP") {
134 | folder=getwd()
135 | } else {
136 | folder <- paste0(getwd(), "/", folder)
137 | }
138 | if (length(link)==1) {
139 | link = link
140 | username <- gsub("https://twitter.com/", "", link)
141 | username <- gsub("/.*","",username, fixed=F)
142 | if (filename=="NOTHINGTWEET_CAP") {
143 | pre_name <- username
144 | } else {
145 | pre_name <- filename
146 | }
147 | id_num <- gsub(".*/", "", link)
148 | image_name <- paste0(pre_name, id_num, ".png")
149 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
150 | }
151 | }
152 | tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images")
153 | roxygenise()
154 | library(muckrakr)
155 | ?bulk_csv
156 | ?bulk_csv
157 | ?read_excel
158 | ?read_csv
159 | roxygenise()
160 | roxygenise()
161 | library(muckrakr)
162 | knitr::opts_chunk$set(echo = TRUE)
163 | tribble(
164 | ~Link,          ~Account,
165 | "https://twitter.com/realDonaldTrump/status/847766558520856578", "realDonaldTrump",
166 | "https://twitter.com/realDonaldTrump/status/847056211006631936", "realDonaldTrump",
167 | "https://twitter.com/realDonaldTrump/status/846854703183020032", "realDonaldTrump",
168 | "https://twitter.com/byrdinator/status/847600835148128258",      "byrdinator",
169 | "https://twitter.com/Gizmodo/status/847532891127074823",         "Gizmodo"
170 | )
171 | library(tidyverse)
172 | tribble(
173 | ~Link,          ~Account,
174 | "https://twitter.com/realDonaldTrump/status/847766558520856578", "realDonaldTrump",
175 | "https://twitter.com/realDonaldTrump/status/847056211006631936", "realDonaldTrump",
176 | "https://twitter.com/realDonaldTrump/status/846854703183020032", "realDonaldTrump",
177 | "https://twitter.com/byrdinator/status/847600835148128258",      "byrdinator",
178 | "https://twitter.com/Gizmodo/status/847532891127074823",         "Gizmodo"
179 | )
180 | tribble(
181 | ~Link,          ~Account,
182 | "https://twitter.com/realDonaldTrump/status/847766558520856578", "realDonaldTrump",
183 | "https://twitter.com/realDonaldTrump/status/847056211006631936", "realDonaldTrump",
184 | "https://twitter.com/realDonaldTrump/status/846854703183020032", "realDonaldTrump",
185 | "https://twitter.com/byrdinator/status/847600835148128258",      "byrdinator",
186 | "https://twitter.com/Gizmodo/status/847532891127074823",         "Gizmodo"
187 | )
188 | tweets <- tribble(
189 | ~Link,          ~Account,
190 | "https://twitter.com/realDonaldTrump/status/847766558520856578", "realDonaldTrump",
191 | "https://twitter.com/realDonaldTrump/status/847056211006631936", "realDonaldTrump",
192 | "https://twitter.com/realDonaldTrump/status/846854703183020032", "realDonaldTrump",
193 | "https://twitter.com/byrdinator/status/847600835148128258",      "byrdinator",
194 | "https://twitter.com/Gizmodo/status/847532891127074823",         "Gizmodo"
195 | )
196 | View(tweets)
197 | tweet_cap(link=tweets$Link, folder="images")
198 | tweets$Link
199 | link=tweets$Link
200 | pb <- txtProgressBar(min = 0, max = length(link), style = 3)
201 | for (i in length(link)){
202 | link_i = link[i]
203 | username <- gsub("https://twitter.com/", "", link_i)
204 | username <- gsub("/.*","",username, fixed=F)
205 | if (filename=="NOTHINGTWEET_CAP") {
206 | pre_name <- username
207 | } else {
208 | pre_name <- filename
209 | }
210 | id_num <- gsub(".*/", "", link_i)
211 | image_name <- paste0(pre_name, id_num, ".png")
212 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
213 | setTxtProgressBar(pb, i)
214 | }
215 | filename="NOTHINGTWEET_CAP"
216 | pb <- txtProgressBar(min = 0, max = length(link), style = 3)
217 | for (i in length(link)){
218 | link_i = link[i]
219 | username <- gsub("https://twitter.com/", "", link_i)
220 | username <- gsub("/.*","",username, fixed=F)
221 | if (filename=="NOTHINGTWEET_CAP") {
222 | pre_name <- username
223 | } else {
224 | pre_name <- filename
225 | }
226 | id_num <- gsub(".*/", "", link_i)
227 | image_name <- paste0(pre_name, id_num, ".png")
228 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
229 | setTxtProgressBar(pb, i)
230 | }
231 | folder="DEFAULTTWEETCAP"
232 | pb <- txtProgressBar(min = 0, max = length(link), style = 3)
233 | for (i in length(link)){
234 | link_i = link[i]
235 | username <- gsub("https://twitter.com/", "", link_i)
236 | username <- gsub("/.*","",username, fixed=F)
237 | if (filename=="NOTHINGTWEET_CAP") {
238 | pre_name <- username
239 | } else {
240 | pre_name <- filename
241 | }
242 | id_num <- gsub(".*/", "", link_i)
243 | image_name <- paste0(pre_name, id_num, ".png")
244 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
245 | setTxtProgressBar(pb, i)
246 | }
247 | getwd()
248 | roxygenise()
249 | library(muckrakr)
250 | tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images")
251 | tweet_cap(link=tweets$Link, folder="images")
252 | tweet_cap(tweets$Link, folder="images")
253 | tweets$Link
254 | str(tweets$Link)
255 | str("https://twitter.com/memeprovider/status/833888807959289856")
256 | tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){
257 | if(!require(devtools)){
258 | install.packages("devtools")
259 | library(devtools)
260 | }
261 | if(!require(webshot)){
262 | devtools::install_github("wch/webshot")
263 | library(webshot)
264 | }
265 | if (folder=="DEFAULTTWEETCAP") {
266 | folder=getwd()
267 | } else {
268 | folder <- paste0(getwd(), "/", folder)
269 | }
270 | if (length(link)==1) {
271 | link = link
272 | username <- gsub("https://twitter.com/", "", link)
273 | username <- gsub("/.*","",username, fixed=F)
274 | if (filename=="NOTHINGTWEET_CAP") {
275 | pre_name <- username
276 | } else {
277 | pre_name <- filename
278 | }
279 | id_num <- gsub(".*/", "", link)
280 | image_name <- paste0(pre_name, id_num, ".png")
281 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
282 | }
283 | if (length(link)>1) {
284 | pb <- txtProgressBar(min = 0, max = length(link), style = 3)
285 | for (i in length(link)){
286 | link_i = link[i]
287 | username <- gsub("https://twitter.com/", "", link_i)
288 | username <- gsub("/.*","",username, fixed=F)
289 | if (filename=="NOTHINGTWEET_CAP") {
290 | pre_name <- username
291 | } else {
292 | pre_name <- filename
293 | }
294 | id_num <- gsub(".*/", "", link_i)
295 | image_name <- paste0(pre_name, id_num, ".png")
296 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
297 | setTxtProgressBar(pb, i)
298 | }
299 | }
300 | }
301 | tweet_cap(tweets$Link)
302 | tweet_cap(tweets$Link, folder="images")
303 | tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){
304 | if(!require(devtools)){
305 | install.packages("devtools")
306 | library(devtools)
307 | }
308 | if(!require(webshot)){
309 | devtools::install_github("wch/webshot")
310 | library(webshot)
311 | }
312 | if (folder=="DEFAULTTWEETCAP") {
313 | folder=getwd()
314 | } else {
315 | folder <- paste0(getwd(), "/", folder)
316 | }
317 | if (length(link)==1) {
318 | link = link
319 | username <- gsub("https://twitter.com/", "", link)
320 | username <- gsub("/.*","",username, fixed=F)
321 | if (filename=="NOTHINGTWEET_CAP") {
322 | pre_name <- username
323 | } else {
324 | pre_name <- filename
325 | }
326 | id_num <- gsub(".*/", "", link)
327 | image_name <- paste0(pre_name, id_num, ".png")
328 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
329 | }
330 | if (length(link)>1) {
331 | pb <- txtProgressBar(min = 0, max = length(link), style = 3)
332 | for (i in length(link)){
333 | link_i = link[i]
334 | username <- gsub("https://twitter.com/", "", link_i)
335 | username <- gsub("/.*","",username, fixed=F)
336 | if (filename=="NOTHINGTWEET_CAP") {
337 | pre_name <- username
338 | } else {
339 | pre_name <- filename
340 | }
341 | id_num <- gsub(".*/", "", link_i)
342 | image_name <- paste0(pre_name, id_num, ".png")
343 | webshot(link_i, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
344 | setTxtProgressBar(pb, i)
345 | }
346 | }
347 | }
348 | tweet_cap(tweets$Link, folder="images")
349 | link <- tweets$Link
350 | filename="NOTHINGTWEET_CAP"
351 | folder <- "test"
352 | length(link)>1
353 | pb <- txtProgressBar(min = 0, max = length(link), style = 3)
354 | pb
355 | tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){
356 | if(!require(devtools)){
357 | install.packages("devtools")
358 | library(devtools)
359 | }
360 | if(!require(webshot)){
361 | devtools::install_github("wch/webshot")
362 | library(webshot)
363 | }
364 | if (folder=="DEFAULTTWEETCAP") {
365 | folder=getwd()
366 | } else {
367 | folder <- paste0(getwd(), "/", folder)
368 | }
369 | if (length(link)==1) {
370 | link = link
371 | username <- gsub("https://twitter.com/", "", link)
372 | username <- gsub("/.*","",username, fixed=F)
373 | if (filename=="NOTHINGTWEET_CAP") {
374 | pre_name <- username
375 | } else {
376 | pre_name <- filename
377 | }
378 | id_num <- gsub(".*/", "", link)
379 | image_name <- paste0(pre_name, id_num, ".png")
380 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
381 | }
382 | if (length(link)>1) {
383 | pb <- txtProgressBar(min = 0, max = length(link), style = 3)
384 | for (i in 1:length(link)){
385 | link_i = link[i]
386 | username <- gsub("https://twitter.com/", "", link_i)
387 | username <- gsub("/.*","",username, fixed=F)
388 | if (filename=="NOTHINGTWEET_CAP") {
389 | pre_name <- username
390 | } else {
391 | pre_name <- filename
392 | }
393 | id_num <- gsub(".*/", "", link_i)
394 | image_name <- paste0(pre_name, id_num, ".png")
395 | webshot(link_i, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
396 | setTxtProgressBar(pb, i)
397 | }
398 | }
399 | }
400 | tweet_cap(tweets$Link, folder="images")
401 | ?tweet_cap
402 | tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images")
403 | tweet_cap(tweets$Link, folder="images")
404 | roxygenize()
405 | library(muckrakr)
406 | 


--------------------------------------------------------------------------------