├── .Rproj.user
├── 6400D2F9
│ ├── cpp-definition-cache
│ ├── sdb
│ │ └── prop
│ │ │ ├── 1EBC0BC2
│ │ │ ├── 3881D3BE
│ │ │ ├── 5BEA6FD4
│ │ │ ├── 67865F02
│ │ │ ├── 741CD66D
│ │ │ ├── 8DEFE4DD
│ │ │ ├── AEC4A068
│ │ │ ├── DB4BBF72
│ │ │ ├── 1B3FF799
│ │ │ ├── 60D253CC
│ │ │ ├── 7D846616
│ │ │ ├── AB15AAE
│ │ │ ├── B821A3B8
│ │ │ ├── FA247785
│ │ │ └── INDEX
│ ├── saved_source_markers
│ ├── pcs
│ │ ├── source-pane.pper
│ │ ├── workbench-pane.pper
│ │ ├── debug-breakpoints.pper
│ │ ├── files-pane.pper
│ │ └── windowlayoutstate.pper
│ ├── session-persistent-state
│ ├── rmd-outputs
│ └── persistent-state
└── shared
│ └── notebooks
│ └── paths
├── .Rbuildignore
├── .RData
├── .gitignore
├── .DS_Store
├── docs
├── untangle.pdf
├── images
│ ├── csvlist.png
│ ├── excellist.png
│ ├── tweet_images.png
│ └── memeprovider833888807959289856.png
├── index.Rmd
├── bulk_csv.Rmd
├── bulk_excel.Rmd
├── tweet_cap.Rmd
└── untangle.Rmd
├── NAMESPACE
├── R
├── setup_folders.R
├── dl_file.R
├── packagr.R
├── untangle.R
├── tweet_cap.R
├── bulk_excel.R
└── bulk_csv.R
├── man
├── setup_folders.Rd
├── dl_file.Rd
├── tweet_cap.Rd
├── packagr.Rd
├── bulk_excel.Rd
├── untangle.Rd
└── bulk_csv.Rd
├── muckrakr.Rproj
├── DESCRIPTION
├── readme.MD
└── .Rhistory
/.Rproj.user/6400D2F9/cpp-definition-cache:
--------------------------------------------------------------------------------
1 | [
2 | ]
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/1EBC0BC2:
--------------------------------------------------------------------------------
1 | {
2 | }
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/3881D3BE:
--------------------------------------------------------------------------------
1 | {
2 | }
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/5BEA6FD4:
--------------------------------------------------------------------------------
1 | {
2 | }
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/67865F02:
--------------------------------------------------------------------------------
1 | {
2 | }
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/741CD66D:
--------------------------------------------------------------------------------
1 | {
2 | }
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/8DEFE4DD:
--------------------------------------------------------------------------------
1 | {
2 | }
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/AEC4A068:
--------------------------------------------------------------------------------
1 | {
2 | }
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/DB4BBF72:
--------------------------------------------------------------------------------
1 | {
2 | }
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 |
--------------------------------------------------------------------------------
/.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/.RData
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/saved_source_markers:
--------------------------------------------------------------------------------
1 | {"active_set":"","sets":[]}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 |
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/pcs/source-pane.pper:
--------------------------------------------------------------------------------
1 | {
2 | "activeTab" : 2
3 | }
--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/.DS_Store
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/1B3FF799:
--------------------------------------------------------------------------------
1 | {
2 | "tempName" : "Untitled2"
3 | }
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/60D253CC:
--------------------------------------------------------------------------------
1 | {
2 | "tempName" : "Untitled1"
3 | }
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/7D846616:
--------------------------------------------------------------------------------
1 | {
2 | "tempName" : "Untitled1"
3 | }
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/AB15AAE:
--------------------------------------------------------------------------------
1 | {
2 | "tempName" : "Untitled1"
3 | }
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/B821A3B8:
--------------------------------------------------------------------------------
1 | {
2 | "tempName" : "Untitled1"
3 | }
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/session-persistent-state:
--------------------------------------------------------------------------------
1 | virtual-session-id="5DBF5373"
2 |
--------------------------------------------------------------------------------
/docs/untangle.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/docs/untangle.pdf
--------------------------------------------------------------------------------
/docs/images/csvlist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/docs/images/csvlist.png
--------------------------------------------------------------------------------
/docs/images/excellist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/docs/images/excellist.png
--------------------------------------------------------------------------------
/docs/images/tweet_images.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/docs/images/tweet_images.png
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/FA247785:
--------------------------------------------------------------------------------
1 | {
2 | "last_setup_crc32" : "DAF4EF54bb338d19",
3 | "tempName" : "Untitled1"
4 | }
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/pcs/workbench-pane.pper:
--------------------------------------------------------------------------------
1 | {
2 | "TabSet1" : 2,
3 | "TabSet2" : 0,
4 | "TabZoom" : {
5 | }
6 | }
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/pcs/debug-breakpoints.pper:
--------------------------------------------------------------------------------
1 | {
2 | "debugBreakpointsState" : {
3 | "breakpoints" : [
4 | ]
5 | }
6 | }
--------------------------------------------------------------------------------
/docs/images/memeprovider833888807959289856.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andrewbtran/muckrakr/HEAD/docs/images/memeprovider833888807959289856.png
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(bulk_csv)
4 | export(bulk_excel)
5 | export(dl_file)
6 | export(packagr)
7 | export(setup_folders)
8 | export(tweet_cap)
9 | export(untangle)
10 |
--------------------------------------------------------------------------------
/.Rproj.user/shared/notebooks/paths:
--------------------------------------------------------------------------------
1 | /Users/andrewtran/Projects/muckrakr/R/dl_file.R="9577CFAE"
2 | /Users/andrewtran/Projects/muckrakr/R/packagr.R="E8A46326"
3 | /Users/andrewtran/Projects/muckrakr/R/setup_folders.R="F50121FC"
4 |
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/rmd-outputs:
--------------------------------------------------------------------------------
1 | ~/Documents/Github/muckrakr/docs/tweet_cap.html
2 | ~/Documents/Github/muckrakr/tweet_cap.html
3 | ~/Documents/Github/muckrakr/tweet_cap.html
4 | ~/Documents/Github/muckrakr/docs/tweet_cap.html
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/pcs/files-pane.pper:
--------------------------------------------------------------------------------
1 | {
2 | "path" : "~/Documents/Github/muckrakr/R",
3 | "sortOrder" : [
4 | {
5 | "ascending" : false,
6 | "columnIndex" : 2
7 | },
8 | {
9 | "ascending" : false,
10 | "columnIndex" : 4
11 | }
12 | ]
13 | }
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/pcs/windowlayoutstate.pper:
--------------------------------------------------------------------------------
1 | {
2 | "left" : {
3 | "panelheight" : 813,
4 | "splitterpos" : 339,
5 | "topwindowstate" : "NORMAL",
6 | "windowheight" : 851
7 | },
8 | "right" : {
9 | "panelheight" : 813,
10 | "splitterpos" : 511,
11 | "topwindowstate" : "NORMAL",
12 | "windowheight" : 851
13 | }
14 | }
--------------------------------------------------------------------------------
/R/setup_folders.R:
--------------------------------------------------------------------------------
#' Set up a standard folder structure for your project
#'
#' Creates the folders `raw_data`, `output_data`, `rmd`, `docs` and `scripts`
#' in the current working directory. Folders that already exist are left
#' untouched, so the function can safely be called more than once.
#'
#' @return Invisibly, a named logical vector with one element per folder:
#'   `TRUE` if the folder was created by this call, `FALSE` otherwise
#'   (for example because it already existed).
#' @export
#' @examples
#' \dontrun{
#' setup_folders()
#' }
setup_folders <- function() {
  folder_names <- c("raw_data", "output_data", "rmd", "docs", "scripts")

  # vapply() guarantees a logical vector even if the folder list is edited
  # down to nothing; showWarnings = FALSE keeps repeated calls quiet.
  created <- vapply(
    folder_names,
    function(folder) dir.create(folder, showWarnings = FALSE),
    logical(1)
  )

  invisible(created)
}
17 |
18 |
--------------------------------------------------------------------------------
/man/setup_folders.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/setup_folders.R
3 | \name{setup_folders}
4 | \alias{setup_folders}
5 | \title{Command to set up an optimized folder structure for your project}
6 | \usage{
7 | setup_folders()
8 | }
9 | \description{
10 | Folders that will be generated: `raw_data`, `output_data`, `rmd`, `docs`, `scripts`
11 | }
12 | \examples{
13 | setup_folders()
14 | }
15 |
--------------------------------------------------------------------------------
/muckrakr.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 |
--------------------------------------------------------------------------------
/docs/index.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "muckrakr"
3 | output: html_document
4 | ---
5 |
6 | ----
7 |
8 | A growing collection of R functions and relationship files for data journalists.
9 |
10 | Functions so far:
11 |
12 | * `untangle` - Disentangling a complex variable [[Documentation](http://andrewbtran.github.com/muckrakr/untangle.html)]
13 |
14 |
15 | ## Installation
16 |
17 | ----
18 |
19 | ```
20 | #install.packages("devtools")
21 | devtools::install_github("andrewbtran/muckrakr")
22 | ```
23 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: muckrakr
2 | Type: Package
3 | Title: R tools for journalists
4 | Version: 0.1.0
5 | Authors@R: c(
6 | person("Andrew", "Tran", email = "abtran@gmail.com", role = "cre"),
7 | person("Robert", "Kabacoff", email = "rkabacoff@wesleyan.edu", role = "aut"))
8 | Description: A collection of tools and relationship files for data journalists.
9 | URL: http://andrewbtran.github.io/muckrakr/
10 | BugReports: https://github.com/andrewbtran/muckrakr/issues
11 | License: What license is it under?
12 | Encoding: UTF-8
13 | LazyData: true
14 | RoxygenNote: 6.0.1
15 |
--------------------------------------------------------------------------------
/man/dl_file.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dl_file.R
3 | \name{dl_file}
4 | \alias{dl_file}
5 | \title{Downloads file from URL if file doesn't exist in set folder}
6 | \usage{
7 | dl_file(folder = "data", link = "filenamedefaultbulkcsv2017.csv")
8 | }
9 | \arguments{
10 | \item{folder}{The folder in relation to your working directory where the file should exist. Default folder is `data`.}
11 |
12 | \item{link}{The link to where the file exists}
13 | }
14 | \description{
15 | Downloads file from URL if file doesn't exist in set folder
16 | }
17 | \examples{
18 | dl_file(folder="data", link="https://website.com/data/bostonpayroll2013.csv")
19 | }
20 | \keyword{and}
21 | \keyword{checking}
22 | \keyword{downloading}
23 | \keyword{file}
24 |
--------------------------------------------------------------------------------
/man/tweet_cap.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/tweet_cap.R
3 | \name{tweet_cap}
4 | \alias{tweet_cap}
5 | \title{Generate a screenshot of a tweet}
6 | \usage{
7 | tweet_cap(link = "", filename = "NOTHINGTWEET_CAP",
8 | folder = "DEFAULTTWEETCAP")
9 | }
10 | \arguments{
11 | \item{link}{the URL of the tweet}
12 |
13 | \item{filename}{the base name of the image you want to save. Default will be user's twitter handle.}
14 |
15 | \item{folder}{the folder you want to save the screenshot to}
16 | }
17 | \description{
18 | Quick screenshot of a tweet based on a link
19 | }
20 | \examples{
21 | tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images")
22 | }
23 | \keyword{screenshot}
24 | \keyword{twitter}
25 |
--------------------------------------------------------------------------------
/man/packagr.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/packagr.R
3 | \name{packagr}
4 | \alias{packagr}
5 | \title{Downloading packages that are needed but aren't yet on the system}
6 | \usage{
7 | packagr(packages = c("tidyverse"), gh_packages = c("abtran/muckrakr"))
8 | }
9 | \arguments{
10 | \item{packages}{Array of packages found on CRAN}
11 |
12 | \item{gh_packages}{Array of packages found on Github}
13 | }
14 | \description{
15 | Downloading packages that are needed but aren't yet on the system
16 | }
17 | \examples{
18 | packagr(packages=c("tidyverse", "rtweet"), gh_packages=c("abtran/muckrakr"))
19 | }
20 | \keyword{Downloading}
21 | \keyword{are}
22 | \keyword{aren't}
23 | \keyword{but}
24 | \keyword{needed}
25 | \keyword{on}
26 | \keyword{packages}
27 | \keyword{system}
28 | \keyword{that}
29 | \keyword{the}
30 | \keyword{yet}
31 |
--------------------------------------------------------------------------------
/R/dl_file.R:
--------------------------------------------------------------------------------
#' Downloads file from URL if file doesn't exist in set folder
#'
#' The local file name is whatever follows the last `/` in `link`. If a file
#' with that name is already present in `folder`, nothing is downloaded.
#' Otherwise `folder` is created if needed and the file is downloaded into it.
#'
#' @param folder The folder in relation to your working directory where the
#'   file should exist. Default folder is `data`.
#' @param link The link to where the file exists
#' @return Invisibly, the status message that was printed: either
#'   `"File download complete"` or `"File already exists"`.
#' @keywords file download
#' @export
#' @examples
#' \dontrun{
#' dl_file(folder = "data", link = "https://website.com/data/bostonpayroll2013.csv")
#' }
dl_file <- function(folder = "data", link = "filenamedefaultbulkcsv2017.csv") {
  file_name <- gsub(".*\\/", "", link)
  file_folder <- file.path(folder, file_name)

  if (!file.exists(file_folder)) {
    # recursive = TRUE so nested targets such as "data/raw" can be created.
    dir.create(folder, showWarnings = FALSE, recursive = TRUE)
    # mode = "wb" prevents binary files (xlsx, zip, images, ...) from being
    # corrupted by text-mode line-ending translation on Windows.
    utils::download.file(link, file_folder, mode = "wb")
    print("File download complete")
  } else {
    print("File already exists")
  }
}
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/sdb/prop/INDEX:
--------------------------------------------------------------------------------
1 | ~%2FDocuments%2FGithub%2Fmuckrakr%2F.gitignore="5BEA6FD4"
2 | ~%2FDocuments%2FGithub%2Fmuckrakr%2FDESCRIPTION="8DEFE4DD"
3 | ~%2FDocuments%2FGithub%2Fmuckrakr%2FNAMESPACE="1EBC0BC2"
4 | ~%2FDocuments%2FGithub%2Fmuckrakr%2FR%2Fbulk_csv.R="7D846616"
5 | ~%2FDocuments%2FGithub%2Fmuckrakr%2FR%2Fbulk_excel.R="1B3FF799"
6 | ~%2FDocuments%2FGithub%2Fmuckrakr%2FR%2Ftweet_cap.R="B821A3B8"
7 | ~%2FDocuments%2FGithub%2Fmuckrakr%2FR%2Funtangle.R="67865F02"
8 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Fdocs%2Fbulk_csv.Rmd="60D253CC"
9 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Fdocs%2Fbulk_excel.Rmd="AB15AAE"
10 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Fdocs%2Findex.Rmd="741CD66D"
11 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Fdocs%2Ftweet_cap.Rmd="3881D3BE"
12 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Fdocs%2Funtangle.Rmd="DB4BBF72"
13 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Freadme.MD="AEC4A068"
14 | ~%2FDocuments%2FGithub%2Fmuckrakr%2Ftweet_cap.Rmd="FA247785"
15 |
--------------------------------------------------------------------------------
/man/bulk_excel.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/bulk_excel.R
3 | \name{bulk_excel}
4 | \alias{bulk_excel}
5 | \title{Creating a single data frame from a folder of similarly structured Excel spreadsheets}
6 | \usage{
7 | bulk_excel(folder = "DEFAULTBULKCSV2017",
8 | export = "filenamedefaultbulkcsv2017.csv", sheet = 1, col_names = TRUE,
9 | col_types = NULL, na = "", skip = 0)
10 | }
11 | \arguments{
12 | \item{folder}{The folder in relation to your working directory where the Excel files exist. Default folder is the current working directory.}
13 |
14 | \item{export}{File name to export csv file as, if wanted.}
15 | }
16 | \description{
17 | Like Voltron-ing your Excel data. This package is based on readxl, so passing on variables from read_excel will work in bulk_excel. These variables will apply to the import of each Excel spreadsheet.
18 | }
19 | \examples{
20 | bulk_excel(folder="data", export="combined_data.xls")
21 | }
22 | \keyword{appending}
23 | \keyword{data}
24 |
--------------------------------------------------------------------------------
/man/untangle.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/untangle.R
3 | \name{untangle}
4 | \alias{untangle}
5 | \title{Disentangling a complex variable}
6 | \usage{
7 | untangle(data, x, pattern, verbose = FALSE)
8 | }
9 | \arguments{
10 | \item{data}{The data frame that contains the column to untangle}
11 |
12 | \item{x}{Column to untangle}
13 |
14 | \item{pattern}{Special characters that separate the variables in the column}
15 |
16 | \item{verbose}{TRUE or FALSE}
17 | }
18 | \description{
19 | We are occasionally faced with a data set variable that contains multiple pieces of information. Multiple pieces of information are stored in this variable. In this case these pieces are delineated by a comma or colon. As is, the variable is not useful.
20 | The *untangle* function can be used to create a set of dummy codes from this variable that will be more useful. This results in a new dataset with dummy codes
21 | }
22 | \examples{
23 | untangle(data=test_data, x="charges", pattern="[,:]", verbose=TRUE)
24 | }
25 | \keyword{data}
26 | \keyword{wrangling}
27 |
--------------------------------------------------------------------------------
/man/bulk_csv.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/bulk_csv.R
3 | \name{bulk_csv}
4 | \alias{bulk_csv}
5 | \title{Creating a single data frame from a folder of similarly structured CSV spreadsheets}
6 | \usage{
7 | bulk_csv(folder = "DEFAULTBULKCSV2017",
8 | export = "filenamedefaultbulkcsv2017.csv", col_names = TRUE,
9 | col_types = NULL, na = c("", "NA"), quoted_na = TRUE, comment = "",
10 | trim_ws = TRUE, skip = 0, n_max = Inf, guess_max = min(1000, n_max),
11 | progress = interactive())
12 | }
13 | \arguments{
14 | \item{folder}{The folder in relation to your working directory where the csv files exist. Default folder is the current working directory.}
15 |
16 | \item{export}{File name to export csv file as, if wanted.}
17 | }
18 | \description{
19 | Like Voltron-ing your data. This package is based on readr, so passing on variables from read_csv will work in bulk_csv. These variables will apply to the import of each CSV sheet.
20 | }
21 | \examples{
22 | bulk_csv(folder="data", export="combined_data.csv")
23 | }
24 | \keyword{appending}
25 | \keyword{data}
26 |
--------------------------------------------------------------------------------
/readme.MD:
--------------------------------------------------------------------------------
1 | # muckrakr
2 |
3 | ----
4 |
5 | A growing collection of R functions and relationship files for data journalists.
6 |
7 | Functions so far:
8 |
9 | * `untangle` - Disentangling a complex variable [[Documentation](http://andrewbtran.github.com/muckrakr/untangle.html)]
10 | * `bulk_csv` - Creating a large data frame by appending a folder of CSV files [[Documentation](http://andrewbtran.github.com/muckrakr/bulk_csv.html)]
11 | * `bulk_excel` - Creating a large data frame by appending a folder of Excel files [[Documentation](http://andrewbtran.github.com/muckrakr/bulk_excel.html)]
12 | * `tweet_cap` - Output a screenshot of a tweet (or list of tweets) by URL [[Documentation](http://andrewbtran.github.com/muckrakr/tweet_cap.html)]
13 | * `setup_folders` - Command to set up an optimized folder structure for your project [Documentation to come]
14 | * `dl_file` - Downloads file from URL if file doesn't exist in set folder [Documentation to come]
15 | * `packagr` - Downloading packages that are needed but aren't yet on the system [Documentation to come]
16 |
17 | ## Installation
18 |
19 | ----
20 |
21 | ```
22 | #install.packages("devtools")
23 | devtools::install_github("andrewbtran/muckrakr")
24 | ```
25 |
--------------------------------------------------------------------------------
/.Rproj.user/6400D2F9/persistent-state:
--------------------------------------------------------------------------------
1 | build-last-errors="[]"
2 | build-last-errors-base-dir="~/Documents/Github/muckrakr/"
3 | build-last-outputs="[{\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source muckrakr\\n\\n\",\"type\":0},{\"output\":\"* installing to library ‘/Library/Frameworks/R.framework/Versions/3.3/Resources/library’\\n\",\"type\":1},{\"output\":\"* installing *source* package ‘muckrakr’ ...\\n\",\"type\":1},{\"output\":\"** R\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** preparing package for lazy loading\\n\",\"type\":1},{\"output\":\"** help\\n\",\"type\":1},{\"output\":\"*** installing help indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** building package indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package can be loaded\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"* DONE (muckrakr)\\n\",\"type\":1},{\"output\":\"\",\"type\":1}]"
4 | compile_pdf_state="{\"errors\":[],\"output\":\"\",\"running\":false,\"tab_visible\":false,\"target_file\":\"\"}"
5 | console_procs="[]"
6 | files.monitored-path=""
7 | find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":false,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOff\":[],\"matchOn\":[]},\"running\":false}"
8 | imageDirtyState="0"
9 | saveActionState="0"
10 |
--------------------------------------------------------------------------------
/R/packagr.R:
--------------------------------------------------------------------------------
#' Downloading packages that are needed but aren't yet on the system
#'
#' Tries to attach every requested package. CRAN packages that cannot be
#' attached are installed with `install.packages()` and attached again.
#' GitHub packages that cannot be attached are installed with
#' `devtools::install_github()` and attached again.
#'
#' The package name of a GitHub repository is taken to be the part of the
#' spec after the last `/` (e.g. `"user/repo"` -> `"repo"`).
#'
#' @param packages Array of packages found on CRAN
#' @param gh_packages Array of packages found on Github, given as
#'   `"user/repo"`. The default value, `"abtran/muckrakr"`, acts as a
#'   sentinel meaning "no GitHub packages requested".
#' @return Invisibly, a named logical vector saying, for every requested
#'   package, whether it could be attached.
#' @keywords packages
#' @export
#' @examples
#' \dontrun{
#' packagr(packages = c("tidyverse", "rtweet"), gh_packages = c("abtran/muckrakr"))
#' }
packagr <- function(packages = c("tidyverse"),
                    gh_packages = c("abtran/muckrakr")) {
  # Attach each package; require() is used on purpose because its
  # TRUE/FALSE result tells us which packages still need installing.
  attach_all <- function(pkgs) {
    vapply(
      pkgs,
      function(pkg) {
        require(pkg, warn.conflicts = TRUE, character.only = TRUE)
      },
      logical(1)
    )
  }

  loaded <- attach_all(packages)
  if (any(!loaded)) {
    missing_cran <- packages[!loaded]
    utils::install.packages(missing_cran)
    loaded[!loaded] <- attach_all(missing_cran)
  }

  gh_loaded <- logical(0)
  # identical() is a scalar test, so passing several repositories no longer
  # triggers the "condition has length > 1" error.
  if (length(gh_packages) > 0 && !identical(gh_packages, "abtran/muckrakr")) {
    gh_packages <- as.character(gh_packages)
    gh_names <- gsub(".*\\/", "", gh_packages)
    gh_loaded <- attach_all(gh_names)

    if (any(!gh_loaded)) {
      if (!requireNamespace("devtools", quietly = TRUE)) {
        stop(
          "Package 'devtools' is required to install packages from GitHub.",
          call. = FALSE
        )
      }
      # Index by position instead of regex-matching names, so repositories
      # with similar names cannot be picked up by accident.
      still_missing <- !gh_loaded
      devtools::install_github(gh_packages[still_missing])
      gh_loaded[still_missing] <- attach_all(gh_names[still_missing])
    }
  }

  invisible(c(loaded, gh_loaded))
}
35 |
36 |
37 |
--------------------------------------------------------------------------------
/R/untangle.R:
--------------------------------------------------------------------------------
#' Disentangling a complex variable
#'
#' We are occasionally faced with a data set variable that contains multiple
#' pieces of information, delineated by a separator such as a comma or colon.
#' As is, the variable is not useful. The *untangle* function creates a set of
#' dummy codes from this variable that will be more useful. This results in a
#' new dataset with one 1/0 column per unique code.
#'
#' Codes are lower-cased and trimmed before comparison. A row gets a 1 for a
#' code only when that code is one of the row's own separated pieces; codes
#' are compared literally, not as regular expressions or substrings. Rows
#' whose value in `x` is `NA` get `NA` in every dummy column.
#'
#' @param data The data frame that contains the column to untangle
#' @param x Column to untangle (name as a string)
#' @param pattern Special characters that separate the variables in the column
#'   (a regular expression, e.g. `"[,:]"`)
#' @param verbose TRUE or FALSE; if TRUE the unique codes found are printed
#' @return `data` with one additional numeric 1/0 column per unique code.
#'   Column names are the codes with spaces replaced by underscores.
#' @keywords data wrangling
#' @export
#' @examples
#' \dontrun{
#' untangle(data = test_data, x = "charges", pattern = "[,:]", verbose = TRUE)
#' }
untangle <- function(data, x, pattern, verbose = FALSE) {
  if (!requireNamespace("stringr", quietly = TRUE)) {
    stop("Package 'stringr' is required for untangle().", call. = FALSE)
  }
  if (!x %in% names(data)) {
    stop("Column '", x, "' not found in `data`.", call. = FALSE)
  }

  variable <- stringr::str_to_lower(as.character(data[[x]]))

  # one row per observation, one (trimmed) code per cell
  code_matrix <- stringr::str_split(variable, pattern, simplify = TRUE)
  code_matrix[] <- stringr::str_trim(code_matrix)

  # obtain list of unique codes (missing values are not codes)
  code_vector <- as.character(code_matrix)
  code_vector <- unique(code_vector[!is.na(code_vector) & code_vector != ""])
  if (verbose) cat("[Unique Codes] ", code_vector, "\n", sep = "\n")

  # create dummy codes matrix
  nobs <- nrow(data)
  ncodes <- length(code_vector)
  dummy_codes <- matrix(NA_real_, nrow = nobs, ncol = ncodes)

  # add 1/0 codes; seq_len() keeps the loop empty when no codes were found
  for (i in seq_len(ncodes)) {
    has_code <- rowSums(code_matrix == code_vector[i], na.rm = TRUE) > 0
    dummy_codes[, i] <- as.numeric(has_code)
  }
  if (ncodes > 0) {
    # a missing source value cannot be coded either way
    dummy_codes[is.na(variable), ] <- NA_real_
  }

  # add dummy code names
  dummy_codes <- as.data.frame(dummy_codes)
  names(dummy_codes) <- stringr::str_replace_all(code_vector, " ", "_")

  # add to data frame
  cbind(data, dummy_codes)
}
45 |
--------------------------------------------------------------------------------
/docs/bulk_csv.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "bulk_csv()"
3 | author: "Andrew Ba Tran"
4 | date: "March 30, 2017"
5 | output: html_document
6 | ---
7 |
8 | Creating a single data frame from a folder of similarly structured CSV spreadsheets.
9 |
10 | ## Description
11 |
12 | Like Voltron-ing your data
13 |
14 | ## Usage
15 |
16 | bulk_csv(folder = "DEFAULTBULKCSV2017",
17 | export = "filenamedefaultbulkcsv2017.csv")
18 |
19 | ## Arguments
20 |
21 | * `folder` - The folder in relation to your working directory where the csv files exist. Default folder is the current working directory.
22 | * `export` - File name to export csv file as, if wanted.
23 |
24 | ## Note
25 |
26 | This package is based on [**readr**](https://blog.rstudio.org/2015/04/09/readr-0-1-0/), so passing on variables from `read_csv` will work in `bulk_csv`. These variables will apply to the import of each CSV sheet.
27 |
28 |
29 | ### Documentation
30 |
31 | In this example, these CSVs reside in a folder called **state_data**.
32 |
33 | ```{r setup, include=FALSE}
34 | knitr::opts_chunk$set(echo = FALSE, message = FALSE)
35 | ```
36 |
37 |
38 |

39 |
40 | ----
41 |
42 | The data structure **must** be similar for each data set, as in column names and data types within the columns are consistent.
43 |
44 | Or the bulk join will not work.
45 |
46 | To create a new dataframe with all the data sets combined, the command would be:
47 |
48 | `combined_data <- bulk_csv(folder="state_data")`
49 |
50 | To simply export the data as a new combined CSV file, the command would be:
51 |
52 | `bulk_csv(folder="state_data", export="combined_data.csv")`
53 |
54 | This would save the file to the current working directory.
55 |
56 | You can specify the location to download in `export` as long as you put the folder structure before the CSV file name, like:
57 |
58 | `bulk_csv(folder="state_data", export="data_export/combined_data.csv")`
59 |
60 |
--------------------------------------------------------------------------------
/docs/bulk_excel.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "bulk_excel()"
3 | author: "Andrew Ba Tran"
4 | date: "March 30, 2017"
5 | output: html_document
6 | ---
7 |
8 | Creating a single data frame from a folder of similarly structured Excel spreadsheets.
9 |
10 | ## Description
11 |
12 | Like Voltron-ing your data
13 |
14 | ## Usage
15 |
16 | bulk_excel(folder="DEFAULTBULKCSV2017", export="filenamedefaultbulkcsv2017.xls")
17 |
18 | ## Arguments
19 |
20 | * `folder` - The folder in relation to your working directory where the Excel files exist. Default folder is the current working directory.
21 | * `export` - File name to export CSV file (Not as an Excel file, sorry) as, if wanted.
22 |
23 | ## Note
24 |
25 | This package is based on [**readxl**](https://github.com/tidyverse/readxl), so passing on variables from `read_excel` will work in `bulk_excel`. These variables will apply to the import of each Excel spreadsheet.
26 |
27 | ### Documentation
28 |
29 | In this example, these Excel spreadsheets reside in a folder called **state_data**.
30 |
31 | ```{r setup, include=FALSE}
32 | knitr::opts_chunk$set(echo = FALSE, message = FALSE)
33 | ```
34 |
35 |
36 |

37 |
38 | ----
39 |
40 | The data structure **must** be consistent across the data sets: every file needs the same column names and the same data types within those columns.
41 |
42 | Otherwise the bulk join will not work.
43 |
44 | To create a new dataframe with all the data sets combined, the command would be:
45 |
46 | `combined_data <- bulk_excel(folder="state_data")`
47 |
48 | To simply export the data as a new combined CSV file, the command would be:
49 |
50 | `bulk_excel(folder="state_data", export="combined_data.csv")`
51 |
52 | This would save the file to the current working directory.
53 |
54 | You can choose where the combined file is saved by putting the folder path before the CSV file name in `export`, like:
55 |
56 | `bulk_excel(folder="state_data", export="data_export/combined_data.csv")`
57 |
58 |
--------------------------------------------------------------------------------
/R/tweet_cap.R:
--------------------------------------------------------------------------------
#' Generate a screenshot of a tweet
#'
#' Quick screenshot of a tweet (or of several tweets) based on its link. The
#' screenshot is taken with the webshot package; if webshot is not installed it
#' is installed from GitHub first (installing devtools as well when needed).
#'
#' Each image is saved as `<filename><tweet id>.png`, where the tweet id is the
#' last path segment of the link (ignoring a trailing slash or a `?query`).
#'
#' @param link the URL of the tweet, or a character vector of tweet URLs
#' @param filename the base name of the image you want to save. Default will be user's twitter handle.
#' @param folder the folder, relative to the working directory, you want to
#'   save the screenshot to. Default is the working directory itself. The
#'   folder is created when it does not exist yet.
#' @return Invisibly: for a single link, whatever `webshot::webshot()`
#'   returned; for several links, the character vector of image paths; for a
#'   zero-length `link`, `character(0)`.
#' @keywords twitter screenshot
#' @export
#' @examples
#' \dontrun{
#' tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images")
#' }
tweet_cap <- function(link = "", filename = "NOTHINGTWEET_CAP", folder = "DEFAULTTWEETCAP") {
  # Nothing to capture: leave before touching packages or the file system.
  if (length(link) == 0) {
    return(invisible(character(0)))
  }

  link <- as.character(link)
  if (anyNA(link) || any(!nzchar(link))) {
    stop("`link` must contain the URL of at least one tweet.", call. = FALSE)
  }

  # webshot is fetched on demand, as before; devtools is only needed for that.
  if (!requireNamespace("webshot", quietly = TRUE)) {
    if (!requireNamespace("devtools", quietly = TRUE)) {
      utils::install.packages("devtools")
    }
    devtools::install_github("wch/webshot")
  }

  if (folder == "DEFAULTTWEETCAP") {
    folder <- getwd()
  } else {
    folder <- file.path(getwd(), folder)
  }
  if (!dir.exists(folder)) {
    dir.create(folder, recursive = TRUE)
  }

  # Strip query strings, fragments and trailing slashes so the last path
  # segment really is the tweet id.
  bare_link <- sub("[?#].*$", "", link)
  bare_link <- sub("/+$", "", bare_link)

  # The handle is the first path segment after the (optionally http/www/mobile)
  # twitter.com host.
  username <- sub("^https?://([^/.]+\\.)?twitter\\.com/", "", bare_link)
  username <- sub("/.*", "", username)
  id_num <- sub(".*/", "", bare_link)

  pre_name <- if (filename == "NOTHINGTWEET_CAP") username else filename
  image_paths <- file.path(folder, paste0(pre_name, id_num, ".png"))

  # A progress bar is only shown when there is more than one tweet to grab.
  show_progress <- length(link) > 1
  if (show_progress) {
    pb <- utils::txtProgressBar(min = 0, max = length(link), style = 3)
    on.exit(close(pb), add = TRUE)
  }

  shots <- vector("list", length(link))
  for (i in seq_along(link)) {
    shots[[i]] <- webshot::webshot(
      link[i],
      image_paths[i],
      selector = c(".permalink-inner", ".permalink-tweet-container")
    )
    if (show_progress) {
      utils::setTxtProgressBar(pb, i)
    }
  }

  # Single link: keep handing back webshot's own result, as the original did.
  if (length(link) == 1) {
    return(invisible(shots[[1]]))
  }
  invisible(image_paths)
}
68 |
69 |
--------------------------------------------------------------------------------
/docs/tweet_cap.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "tweet_cap()"
3 | author: "Andrew"
4 | date: "3/31/2017"
5 | output: html_document
6 | ---
7 |
8 | Generate a screenshot of a tweet
9 |
10 | ## Description
11 |
12 | Quick screenshot of a tweet based on a link or array of links. The image will represent how the tweet looks on a browser.
13 |
14 | ## Usage
15 |
16 | tweet_cap(link = "", filename = "NOTHINGTWEET_CAP", folder = "DEFAULTTWEETCAP")
17 |
18 | ## Arguments
19 |
20 |
21 | * `link` - the URL of the tweet
22 | * `filename` - the base name of the image you want to save. Default will be user's twitter handle.
23 | * `folder` - the folder you want to save the screenshot to. If ignored, the folder will be the working directory.
24 |
25 | ### Documentation
26 |
27 | #### Single tweet
28 |
29 | To grab one tweet, just put in the URL address.
30 |
31 | `tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images")`
32 |
33 | This image will be generated to the **images** folder.
34 |
35 |
36 |

37 |
38 | ----
39 |
40 | #### Multiple tweets
41 |
42 | If you had a data frame of links:
43 |
44 | ```{r tweets_list, warning=F, message=F}
45 | library(tidyverse)
46 | library(knitr)
47 | ```
48 |
49 | ```{r loading_data}
50 | tweets <- tribble(
51 | ~Link, ~Account,
52 | "https://twitter.com/realDonaldTrump/status/847766558520856578", "realDonaldTrump",
53 | "https://twitter.com/realDonaldTrump/status/847056211006631936", "realDonaldTrump",
54 | "https://twitter.com/realDonaldTrump/status/846854703183020032", "realDonaldTrump",
55 | "https://twitter.com/byrdinator/status/847600835148128258", "byrdinator",
56 | "https://twitter.com/Gizmodo/status/847532891127074823", "Gizmodo"
57 | )
58 |
59 | kable(tweets)
60 | ```
61 |
62 | To generate a folder of images of the list of tweets, use:
63 |
64 | ```{r, eval=F}
65 | tweet_cap(tweets$Link, folder="images")
66 | ```
67 |
68 | And you'll get:
69 |
70 |
71 |

72 |
--------------------------------------------------------------------------------
/docs/untangle.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Disentangling a complex variable"
3 | author: "Rob Kabacoff, Ph.D."
4 | date: "March 9, 2017"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = FALSE, message = FALSE)
10 | ```
11 |
12 | ## Untangling a complex variable
13 |
14 | We are occasionally faced with a data set variable that contains multiple pieces of information. Consider the variable **charges** in the following dataset **test_data**.
15 |
16 | ```{r dataset, warning=F, message=F}
# Example data: `charges` packs several offences into one delimited string.
test_data <- data.frame(
  id = 1:4,
  charges = c(
    "Murder,Kidnapping,Burglary",
    "burglary: assault and battery",
    "murder",
    ""
  )
)
# `caption` was misspelled as `cpation`, so the table caption was never applied.
knitr::kable(test_data, caption = "test_data")
23 | ```
24 |
25 | Multiple pieces of information are stored in this variable. In this case these pieces are delineated by a comma or colon. As is, the variable is not useful.
26 |
27 | The **untangle** function can be used to create a set of dummy codes from this variable that will be more useful. This results in a new dataset with dummy codes:
28 |
29 | ```{r function, warning=F}
# Create 0/1 dummy-code columns from a variable that packs several codes into
# one delimited string.
#
#   data     data frame that holds the variable
#   x        name (character string) of the column to untangle
#   pattern  regular expression (stringr syntax) that separates the codes
#   verbose  if TRUE, print the unique codes that were found
#
# Returns `data` with one extra numeric column per unique code: 1 when the row
# contains that code, 0 when it does not, NA when the row's value is missing.
# Codes are lower-cased and trimmed; spaces in column names become "_".
untangle <- function(data, x, pattern, verbose = FALSE) {
  if (!requireNamespace("stringr", quietly = TRUE)) {
    stop("Package 'stringr' is required for untangle().", call. = FALSE)
  }

  variable <- stringr::str_to_lower(data[[x]])

  # one row per observation, one trimmed code per cell
  code_matrix <- stringr::str_split(variable, pattern, simplify = TRUE)
  code_matrix[] <- stringr::str_trim(code_matrix)

  # obtain list of unique codes (blank and missing cells are not codes)
  code_vector <- unique(as.character(code_matrix))
  code_vector <- code_vector[!is.na(code_vector) & code_vector != ""]
  if (verbose) cat("[Unique Codes] ", code_vector, "\n", sep = "\n")

  # create dummy codes matrix
  nobs <- nrow(data)
  ncodes <- length(code_vector)
  dummy_codes <- matrix(NA_real_, nrow = nobs, ncol = ncodes)
  missing_row <- is.na(variable)

  # add 1/0 codes; a row matches only when one of its own codes equals the
  # code exactly, so "murder" no longer flags "attempted murder" and codes
  # holding regex metacharacters are compared literally
  for (i in seq_len(ncodes)) {
    hit <- rowSums(code_matrix == code_vector[i], na.rm = TRUE) > 0
    dummy_codes[, i] <- as.numeric(hit)
    dummy_codes[missing_row, i] <- NA
  }

  # add dummy code names
  dummy_codes <- as.data.frame(dummy_codes)
  names(dummy_codes) <- stringr::str_replace_all(code_vector, " ", "_")

  # add to data frame
  cbind(data, dummy_codes)
}
61 |
62 | ```
63 |
64 | ```{r, echo=TRUE, warning=F}
65 | test_data <- untangle(data=test_data, x="charges", pattern="[,:]", verbose=TRUE)
66 |
67 | ```
68 |
69 |
70 |
71 | ```{r results}
72 | knitr::kable(test_data, caption="New test_data")
73 |
74 | ```
75 |
76 |
77 |
--------------------------------------------------------------------------------
/R/bulk_excel.R:
--------------------------------------------------------------------------------
#' Creating a single data frame from a folder of similarly structured Excel spreadsheets
#'
#' Like Voltron-ing your Excel data. This package is based on readxl, so passing on variables from read_excel will work in bulk_excel. These variables will apply to the import of each Excel spreadsheet.
#'
#' @param folder The folder in relation to your working directory where the Excel files exist. Default folder is the current working directory.
#' @param export File name to export csv file as, if wanted. The combined data
#'   is always written as CSV, whatever extension is given.
#' @param sheet,col_names,col_types,na,skip Passed on to
#'   `readxl::read_excel()` for every spreadsheet.
#' @return The combined data from all spreadsheets, stacked with `rbind()`.
#' @keywords data appending
#' @export
#' @examples
#' \dontrun{
#' bulk_excel(folder="data", export="combined_data.csv")
#' }
bulk_excel <- function(folder = "DEFAULTBULKCSV2017",
                       export = "filenamedefaultbulkcsv2017.csv",
                       sheet = 1, col_names = TRUE, col_types = NULL,
                       na = "", skip = 0) {
  if (folder == "DEFAULTBULKCSV2017" || folder == "") {
    folder <- getwd()
  } else {
    folder <- file.path(getwd(), folder)
  }

  # `pattern` is a single regular expression, not a vector of globs.
  files <- list.files(folder, pattern = "\\.xlsx?$", ignore.case = TRUE)
  # Skip the "~..." lock files Excel leaves next to open workbooks.
  files <- files[!startsWith(files, "~")]

  if (length(files) == 0) {
    stop("No Excel files (.xls or .xlsx) found in ", folder, call. = FALSE)
  }

  write_export <- export != "filenamedefaultbulkcsv2017.csv" && export != ""

  if (!requireNamespace("readxl", quietly = TRUE)) {
    stop("Package 'readxl' is required for bulk_excel().", call. = FALSE)
  }
  if (write_export && !requireNamespace("readr", quietly = TRUE)) {
    stop("Package 'readr' is required to export the combined data.", call. = FALSE)
  }

  pb <- utils::txtProgressBar(min = 0, max = length(files), style = 3)
  on.exit(close(pb), add = TRUE)

  # Collect every sheet first and bind once, instead of growing with rbind().
  sheets <- vector("list", length(files))
  for (i in seq_along(files)) {
    file_x <- readxl::read_excel(
      file.path(folder, files[i]),
      sheet = sheet, col_names = col_names, col_types = col_types,
      na = na, skip = skip
    )
    # Missing header cells become empty names rather than NA.
    colnames(file_x) <- ifelse(is.na(colnames(file_x)), "", colnames(file_x))
    sheets[[i]] <- file_x
    utils::setTxtProgressBar(pb, i)
  }

  all_files <- if (length(sheets) == 1) sheets[[1]] else do.call(rbind, sheets)

  if (write_export) {
    readr::write_csv(all_files, export)
  }
  return(all_files)
}
76 |
77 |
78 |
--------------------------------------------------------------------------------
/R/bulk_csv.R:
--------------------------------------------------------------------------------
#' Creating a single data frame from a folder of similarly structured CSV spreadsheets
#'
#' Like Voltron-ing your data. This package is based on readr, so passing on variables from read_csv will work in bulk_csv. These variables will apply to the import of each CSV sheet.
#'
#' @param folder The folder in relation to your working directory where the csv files exist. Default folder is the current working directory.
#' @param export File name to export csv file as, if wanted.
#' @param col_names,col_types,na,quoted_na,comment,trim_ws,skip,n_max,guess_max,progress
#'   Passed on to `readr::read_csv()` for every file. `quoted_na` is only
#'   forwarded when it is supplied explicitly.
#' @return The combined data from all CSV files, stacked with `rbind()`.
#' @keywords data appending
#' @export
#' @examples
#' \dontrun{
#' bulk_csv(folder="data", export="combined_data.csv")
#' }
bulk_csv <- function(folder = "DEFAULTBULKCSV2017",
                     export = "filenamedefaultbulkcsv2017.csv",
                     col_names = TRUE, col_types = NULL,
                     na = c("", "NA"), quoted_na = TRUE,
                     comment = "", trim_ws = TRUE, skip = 0, n_max = Inf,
                     guess_max = min(1000, n_max), progress = interactive()) {
  if (folder == "DEFAULTBULKCSV2017" || folder == "") {
    folder <- getwd()
  } else {
    folder <- file.path(getwd(), folder)
  }

  # `pattern` is a regular expression: anchor on the extension so names such
  # as "data.csv.bak" are not picked up.
  files <- list.files(folder, pattern = "\\.csv$", ignore.case = TRUE)

  if (length(files) == 0) {
    stop("No CSV files found in ", folder, call. = FALSE)
  }
  if (!requireNamespace("readr", quietly = TRUE)) {
    stop("Package 'readr' is required for bulk_csv().", call. = FALSE)
  }

  reader_args <- list(
    col_names = col_names, col_types = col_types, na = na,
    comment = comment, trim_ws = trim_ws, skip = skip, n_max = n_max,
    guess_max = guess_max, progress = progress
  )
  # Newer readr releases soft-deprecate `quoted_na`; only forward it when the
  # caller asked for it (the reader's own default is the same TRUE).
  if (!missing(quoted_na)) {
    reader_args$quoted_na <- quoted_na
  }

  pb <- utils::txtProgressBar(min = 0, max = length(files), style = 3)
  on.exit(close(pb), add = TRUE)

  # Collect every table first and bind once, instead of growing with rbind().
  tables <- vector("list", length(files))
  for (i in seq_along(files)) {
    tables[[i]] <- do.call(
      readr::read_csv,
      c(list(file.path(folder, files[i])), reader_args)
    )
    utils::setTxtProgressBar(pb, i)
  }

  all_files <- if (length(tables) == 1) tables[[1]] else do.call(rbind, tables)

  if (export != "filenamedefaultbulkcsv2017.csv" && export != "") {
    readr::write_csv(all_files, export)
  }
  return(all_files)
}
79 |
80 |
--------------------------------------------------------------------------------
/.Rhistory:
--------------------------------------------------------------------------------
1 | install.packages("muckrock")
2 | library(muckrock)
3 | ?muckrock
4 | ag <- agency_metadata
5 | View(ag)
6 | ju <- jurisdiction_metadata
7 | View(ju)
8 | re <- request_metadata
9 | View(re)
10 | library(muckraker)
11 | ?untangle
12 | library(roxygen2)
13 | roxygenise()
14 | library(muckraker)
15 | ?untangle
16 | roxygenise()
17 | library(muckraker)
18 | ?untangle
19 | roxygenise()
20 | library(muckraker)
21 | ?muckraker
22 | library(muckraker)
23 | library(muckrakr)
24 | library(muckrakr)
25 | library(readr)
26 | ?read_csv
27 | ?list.files
28 | library(roxygen2)
29 | roxygenise()
30 | roxygenise()
31 | library(muckrakr)
32 | library(muckrakr)
33 | roxygenise()
34 | library(muckrakr)
35 | roxygenise()
36 | library(muckrakr)
37 | library(muckrakr)
38 | library(readxl)
39 | ?list.files
40 | ?read_excel
41 | ?write_excel
42 | ?readxl
43 | files = list.files(folder, pattern=c("*.xls", "*.xlsx"))
44 | link="https://twitter.com/memeprovider/status/833888807959289856"
45 | length(link)
46 | link="https://twitter.com/memeprovider/status/833888807959289856"
47 | gsub("https://twitter.com/", "", link)
48 | username <- gsub("https://twitter.com/", "", link)
49 | ?gsub
50 | username <- gsub("https://twitter.com/", "", link)
51 | gsub("/.*","" fixed=T)
52 | gsub("/.*","", fixed=T)
53 | gsub("/.*","",username, fixed=T)
54 | gsub("/*.","",username, fixed=T)
55 | gsub("///*.","",username, fixed=T)
56 | gsub("\\/*.","",username, fixed=T)
57 | gsub("\\/.*","",username, fixed=T)
58 | gsub("status.*","",username, fixed=T)
59 | gsub("status*.","",username, fixed=T)
60 | gsub(".*status","",username, fixed=T)
61 | gsub(".*status","",username, fixed=F)
62 | gsub("/.*","",username, fixed=F)
63 | id_num <- gsub(".*/", "", link)
64 | id_num
65 | tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){
66 | if(!require(devtools)){
67 | install.packages("devtools")
68 | library(devtools)
69 | }
70 | if(!require(webshot)){
71 | devtools::install_github("wch/webshot")
72 | library(webshot)
73 | }
74 | if (folder=="DEFAULTTWEETCAP") {
75 | folder=getwd()
76 | } else {
77 | folder <- paste0(getwd(), "/", folder)
78 | }
79 | if (length(link)==1) {
80 | link = link
81 | username <- gsub("https://twitter.com/", "", link)
82 | username <- gsub("/.*","",username, fixed=F)
83 | if (name=="NOTHINGTWEET_CAP") {
84 | pre_name <- username
85 | } else {
86 | pre_name <- filename
87 | }
88 | id_num <- gsub(".*/", "", link)
89 | image_name <- paste0(pre_name, id_num, .png)
90 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
91 | }
92 | }
93 | tweet_cap
94 | tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images")
95 | tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){
96 | if(!require(devtools)){
97 | install.packages("devtools")
98 | library(devtools)
99 | }
100 | if(!require(webshot)){
101 | devtools::install_github("wch/webshot")
102 | library(webshot)
103 | }
104 | if (folder=="DEFAULTTWEETCAP") {
105 | folder=getwd()
106 | } else {
107 | folder <- paste0(getwd(), "/", folder)
108 | }
109 | if (length(link)==1) {
110 | link = link
111 | username <- gsub("https://twitter.com/", "", link)
112 | username <- gsub("/.*","",username, fixed=F)
113 | if (filename=="NOTHINGTWEET_CAP") {
114 | pre_name <- username
115 | } else {
116 | pre_name <- filename
117 | }
118 | id_num <- gsub(".*/", "", link)
119 | image_name <- paste0(pre_name, id_num, .png)
120 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
121 | }
122 | }
123 | tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images")
124 | tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){
125 | if(!require(devtools)){
126 | install.packages("devtools")
127 | library(devtools)
128 | }
129 | if(!require(webshot)){
130 | devtools::install_github("wch/webshot")
131 | library(webshot)
132 | }
133 | if (folder=="DEFAULTTWEETCAP") {
134 | folder=getwd()
135 | } else {
136 | folder <- paste0(getwd(), "/", folder)
137 | }
138 | if (length(link)==1) {
139 | link = link
140 | username <- gsub("https://twitter.com/", "", link)
141 | username <- gsub("/.*","",username, fixed=F)
142 | if (filename=="NOTHINGTWEET_CAP") {
143 | pre_name <- username
144 | } else {
145 | pre_name <- filename
146 | }
147 | id_num <- gsub(".*/", "", link)
148 | image_name <- paste0(pre_name, id_num, ".png")
149 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
150 | }
151 | }
152 | tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images")
153 | roxygenise()
154 | library(muckrakr)
155 | ?bulk_csv
156 | ?bulk_csv
157 | ?read_excel
158 | ?read_csv
159 | roxygenise()
160 | roxygenise()
161 | library(muckrakr)
162 | knitr::opts_chunk$set(echo = TRUE)
163 | tribble(
164 | ~Link, ~Account,
165 | "https://twitter.com/realDonaldTrump/status/847766558520856578", "realDonaldTrump",
166 | "https://twitter.com/realDonaldTrump/status/847056211006631936", "realDonaldTrump",
167 | "https://twitter.com/realDonaldTrump/status/846854703183020032", "realDonaldTrump",
168 | "https://twitter.com/byrdinator/status/847600835148128258", "byrdinator",
169 | "https://twitter.com/Gizmodo/status/847532891127074823", "Gizmodo"
170 | )
171 | library(tidyverse)
172 | tribble(
173 | ~Link, ~Account,
174 | "https://twitter.com/realDonaldTrump/status/847766558520856578", "realDonaldTrump",
175 | "https://twitter.com/realDonaldTrump/status/847056211006631936", "realDonaldTrump",
176 | "https://twitter.com/realDonaldTrump/status/846854703183020032", "realDonaldTrump",
177 | "https://twitter.com/byrdinator/status/847600835148128258", "byrdinator",
178 | "https://twitter.com/Gizmodo/status/847532891127074823", "Gizmodo"
179 | )
180 | tribble(
181 | ~Link, ~Account,
182 | "https://twitter.com/realDonaldTrump/status/847766558520856578", "realDonaldTrump",
183 | "https://twitter.com/realDonaldTrump/status/847056211006631936", "realDonaldTrump",
184 | "https://twitter.com/realDonaldTrump/status/846854703183020032", "realDonaldTrump",
185 | "https://twitter.com/byrdinator/status/847600835148128258", "byrdinator",
186 | "https://twitter.com/Gizmodo/status/847532891127074823", "Gizmodo"
187 | )
188 | tweets <- tribble(
189 | ~Link, ~Account,
190 | "https://twitter.com/realDonaldTrump/status/847766558520856578", "realDonaldTrump",
191 | "https://twitter.com/realDonaldTrump/status/847056211006631936", "realDonaldTrump",
192 | "https://twitter.com/realDonaldTrump/status/846854703183020032", "realDonaldTrump",
193 | "https://twitter.com/byrdinator/status/847600835148128258", "byrdinator",
194 | "https://twitter.com/Gizmodo/status/847532891127074823", "Gizmodo"
195 | )
196 | View(tweets)
197 | tweet_cap(link=tweets$Link, folder="images")
198 | tweets$Link
199 | link=tweets$Link
200 | pb <- txtProgressBar(min = 0, max = length(link), style = 3)
201 | for (i in length(link)){
202 | link_i = link[i]
203 | username <- gsub("https://twitter.com/", "", link_i)
204 | username <- gsub("/.*","",username, fixed=F)
205 | if (filename=="NOTHINGTWEET_CAP") {
206 | pre_name <- username
207 | } else {
208 | pre_name <- filename
209 | }
210 | id_num <- gsub(".*/", "", link_i)
211 | image_name <- paste0(pre_name, id_num, ".png")
212 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
213 | setTxtProgressBar(pb, i)
214 | }
215 | filename="NOTHINGTWEET_CAP"
216 | pb <- txtProgressBar(min = 0, max = length(link), style = 3)
217 | for (i in length(link)){
218 | link_i = link[i]
219 | username <- gsub("https://twitter.com/", "", link_i)
220 | username <- gsub("/.*","",username, fixed=F)
221 | if (filename=="NOTHINGTWEET_CAP") {
222 | pre_name <- username
223 | } else {
224 | pre_name <- filename
225 | }
226 | id_num <- gsub(".*/", "", link_i)
227 | image_name <- paste0(pre_name, id_num, ".png")
228 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
229 | setTxtProgressBar(pb, i)
230 | }
231 | folder="DEFAULTTWEETCAP"
232 | pb <- txtProgressBar(min = 0, max = length(link), style = 3)
233 | for (i in length(link)){
234 | link_i = link[i]
235 | username <- gsub("https://twitter.com/", "", link_i)
236 | username <- gsub("/.*","",username, fixed=F)
237 | if (filename=="NOTHINGTWEET_CAP") {
238 | pre_name <- username
239 | } else {
240 | pre_name <- filename
241 | }
242 | id_num <- gsub(".*/", "", link_i)
243 | image_name <- paste0(pre_name, id_num, ".png")
244 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
245 | setTxtProgressBar(pb, i)
246 | }
247 | getwd()
248 | roxygenise()
249 | library(muckrakr)
250 | tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images")
251 | tweet_cap(link=tweets$Link, folder="images")
252 | tweet_cap(tweets$Link, folder="images")
253 | tweets$Link
254 | str(tweets$Link)
255 | str("https://twitter.com/memeprovider/status/833888807959289856")
256 | tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){
257 | if(!require(devtools)){
258 | install.packages("devtools")
259 | library(devtools)
260 | }
261 | if(!require(webshot)){
262 | devtools::install_github("wch/webshot")
263 | library(webshot)
264 | }
265 | if (folder=="DEFAULTTWEETCAP") {
266 | folder=getwd()
267 | } else {
268 | folder <- paste0(getwd(), "/", folder)
269 | }
270 | if (length(link)==1) {
271 | link = link
272 | username <- gsub("https://twitter.com/", "", link)
273 | username <- gsub("/.*","",username, fixed=F)
274 | if (filename=="NOTHINGTWEET_CAP") {
275 | pre_name <- username
276 | } else {
277 | pre_name <- filename
278 | }
279 | id_num <- gsub(".*/", "", link)
280 | image_name <- paste0(pre_name, id_num, ".png")
281 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
282 | }
283 | if (length(link)>1) {
284 | pb <- txtProgressBar(min = 0, max = length(link), style = 3)
285 | for (i in length(link)){
286 | link_i = link[i]
287 | username <- gsub("https://twitter.com/", "", link_i)
288 | username <- gsub("/.*","",username, fixed=F)
289 | if (filename=="NOTHINGTWEET_CAP") {
290 | pre_name <- username
291 | } else {
292 | pre_name <- filename
293 | }
294 | id_num <- gsub(".*/", "", link_i)
295 | image_name <- paste0(pre_name, id_num, ".png")
296 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
297 | setTxtProgressBar(pb, i)
298 | }
299 | }
300 | }
301 | tweet_cap(tweets$Link)
302 | tweet_cap(tweets$Link, folder="images")
303 | tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){
304 | if(!require(devtools)){
305 | install.packages("devtools")
306 | library(devtools)
307 | }
308 | if(!require(webshot)){
309 | devtools::install_github("wch/webshot")
310 | library(webshot)
311 | }
312 | if (folder=="DEFAULTTWEETCAP") {
313 | folder=getwd()
314 | } else {
315 | folder <- paste0(getwd(), "/", folder)
316 | }
317 | if (length(link)==1) {
318 | link = link
319 | username <- gsub("https://twitter.com/", "", link)
320 | username <- gsub("/.*","",username, fixed=F)
321 | if (filename=="NOTHINGTWEET_CAP") {
322 | pre_name <- username
323 | } else {
324 | pre_name <- filename
325 | }
326 | id_num <- gsub(".*/", "", link)
327 | image_name <- paste0(pre_name, id_num, ".png")
328 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
329 | }
330 | if (length(link)>1) {
331 | pb <- txtProgressBar(min = 0, max = length(link), style = 3)
332 | for (i in length(link)){
333 | link_i = link[i]
334 | username <- gsub("https://twitter.com/", "", link_i)
335 | username <- gsub("/.*","",username, fixed=F)
336 | if (filename=="NOTHINGTWEET_CAP") {
337 | pre_name <- username
338 | } else {
339 | pre_name <- filename
340 | }
341 | id_num <- gsub(".*/", "", link_i)
342 | image_name <- paste0(pre_name, id_num, ".png")
343 | webshot(link_i, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
344 | setTxtProgressBar(pb, i)
345 | }
346 | }
347 | }
348 | tweet_cap(tweets$Link, folder="images")
349 | link <- tweets$Link
350 | filename="NOTHINGTWEET_CAP"
351 | folder <- "test"
352 | length(link)>1
353 | pb <- txtProgressBar(min = 0, max = length(link), style = 3)
354 | pb
355 | tweet_cap <- function(link="", filename="NOTHINGTWEET_CAP", folder="DEFAULTTWEETCAP"){
356 | if(!require(devtools)){
357 | install.packages("devtools")
358 | library(devtools)
359 | }
360 | if(!require(webshot)){
361 | devtools::install_github("wch/webshot")
362 | library(webshot)
363 | }
364 | if (folder=="DEFAULTTWEETCAP") {
365 | folder=getwd()
366 | } else {
367 | folder <- paste0(getwd(), "/", folder)
368 | }
369 | if (length(link)==1) {
370 | link = link
371 | username <- gsub("https://twitter.com/", "", link)
372 | username <- gsub("/.*","",username, fixed=F)
373 | if (filename=="NOTHINGTWEET_CAP") {
374 | pre_name <- username
375 | } else {
376 | pre_name <- filename
377 | }
378 | id_num <- gsub(".*/", "", link)
379 | image_name <- paste0(pre_name, id_num, ".png")
380 | webshot(link, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
381 | }
382 | if (length(link)>1) {
383 | pb <- txtProgressBar(min = 0, max = length(link), style = 3)
384 | for (i in 1:length(link)){
385 | link_i = link[i]
386 | username <- gsub("https://twitter.com/", "", link_i)
387 | username <- gsub("/.*","",username, fixed=F)
388 | if (filename=="NOTHINGTWEET_CAP") {
389 | pre_name <- username
390 | } else {
391 | pre_name <- filename
392 | }
393 | id_num <- gsub(".*/", "", link_i)
394 | image_name <- paste0(pre_name, id_num, ".png")
395 | webshot(link_i, paste0(folder, "/", image_name), selector=c(".permalink-inner", ".permalink-tweet-container"))
396 | setTxtProgressBar(pb, i)
397 | }
398 | }
399 | }
400 | tweet_cap(tweets$Link, folder="images")
401 | ?tweet_cap
402 | tweet_cap(link="https://twitter.com/memeprovider/status/833888807959289856", folder="images")
403 | tweet_cap(tweets$Link, folder="images")
404 | roxygenize()
405 | library(muckrakr)
406 |
--------------------------------------------------------------------------------