├── assment_one.Rmd
├── auto_reporting.R
├── auto_reporting.Rmd
├── iterative_reporting
    ├── markdown_multiple_reports.Rmd
    ├── markdown_multiple_reports.html
    ├── r-tmp
    │   ├── loop.R
    │   ├── report.Rmd
    │   └── reports
    │   │   ├── test_report_doc1_2018-04-05.docx
    │   │   └── test_report_doc2_2018-04-05.docx
    ├── r_script.R
    └── rmarkdown_script.Rmd
├── olympics.csv
├── portland mapping
    ├── .Rapp.history
    ├── GISwR.pdf
    ├── ggmaps.R
    └── mapping in r.R
├── rater reliability .R
├── read in ipeds data.R
├── read_googlesheet.R
├── read_multiple_txt_files.R
├── rename_colnames_base_on_crosswalk
├── rmaps.R
├── rvest.R
├── ts_graphs.R
└── tutorials
    ├── Histograms advanced.Rmd
    ├── Histograms_advanced.html
    ├── ames.csv
    ├── colors.Rmd
    ├── colors.html
    ├── create_variables.Rmd
    ├── create_variables.html
    ├── creating html tables.Rmd
    ├── creating_html_tables.html
    ├── dplyr.Rmd
    ├── dplyr.html
    ├── evals.RData
    ├── excel.Rmd
    ├── excel.html
    ├── facets.Rmd
    ├── facets.html
    ├── geocoder.Rmd
    ├── geocoder.html
    ├── geocoding.Rmd
    ├── geocoding.html
    ├── histograms.Rmd
    ├── histograms.html
    ├── histograms_I.Rmd
    ├── histograms_I.html
    ├── histograms_pdf.Rmd
    ├── histograms_pdf.pdf
    ├── line graphs advanced.Rmd
    ├── line graphs.Rmd
    ├── line_graphs.html
    ├── line_graphs_advanced.html
    ├── load_data.Rmd
    ├── load_data.html
    ├── loops_with_ggplot2.Rmd
    ├── loops_with_ggplot2.html
    ├── markdown_formatting.Rmd
    ├── markdown_formatting.html
    ├── matriculants.Rmd
    ├── matriculants.html
    ├── missing_data.Rmd
    ├── missing_data.html
    ├── my-report.html
    ├── read_and_summarize_multiple_txt.Rmd
    ├── read_and_summarize_multiple_txt.html
    ├── reading_and_writing.Rmd
    ├── reading_and_writing.html
    ├── reading_and_writing.pdf
    ├── regex.Rmd
    ├── regex.html
    ├── reordering_geom_bar.Rmd
    ├── reordering_geom_bar.html
    ├── reproducible_research.Rmd
    ├── reproducible_research.pdf
    ├── reproducible_research.tex
    ├── rvest.Rmd
    ├── rvest.html
    ├── scatter plots advanced.Rmd
    ├── scatter plots.Rmd
    ├── scatter_plots.html
    ├── scatter_plots_advanced.html
    ├── summary_statistics.Rmd
    ├── summary_statistics.html
    ├── summary_statistics.pdf
    ├── ttests_pdf.Rmd
    └── ttests_pdf.pdf


/assment_one.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Assessment I: Sports Analytics Jan Plan"
  3 | name: ""
  4 | output: html_document
  5 | ---
  6 | 
  7 | ```{r setup, include=FALSE}
  8 | knitr::opts_chunk$set(echo = TRUE)
  9 | ```
 10 | 
 11 | ## Section 1: Loading Packages and Data
 12 | 
 13 | ##### Load the tidyverse library
 14 | ```{r}
 15 | ```
 16 | 
 17 | ##### Load the Olympic medals data into a data frame called olympics
 18 | 
 19 | ```{r, echo=FALSE}
 20 | 
 21 | ```
 22 | 
 23 | Here's some info about variables in the data: 
 24 | 
 25 | - ID - Unique number for each athlete
 26 | - Name - Athlete's name
 27 | - Sex - M or F
 28 | - Age - Integer
 29 | - Height - In centimeters
 30 | - Weight - In kilograms
 31 | - Team - Team name
 32 | - NOC - National Olympic Committee 3-letter code
 33 | - Games - Year and season
 34 | - Year - Integer
 35 | - Season - Summer or Winter
 36 | - City - Host city
 37 | - Sport - Sport
 38 | - Event - Event
 39 | - Medal - Gold, Silver, Bronze, or NA
 40 | 
 41 | ## Section 2: Describing the data
 42 | 
 43 | ##### How many rows are in the data? 
 44 | 
 45 | ##### How many columns are in the data?
 46 | 
 47 | 
 48 | 
 49 | ## Section 3: Country (NOC) Medal Counts
 50 | 
 51 | ##### Which country (NOC) has won the most medals?  
 52 | ```{r}
 53 | 
 54 | ```
 55 | 
 56 | ##### Which country has won the most medals in the winter games? 
 57 | ```{r}
 58 | 
 59 | ```
 60 | 
 61 | ## Section 4: Sport Medal Counts
 62 | 
 63 | ##### Which sport has awarded the fewest medals? 
 64 | ```{r}
 65 | 
 66 | ```
 67 | 
 68 | ##### In how many Olympic Games were medals awarded in the sport of Tug-Of-War?
 69 | ```{r}
 70 | 
 71 | ```
 72 | 
 73 | 
 74 | ## Section 5: BMI
 75 | 
 76 | #####  Which gold medal winner has the highest BMI? (BMI = kg/m^2)
 77 | ```{r}
 78 | 
 79 | ```
 80 | 
 81 | 
 82 | ## Section 6: Graphing
 83 | 
 84 | ##### Create a boxplot of the age of medal winners by sex
 85 | ```{r}
 86 | 
 87 | ```
 88 | 
 89 | ##### Who is that person who won a medal when they were 10? 
 90 | ```{r}
 91 | 
 92 | ```
 93 | 
 94 | ##### Facet the boxplot by Medal
 95 | ```{r}
 96 | 
 97 | ```
 98 | 
 99 | 
100 | ## Section 7: Bonus 
101 | 
102 | ##### Which cities have hosted the games multiple times?
103 | ```{r}
104 | 
105 | ```
106 | 
107 | 
108 | 


--------------------------------------------------------------------------------
/auto_reporting.R:
--------------------------------------------------------------------------------
 1 | # Reference 
 2 | # http://www.r-bloggers.com/how-to-source-an-r-script-automatically-on-a-mac-using-automator-and-ical/
 3 | # http://www.engadget.com/2013/03/18/triggering-applescripts-from-calendar-alerts-in-mountain-lion/
 4 | # http://willchernoff.com/2013/04/23/periodically-run-an-r-script-as-a-background-process-using-launchd-under-osx/
 5 | 
 6 | library(knitr)
 7 | library(markdown)
 8 | library(rmarkdown)
 9 | library(stringr)
10 | library(ggmap)
11 | 
12 | setwd('/Users/majerus/Desktop/R/auto_reporting/test/reports/')
13 | 
14 | ## knitr loop
15 | mtcars <- mtcars[c(1,5),]
16 | rownames(mtcars) <- str_replace_all(rownames(mtcars), ' ', '')
17 | 
18 | map <-
19 |   get_map(location="United States",
20 |           source= 'google', maptype = 'terrain', color='bw', zoom=4) 
21 |   
22 | 
23 | for (car in unique(rownames(mtcars))){
24 |   # knit2pdf("testingloops.Rnw", output=paste0('report_', hosp, '.tex'))
25 | 
26 |   #knit("/Users/majerus/Desktop/R/auto_reporting/test/reports/r_script_pdf.Rmd")
27 |   
28 |   # output folder: one sub-folder per cylinder count; anything other than 4 or 6 goes to cyl8
29 |   cyl <- mtcars$cyl[rownames(mtcars) == car]   # look the value up once instead of once per branch
30 |   folder <- 
31 |     if (cyl == 4) {"/Users/majerus/Desktop/R/auto_reporting/test/reports/cyl4/"}
32 |     else if (cyl == 6) {"/Users/majerus/Desktop/R/auto_reporting/test/reports/cyl6/"}
33 |     else {"/Users/majerus/Desktop/R/auto_reporting/test/reports/cyl8/"}
34 |   
35 |   # one PDF per car; the "_" keeps the car name and the date apart in the file name
36 |   render(input = 'r_script_pdf.Rmd',
37 |          output_file = paste0("test_report_", car, "_", Sys.Date(), ".pdf"),
38 |          output_dir = folder             
39 |          )
40 |   
41 | # rmarkdown::render(input = "/Users/majerus/Desktop/R/auto_reporting/test/r_script_pdf.Rmd", 
42 | #           output_format = "pdf_document",
43 | #           output_file = paste("test_report_", car, Sys.Date(), ".pdf", sep=''),
44 | #           output_dir = "/Users/majerus/Desktop/R/auto_reporting/test/reports")
45 | #   
46 | }
47 | 
48 | 
49 | 
50 | # quit(save="no")
51 | 
52 | # rmarkdown::render('/Users/majerus/Desktop/R/auto_reporting/test/r_script.Rmd', 
53 | #                   output_file =  paste("report_", Sys.Date(), ".html", sep=''), 
54 | #                   output_dir = '/Users/majerus/Desktop/R/auto_reporting/test/reports')
55 | # 
56 | 
57 | 
58 | 
59 | # 
60 | # knit("/Users/majerus/Desktop/R/auto_reporting/test/r_script.Rmd")
61 | # 
62 | # rmarkdown::render(input = "/Users/majerus/Desktop/R/auto_reporting/test/r_script.Rmd", 
63 | #        output_format = "pdf_document",
64 | #        output_file = paste("test_report_", Sys.Date(), ".pdf", sep=''),
65 | #        output_dir = "/Users/majerus/Desktop/R/auto_reporting/test/")
66 | # 
67 | # 
68 | # # render(input, output_format = NULL, output_file = NULL, output_dir = NULL,
69 | # #        output_options = NULL, intermediates_dir = NULL,
70 | # #        runtime = c("auto", "static", "shiny"),
71 | # #        clean = TRUE, envir = parent.frame(), quiet = FALSE,
72 | # #        encoding = getOption("encoding"))
73 | # 
74 | # 
75 | # 
76 | # ## for html 
77 | # markdownToHTML("r_script.md", 
78 | #                paste("/Users/majerus/Desktop/R/auto_reporting/test/reports/test_report_", Sys.Date(), ".html", sep='')
79 | #                #stylesheet="C:/Users/Rich/Dropbox/tca/Admissions/FM/Project12/yield_model_2014/Dashboard/dashboardcss.css"
80 | # )
81 | 


--------------------------------------------------------------------------------
/auto_reporting.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Example Car Sales Report"
 3 | author: "Rich Majerus"
 4 | output: pdf_document
 5 | ---
 6 | 
 7 | ```{r setup, include=FALSE}
 8 | # set global chunk options
 9 | # opts_chunk$set(cache=FALSE)
10 | library(ggplot2)
11 | library(dplyr)
12 | library(ggmap)
13 | library(knitr)
14 | library(markdown)
15 | library(rmarkdown)
16 | library(xtable)
17 | ```
18 | 
19 | ```{r, echo=FALSE}
20 | #car <- 'Mazda RX4' # for testing 
21 | cars <- mtcars[rownames(mtcars) == car, ]
22 | 
23 | # create daily data: repeat the selected car's row a random number (1-10) of times
24 | n_sold <- sample(1:10, 1)
25 | cars <- do.call("rbind", replicate(n_sold, cars, simplify = FALSE))
26 | 
27 | # one random coordinate per row; runif() is vectorised, so no sapply() over row names is needed
28 | cars$lat <- round(runif(nrow(cars), 30, 46), 3)
29 | cars$lon <- round(runif(nrow(cars), -115, -80), 3)
30 | 
31 | ```
32 | 
33 | Today is `r Sys.Date()`.  
34 | 
35 | Today we sold `r nrow(cars)` `r car`. 
36 | 
37 | <br><br>
38 | 
39 | Today we sold cars to people in the following locations: 
40 | 
41 | <br><br>
42 | 
43 | ```{r, echo=FALSE,  message = FALSE}
44 | # map of cars sold
45 | #map <- # now included in r source file so it is only run once 
46 | #get_map(location="United States",
47 | #source= 'google', maptype = 'terrain', color='bw', zoom=4) 
48 | 
49 | ggmap(map) + 
50 | geom_point(aes(x = lon, y = lat), data = cars, alpha = 1, color="darkred", size = 3) +
51 |   scale_size_area(20) 
52 | 
53 | 
54 | ```
55 | 
56 | <br><br>
57 | 
58 | Here is a data table of the cars we sold. 
59 | 
60 | <br><br>
61 | 
62 | ```{r xtable, echo=FALSE, results='asis', message=FALSE}
63 | # data table of cars sold 
64 | 
65 | table <- xtable(cars[,c(1:2, 12:13)])
66 | print(table, type="latex", comment = FALSE)
67 | 
68 | 
69 | ```
70 | 
71 | <br><br>
72 | 
73 | \newpage
74 | 
75 | Here is a plot of mpg vs. weight for the sold cars: 
76 | 
77 | <br><br>
78 | 
79 | ```{r, echo=FALSE}
80 | 
81 | # plot 
82 | 
83 | ggplot(cars, aes(mpg, wt)) + 
84 |  geom_point(position = position_jitter(w = 0.1, h = 0.1)) 
85 | 
86 | 
87 | ```
88 | 
89 | 
90 | 


--------------------------------------------------------------------------------
/iterative_reporting/markdown_multiple_reports.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Create Multiple Reports with RMarkdown"
  3 | author: "Rich Majerus"
  4 | output: html_document
  5 | ---
  6 | 
  7 | <br/>
  8 | 
  9 | ## Introduction 
 10 | Using two files (an r script and an rmarkdown script) we can create a series of reports.  The following example creates five reports using the mtcars data.  The rmarkdown file is called by the rscript one time for each unique car name in the mtcars data.  An example of the reports produced by these files can be found <a href="http://www.reed.edu/data-at-reed/resources/R/markdown_loop_example.html" target="_blank">here</a>.  <br/> 
 11 | 
 12 | The rscript and rmarkdown script referenced in this tutorial can be found <a href="http://www.richmajerus.com/" target="_blank">here</a> along with the rmarkdown file that creates this tutorial.  
 13 | 
 14 | <br/>
 15 | 
 16 | ## File 1: R Script 
 17 | 
 18 | ```{r, eval=FALSE}
 19 | 
 20 | # References for automation 
 21 | # http://www.r-bloggers.com/how-to-source-an-r-script-automatically-on-a-mac-using-automator-and-ical/
 22 | # http://www.engadget.com/2013/03/18/triggering-applescripts-from-calendar-alerts-in-mountain-lion/
 23 | 
 24 | # File 1: Should be an R-Script 
 25 |     # contains a loop that iteratively calls an Rmarkdown file (i.e. File 2)
 26 | 
 27 | # load packages
 28 | library(knitr)
 29 | library(markdown)
 30 | library(rmarkdown)
 31 | 
 32 | # use first 5 rows of mtcars as example data
 33 | mtcars <- mtcars[1:5,]
 34 | 
 35 | # create map to plot data on (this is outside the loop so it is only called once)
 36 | map <-
 37 |   ggmap::get_map(location="United States",   # ggmap:: because File 1 never calls library(ggmap)
 38 |           source= 'google', maptype = 'terrain', color='bw', zoom=4) 
 39 | 
 40 | # for each type of car in the data create a report
 41 |   # these reports are saved in output_dir with the name specified by output_file
 42 | for (car in unique(rownames(mtcars))){
 43 |   rmarkdown::render('/Users/majerus/Desktop/R/auto_reporting/test/r_script.Rmd',  # file 2
 44 |                    output_file =  paste("report_", car, '_', Sys.Date(), ".html", sep=''), 
 45 |                    output_dir = '/Users/majerus/Desktop/R/auto_reporting/test/reports')
 46 | 
 47 | # for pdf reports  
 48 | #   rmarkdown::render(input = "/Users/majerus/Desktop/R/auto_reporting/test/r_script_pdf.Rmd", 
 49 | #           output_format = "pdf_document",
 50 | #           output_file = paste("test_report_", car, Sys.Date(), ".pdf", sep=''),
 51 | #           output_dir = "/Users/majerus/Desktop/R/auto_reporting/test/reports")
 52 |   
 53 | }
 54 | ```
 55 | 
 56 | <br/>
 57 | 
 58 | ## File 2: Rmarkdown
 59 | 
 60 | ```{r, eval=FALSE}
 61 | # load packages 
 62 | library(ggplot2)
 63 | library(dplyr)
 64 | library(ggmap)
 65 | library(knitr)
 66 | library(markdown)
 67 | library(rmarkdown)
 68 | library(xtable)
 69 | ```
 70 | 
 71 | ```{r, eval=FALSE}
 72 | 
 73 | # limit data to car name that is currently specified by the loop  
 74 | cars <- mtcars[rownames(mtcars) == car, ]
 75 | 
 76 | # create example data for each car: repeat its row a random number (1-10) of times
 77 | n_sold <- sample(1:10, 1)
 78 | cars <- do.call("rbind", replicate(n_sold, cars, simplify = FALSE))
 79 | 
 80 | # create hypothetical lat and lon for each row in cars (runif() is vectorised, so no sapply() is needed)
 81 | cars$lat <- round(runif(nrow(cars), 30, 46), 3)
 82 | cars$lon <- round(runif(nrow(cars), -115, -80), 3)
 83 | 
 84 | ```
 85 | 
 86 | Today is `r Sys.Date()`.  
 87 | 
 88 | <br><br>
 89 | 
 90 | Today we sold cars to people in the following locations: 
 91 | 
 92 | ```{r, eval=FALSE}
 93 | 
 94 | # print map with car locations 
 95 | ggmap(map) + 
 96 | geom_point(aes(x = lon, y = lat), data = cars,
 97 |  alpha = 1, color="darkred", size = 3)
 98 | 
 99 | 
100 | ```
101 | 
102 | <br><br>
103 | 
104 | Here is a data table of the cars we sold. 
105 | 
106 | <br><br>
107 | 
108 | ```{r xtable, eval=FALSE}
109 | 
110 | # data table of cars sold 
111 | table <- xtable(cars[,c(1:2, 12:13)])
112 | print(table, type="latex", comment = FALSE)
113 | 
114 | 
115 | ```
116 | 
117 | <br><br>
118 | 
119 | \newpage
120 | 
121 | Here is a plot of mpg vs. weight for the sold cars: 
122 | <br><br>
123 | 
124 | ```{r, eval=FALSE}
125 | # plot of mpg vs. wt for cars sold
126 | 
127 | ggplot(cars, aes(mpg, wt)) + 
128 |   geom_point(position = position_jitter(w = 0.1, h = 0.1)) 
129 | 
130 | 
131 | ```
132 | 
133 | 
134 | 
135 | 
136 | 
137 | 


--------------------------------------------------------------------------------
/iterative_reporting/r-tmp/loop.R:
--------------------------------------------------------------------------------
 1 | # load libraries
 2 | library(tidyverse)
 3 | library(rmarkdown)
 4 | library(randomNames)
 5 | 
 6 | # create sample data
 7 | tmp <- tibble(
 8 |   doc = c(rep("doc1", 5), 
 9 |           rep("doc2", 5)),
10 |   name = randomNames(10)
11 | )
12 | 
13 | 
14 | for (i in unique(tmp$doc)){
15 |   # `i` and `tmp` are read by report.Rmd's inline code when it is rendered
16 |   rmarkdown::render(input = file.path(getwd(), "report.Rmd"),
17 |                     output_file = paste0("test_report_", i, "_", Sys.Date(), ".docx"),
18 |                     output_dir = file.path(getwd(), "reports")
19 |   )
20 |   
21 | }
22 | 
23 | 


--------------------------------------------------------------------------------
/iterative_reporting/r-tmp/report.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "report"
 3 | author: "Rich Majerus"
 4 | date: "4/5/2018"
 5 | output: word_document
 6 | ---
 7 | 
 8 | 
 9 | Doc: `r i`
10 | Names: `r paste(tmp$name, collapse = ", ")` 
11 | 
12 | 


--------------------------------------------------------------------------------
/iterative_reporting/r-tmp/reports/test_report_doc1_2018-04-05.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/iterative_reporting/r-tmp/reports/test_report_doc1_2018-04-05.docx


--------------------------------------------------------------------------------
/iterative_reporting/r-tmp/reports/test_report_doc2_2018-04-05.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/iterative_reporting/r-tmp/reports/test_report_doc2_2018-04-05.docx


--------------------------------------------------------------------------------
/iterative_reporting/r_script.R:
--------------------------------------------------------------------------------
 1 | # Reference 
 2 | # http://www.r-bloggers.com/how-to-source-an-r-script-automatically-on-a-mac-using-automator-and-ical/
 3 | # http://www.engadget.com/2013/03/18/triggering-applescripts-from-calendar-alerts-in-mountain-lion/
 4 | 
 5 | library(knitr)
 6 | library(markdown)
 7 | library(rmarkdown)
 8 | 
 9 | 
10 | ## knitr loop
11 | 
12 | mtcars <- mtcars[1:2,]
13 | 
14 | map <-
15 |   ggmap::get_map(location="United States",   # ggmap:: because this script never calls library(ggmap)
16 |           source= 'google', maptype = 'terrain', color='bw', zoom=4) 
17 | 
18 | for (car in unique(rownames(mtcars))){
19 |   rmarkdown::render('/Users/majerus/Desktop/R/auto_reporting/test/r_script.Rmd', 
20 |                    output_file =  paste("report_", car, '_', Sys.Date(), ".html", sep=''), 
21 |                    output_dir = '/Users/majerus/Desktop/R/auto_reporting/test/reports')
22 | # for pdf reports  
23 | #   rmarkdown::render(input = "/Users/majerus/Desktop/R/auto_reporting/test/r_script_pdf.Rmd", 
24 | #           output_format = "pdf_document",
25 | #           output_file = paste("test_report_", car, Sys.Date(), ".pdf", sep=''),
26 | #           output_dir = "/Users/majerus/Desktop/R/auto_reporting/test/reports")
27 |   
28 | }
29 | 
30 | 
31 | 


--------------------------------------------------------------------------------
/iterative_reporting/rmarkdown_script.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Example Car Sales Report"
 3 | author: "Rich Majerus"
 4 | output: html_document
 5 | ---
 6 | 
 7 | ```{r, echo=FALSE}
 8 | suppressWarnings(suppressPackageStartupMessages(library(ggplot2)))
 9 | suppressWarnings(suppressPackageStartupMessages(library(dplyr)))
10 | suppressWarnings(suppressPackageStartupMessages(library(leaflet)))
11 | suppressWarnings(suppressPackageStartupMessages(library(DT)))
12 | suppressWarnings(suppressPackageStartupMessages(library(stringr)))
13 | library(knitr)
14 | library(markdown)
15 | library(rmarkdown)
16 | ```
17 | 
18 | ```{r, echo=FALSE}
19 | cars <- mtcars[rownames(mtcars) == car, ]
20 | 
21 | # create daily data: repeat the selected car's row a random number (1-10) of times
22 | n_sold <- sample(1:10, 1)
23 | cars <- do.call("rbind", replicate(n_sold, cars, simplify = FALSE))
24 | 
25 | # one random coordinate per row; runif() is vectorised, so no sapply() over row names is needed
26 | cars$lat <- round(runif(nrow(cars), 30, 46), 3)
27 | cars$lon <- round(runif(nrow(cars), -115, -80), 3)
28 | 
29 | ```
30 | 
31 | Today is `r Sys.Date()`.  
32 | 
33 | Today we sold `r nrow(cars)` `r car`. 
34 | 
35 | <br><br>
36 | 
37 | Today we sold cars to people in the following locations: 
38 | ```{r, echo=FALSE}
39 | # map of cars sold
40 | leaflet(cars) %>%
41 |   addTiles() %>%
42 |   setView(-93.65, 42.0285, zoom = 3) %>%
43 |   addCircles(cars$lon, cars$lat) 
44 | ```
45 | 
46 | <br><br>
47 | 
48 | Here is a data table of the cars we sold. 
49 | ```{r, echo=FALSE}
50 | # data table of cars sold 
51 | datatable(cars[,c(1:2, 12:13)])
52 | ```
53 | 
54 | <br><br>
55 | 
56 | Here is a plot of mpg vs. weight for the sold cars: 
57 | ```{r, echo=FALSE}
58 | 
59 | # plot 
60 | ggplot(cars, aes(mpg, wt)) + geom_point(position = position_jitter(w = 0.1, h = 0.1)) + stat_smooth(method="lm", se=TRUE)
61 | 
62 | ```
63 | 
64 | 
65 | 


--------------------------------------------------------------------------------
/portland mapping/.Rapp.history:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/portland mapping/.Rapp.history


--------------------------------------------------------------------------------
/portland mapping/GISwR.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/portland mapping/GISwR.pdf


--------------------------------------------------------------------------------
/portland mapping/ggmaps.R:
--------------------------------------------------------------------------------
  1 | # open street map of reed 
  2 | 
  3 | gps <- read.csv("/Users/majerus/Desktop/2014 projects/portland mapping/elwyn.csv", 
  4 |                 header = TRUE)
  5 | 
  6 | library(ggmap)
  7 | 
  8 | ## Google Maps 
  9 | 
 10 | # satellite 
 11 | mapImageData <- get_map(location = c(lon = mean(gps$Longitude), 
 12 |                                      lat = 33.824),
 13 |                         color = "color", # or bw
 14 |                         source = "google",
 15 |                         maptype = "satellite",
 16 |                         zoom = 17)
 17 | 
 18 | 
 19 | # terrain 
 20 | mapImageData <- get_map(location = c(lon = mean(gps$Longitude), 
 21 |                                      lat = 33.824),
 22 |                         color = "color", # or bw
 23 |                         source = "google",
 24 |                         maptype = "terrain",
 25 |                         zoom = 17)
 26 | 
 27 | 
 28 | # roadmap 
 29 | mapImageData <- get_map(location = c(lon = mean(gps$Longitude), 
 30 |                                      lat = 33.824),
 31 |                         color = "color", # or bw
 32 |                         source = "google",
 33 |                         maptype = "roadmap",
 34 |                         zoom = 17)
 35 | 
 36 | # hybrid 
 37 | mapImageData <- get_map(location = c(lon = mean(gps$Longitude), 
 38 |                                      lat = 33.824),
 39 |                         color = "color", # or bw
 40 |                         source = "google",
 41 |                         maptype = "hybrid",
 42 |                         zoom = 17)
 43 | 
 44 | ## open street map
 45 | mapImageData <- get_map(location = c(lon = mean(gps$Longitude), 
 46 |                                      lat = 33.824),
 47 |                         color = "color", # or bw
 48 |                         source = "osm",
 49 |                         zoom = 17)
 50 | 
 51 | 
 52 | ## stamen
 53 | 
 54 | # terrain 
 55 | mapImageData <- get_map(location = c(lon = mean(gps$Longitude), 
 56 |                                      lat = 33.824),
 57 |                         color = "color", # or bw
 58 |                         source = "stamen",
 59 |                         maptype = "terrain",
 60 |                         zoom = 17)
 61 | 
 62 | 
 63 | 
 64 | pathcolor <- "#F8971F"
 65 | 
 66 | ggmap(mapImageData,
 67 |       extent = "device", # "panel" keeps in axes, etc.
 68 |       ylab = "Latitude",
 69 |       xlab = "Longitude",
 70 |       legend = "right") + 
 71 |   
 72 |   geom_path(aes(x = Longitude, # path outline
 73 |                 y = Latitude),
 74 |             data = gps,
 75 |             colour = "black",
 76 |             size = 2) +
 77 |   
 78 |   geom_path(aes(x = Longitude, # path
 79 |                 y = Latitude),
 80 |             colour = pathcolor,
 81 |             data = gps,
 82 |             size = 1.4) # +
 83 | # labs(x = "Longitude",
 84 | #   y = "Latitude") # if you do extent = "panel"
 85 | 
 86 | 
 87 | mapImageData <- get_map(location = c(lon = -122.630091, 
 88 |                                      lat = 45.480740),
 89 |                         color = "color", # or bw
 90 |                         source = "google",
 91 |                         maptype = "satellite",
 92 |                         zoom = 17)
 93 | 
 94 | 
 95 | ls(data) # NOTE(review): neither `data` nor `mydata` is created in this script - presumably leftover interactive scratch code; confirm before running
 96 | attach(mydata)
 97 | plot(x, y) # scatterplot of x against y (presumably columns of mydata made visible by attach(); verify)
 98 | identify(x, y, labels=row.names(mydata)) # interactive: label clicked points with their row names
 99 | coords <- locator(type="l") # interactive: record clicked positions, drawing lines between them
100 | coords # display the recorded coordinates
101 | 
102 | ls(data) # same steps as above, but using data$admit_rate and data$grad_rate
103 | attach(mydata)
104 | plot(data$admit_rate, data$grad_rate) # scatterplot of admit rate against grad rate
105 | identify(data$admit_rate, data$grad_rate, labels=row.names(data)) # interactive: label clicked points with their row names
106 | coords <- locator(type="l") # interactive: record clicked positions, drawing lines between them
107 | coords # display the recorded coordinates
108 | 
109 | 
110 | 


--------------------------------------------------------------------------------
/portland mapping/mapping in r.R:
--------------------------------------------------------------------------------
 1 | # http://www.r-bloggers.com/the-openstreetmap-package-opens-up/
 2 | # http://rpubs.com/RobinLovelace/12696 
 3 | library(osmar)
 4 | library(OpenStreetMap)
 5 | 
 6 | src <- osmsource_api()
 7 | bb <- center_bbox(-122.688068, 45.521032, 1000, 1000)
 8 | ptown <- get_osm(bb, source = src)
 9 | plot(ptown)
10 | # mark the centre of the bounding box (the previous point, -1.53492 / 53.81934, lay far outside it)
11 | points(-122.688068, 45.521032, col = "red", lwd = 5)
12 | 
13 | bikePaths <- find(ptown, way(tags(k == "bicycle" & v == "yes")))
14 | bikePaths <- find_down(ptown, way(bikePaths))
15 | bikePaths <- subset(ptown, ids = bikePaths)
16 | plot(ptown)
17 | plot_ways(bikePaths, add = TRUE, col = "red", lwd = 3)
18 | 
19 | 
20 | library(ggmap)
21 | 
22 | stores <- data.frame(name=c("Commercial","Union","Bedford"),
23 |                      longitude=c(-70.25042295455933,-70.26050806045532,-70.27726650238037),
24 |                      latitude=c(43.657471302616806,43.65663299041943,43.66091757424481))
25 | location = c(-70.2954, 43.64278, -70.2350, 43.68093)
26 | 
27 | # Fetch the map
28 | portland = get_map(location = location, source = "osm")
29 | 
30 | # Draw the map
31 | portlandMap = ggmap(portland)
32 | 
33 | # Add the points layer
34 | portlandMap = portlandMap + geom_point(data = stores, aes(x = longitude, y = latitude), size = 5)
35 | 
36 | # Add the labels
37 | portlandMap + geom_text(data = stores, aes(label = name, x = longitude+.001, y = latitude), hjust = 0)
38 | 


--------------------------------------------------------------------------------
/rater reliability .R:
--------------------------------------------------------------------------------
 1 | 
 2 | # libraries ---------------------------------------------------------------
 3 | 
 4 | # install irr library if not already installed 
 5 | if( !is.element("irr", installed.packages()[,1]) )
 6 |   install.packages("irr")
 7 | 
 8 | #Load the irr library 
 9 | library(irr)  
10 | 
11 | # read in data and clean dataframe ----------------------------------------
12 | 
13 | # read in full data file (update file path to match data location on your computer)
14 | original.data <- read.csv('/Users/majerus/Desktop/linguistics_data.csv')
15 | 
16 | # drop summary statistics that are included in original file 
17 | data <- subset(original.data, !is.na(original.data$Test.Number))
18 | data$Average.Score..RA   <- NULL
19 | data$Average.Score..Participants	<- NULL
20 | data$Difference..RA...Participants.	<- NULL
21 | data$X <- NULL
22 | 
23 | # drop extra vars 
24 | data$Speaker <- NULL
25 | data$Speaker.From <- NULL
26 | data$Bows..Horizon <- NULL
27 | data$Test.Number  <- NULL
28 | 
29 | # make File name variable the rownames so that it is preserved as columns names once df is transposed
30 | rownames(data) <- data$File.Name
31 | data$File.Name <- NULL
32 | 
33 | # calculate inter-rater reliability between Molly and Dean using Cohen's Kappa----------------
34 | 
35 | ratings <- as.data.frame(cbind(Dean = data$RA..Dean, Molly = data$RA..Molly))
36 | 
37 | # kappa2(ratings, weight = c("unweighted", "equal", "squared"), sort.levels = FALSE)
38 | kappa2(ratings)
39 | 
40 | 
41 | 
42 | # calculate inter-rater reliability between all raters  -------------------
43 | 
44 | # transpose df 
45 | data.t <- as.data.frame(t(data))
46 | 
47 | # check class of each variable 
48 | sapply(data.t, class)
49 | 
50 | # convert df.t to matrix 
51 | matrix <- data.matrix(data.t)
52 | 
53 | 
54 | # Krippendorff ’s alpha
55 | 
56 | # kripp.alpha(x, method=c("nominal","ordinal","interval","ratio"))  (need to select right data level for method)
57 | kripp.alpha(matrix, method=c("ratio"))
58 | kripp.alpha(matrix, method=c("nominal"))
59 | 
60 | 
61 | # Light’s Kappa 
62 | 
63 | # transform data to factors for Light’s Kappa, which requires categorical data 
64 | sapply(data, class)
65 | data.factors <- as.data.frame(sapply(data, as.factor))
66 | kappam.light(data.factors)
67 | 
68 | 
69 | # Fleiss’ Kappa 
70 | 
71 | # Fleiss’ Kappa for m raters with categorical data 
72 | kappam.fleiss(data.factors, detail = TRUE, exact = FALSE)
73 | kappam.fleiss(data.factors, detail = TRUE)
74 | 
75 | 
76 | 


--------------------------------------------------------------------------------
/read in ipeds data.R:
--------------------------------------------------------------------------------
 1 | install.packages("devtools")
 2 | 
 3 | library(devtools)
 4 | install_github("jbryer/ipeds")
 5 | 
 6 | library(ipeds)
 7 | 
 8 | ls('package:ipeds')
 9 | 
10 | data(surveys)
11 | names(surveys)
12 | 
13 | downloadAllSurveys(2013)
14 | 
15 | 


--------------------------------------------------------------------------------
/read_googlesheet.R:
--------------------------------------------------------------------------------
 1 | # survey url: https://docs.google.com/forms/d/1zLVTb8dix0tiWr0sVuRQAGsfjdMfQ5A5PmK_wDn9e7U/viewform?usp=send_form 
 2 | 
 3 | library(XML)
 4 | library(httr)
 5 | 
 6 | url <- "https://docs.google.com/spreadsheets/d/1CVQqfIEkbt9KUi3oxgE8_iQQnbN7CBIDqXqtNYSfsiw/pubhtml?gid=594213668&single=true"
 7 | 
 8 | readSpreadsheet <- function(url, sheet = 1){
 9 |   # Download the page at `url` (a published Google Sheet) and return table number `sheet` as a cleaned data frame.
10 |   r <- httr::stop_for_status(httr::GET(url))                     # fail loudly on HTTP errors
11 |   html <- XML::htmlParse(httr::content(r, as = "text", encoding = "UTF-8"), asText = TRUE)
12 |   sheets <- XML::readHTMLTable(html, header=FALSE, stringsAsFactors=FALSE)
13 |   df <- sheets[[sheet]]
14 |   dfClean <- function(df){
15 |     nms <- t(df[1,])                          ## first row holds the column names
16 |     names(df) <- nms
17 |     df <- df[-1, -1, drop = FALSE]            ## drop the header row and the first column
18 |     df <- df[df[,1] != "", , drop = FALSE]    ## only select rows with time stamps
19 |     row.names(df) <- seq_len(nrow(df))        ## seq_len() also copes with zero remaining rows
20 |     df
21 |   }
22 |   dfClean(df)
23 | }
24 | 
25 | df <- readSpreadsheet(url)
26 | 
27 | 
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/read_multiple_txt_files.R:
--------------------------------------------------------------------------------
 1 | 
 2 | # install, update and load packages -----------------------------------------------
 3 | 
 4 | pkg <- c("stringr", "reshape2",  "dplyr", "ggplot2",  "magrittr")
 5 | 
 6 | # compare against installed package *names* only, not every cell of the installed.packages() matrix
 7 | new.pkg <- setdiff(pkg, rownames(installed.packages()))
 8 | if (length(new.pkg) > 0) {
 9 |   install.packages(new.pkg)
10 | }
11 | 
12 | library(stringr)
13 | library(reshape2)
14 | library(dplyr)
15 | library(ggplot2)
16 | 
17 | 
18 | # Read in data ------------------------------------------------------------
19 |           # update this file path to point toward appropriate folder on your computer
20 | folder <- "/Users/majerus/Desktop/thesis_projects/linguistics/Yevgeniy/exp1/"      # path to folder that holds multiple .txt files
21 | file_list <- list.files(path=folder, pattern="\\.txt$")                            # names of all .txt files in folder (pattern is a regex, not a glob)
22 | 
23 | # read in each .txt file in file_list and rbind them into a data frame called data; `file` records the source file of each row
24 | data <- 
25 |   do.call("rbind", 
26 |           lapply(file_list, 
27 |                  function(x) 
28 |                  cbind(file = x, read.table(paste0(folder, x), 
29 |                             header = TRUE, 
30 |                             stringsAsFactors = FALSE))))
31 | 
32 | 
33 | # Clean data --------------------------------------------------------------
34 | 
35 | # Derive analysis columns: s1-s3 (stimulus split on ','), answer, correct, numeric reactionTime.
36 | clean.data <- function(df){
37 |   out <- cbind(df, colsplit(df[["stimulus"]], ',', names =  c('s1','s2', 's3')))
38 |   out[["answer"]] <- ifelse(str_count(out[["stimulus"]], 'A') == 2, 'A', 'B')
39 |   out[["correct"]] <- ifelse(out[["response"]] == out[["answer"]], 1, 0)
40 |   out[["reactionTime"]] <- as.numeric(out[["reactionTime"]])
41 |   out
42 | }
43 | data <- clean.data(data)
44 | 
45 | 
46 | # Write out data ----------------------------------------------------------
47 | 
48 | write.csv(data, paste(folder,'cleaned_data.csv', sep = ''), row.names = FALSE)
49 | 
50 | 
51 | # Create data frame of summary statistics ---------------------------------
52 | 
53 | summary_stats <- 
54 |   data %>%
55 |   group_by(subject, correct, answer) %>%
56 |   summarise(count = n(),
57 |             mean_reactionTime = mean(reactionTime, na.rm = TRUE),
58 |             sd_reactionTime = sd(reactionTime, na.rm = TRUE),
59 |             min_reactionTime= min(reactionTime, na.rm = TRUE),
60 |             max_reactionTime = max(reactionTime, na.rm = TRUE))
61 | 
62 | 
63 | 
64 | # Write out data frame of summary statistics ------------------------------
65 | 
66 | write.csv(summary_stats, paste(folder,'summary_stats.csv', sep = ''), row.names = FALSE)
67 | 
68 | 
69 | 


--------------------------------------------------------------------------------
/rename_colnames_base_on_crosswalk:
--------------------------------------------------------------------------------
1 | # Rename columns of `data` via `col_cross` (`original` = current name, column 1 = new name).
2 | # A column with no usable match keeps its name instead of stopping the loop with an error.
3 | for (i in colnames(data)) {
4 |   new_name <- as.character(col_cross[which(col_cross$original == i), 1])[1]
5 |   if (is.na(new_name)) next
6 |   colnames(data)[colnames(data) == i] <- new_name
7 |   print(paste(i, "renamed to", new_name))
8 | }


--------------------------------------------------------------------------------
/rmaps.R:
--------------------------------------------------------------------------------
  1 | # http://rmaps.github.io/blog/posts/animated-choropleths/ 
  2 | #require(devtools)
  3 | #install_github('ramnathv/rCharts@dev')
  4 | #install_github('ramnathv/rMaps')
  5 | 
  6 | library(rMaps)
  7 | library(rCharts)
  8 | library(reshape2)
  9 | 
 10 | # change file path to match location on your machine
 11 | folder <- '/Users/majerus/Desktop/2014 projects/blog/post1_logs/'
 12 | 
 13 | # change file name to match name on your machine 
 14 | file <- 'state_enrollment_reed.csv'
 15 | 
 16 | # read in enrollment data 
 17 | state <- read.csv(paste(folder, file, sep=''))
 18 | 
 19 | # rename columns for reshape
 20 | colnames(state) <- c('State', '2007', '2008', '2009', '2010', '2011', '2012', '2013')
 21 | 
 22 | # reshape data from wide to long 
 23 | state_long <- melt(state)
 24 | 
 25 | # rename columns
 26 | colnames(state_long) <- c('State', 'Year', 'Students')
 27 | 
 28 | # check class of each variable
 29 | sapply(state_long, class)
 30 | 
 31 | # convert year to numeric 
 32 | state_long$Year <- as.integer(as.character(state_long$Year))
 33 | 
 34 | # convert state to character 
 35 | state_long$State <- as.character(state_long$State)
 36 | 
 37 | # convert students to numeric 
 38 | state_long$Students <- as.numeric(state_long$Students)
 39 | 
 40 | # change state names to abbreviations
 41 | state_long$abr <- state.abb[match(as.character(state_long$State), state.name)]
 42 | 
 43 | # log 
 44 | state_long$Students_log <- ifelse(state_long$Students==0, 0, log(state_long$Students))
 45 | 
 46 | 
 47 | 
 48 | 
 49 | # no students from 
 50 | map <- 
 51 | ichoropleth(Students ~ abr,
 52 |             data = state_long,
 53 |             ncuts = 1,
 54 |             animate = 'Year', 
 55 |             play = TRUE, 
 56 |             legend = FALSE
 57 | )
 58 | 
 59 | map$save('/Users/majerus/Desktop/2014 projects/blog/post1_logs/rmaps/no_students.html', cdn = TRUE)
 60 | 
 61 | 
 62 | 
 63 | ichoropleth(Students ~ abr,
 64 |             data = state_long,
 65 |             ncuts = 5,
 66 |             animate = 'Year', 
 67 |             play = TRUE, 
 68 |             legend = FALSE
 69 | )
 70 | 
 71 | slider <- 
 72 | MYchoropleth(Students ~ abr,
 73 |             data = state_long,
 74 |             animate = 'Year', 
 75 |             legend = TRUE
 76 | )
 77 | slider$save('/Users/majerus/Desktop/2014 projects/blog/post1_logs/rmaps/slider.html', cdn = TRUE)
 78 | 
 79 | 
 80 | play <- 
 81 |   MYchoropleth(Students ~ abr,
 82 |                data = state_long,
 83 |                animate = 'Year', 
 84 |                legend = FALSE, 
 85 |                play=TRUE
 86 |   )
 87 | play$save('/Users/majerus/Desktop/2014 projects/blog/post1_logs/rmaps/play.html', cdn = TRUE)
 88 | 
 89 | 
 90 | 
 91 | 
 92 | 
 93 | hist(state_long$Students_log)
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | # Build a Datamaps choropleth with fixed colour bins, optionally animated over a column.
102 | #   x         formula value ~ region: the left side is binned, the right side keys each region
103 | #   data      data frame holding the formula variables and, if used, the animate column
104 | #   pal/ncuts accepted but not used by this function (bins and colours are fixed below)
105 | #   animate   name of the column to animate over, NULL for a static map
106 | #   play      TRUE adds a Play button stepping through the values, otherwise a slider is shown
107 | #   map, legend, labels, ... are handed to the chart through d$set()
108 | # Returns the Datamaps object; it has to be defined before the script calls it.
109 | MYchoropleth <- function(x, data, pal = "Blues", ncuts = 5, animate = NULL, play = FALSE, map = 'usa', legend = TRUE, labels = TRUE, ...){
110 |   d <- Datamaps$new()
111 |   fml <- lattice::latticeParseFormula(x, data = data)
112 |   # bin the left-hand-side values into six fixed, ordered intervals
113 |   data <- transform(data,
114 |                     fillKey = cut(fml$left, c(-1,0, 5,10,25,50,100), ordered_result = TRUE))
115 |   # one colour per interval: white for the first bin, then five YlOrRd shades
116 |   fillColors <- c('white', RColorBrewer::brewer.pal(5, 'YlOrRd'))
117 |   d$set(
118 |     scope = map, 
119 |     fills = as.list(setNames(fillColors, levels(data$fillKey))), 
120 |     legend = legend,
121 |     labels = labels,
122 |     ...
123 |   )
124 |   if (!is.null(animate)){
125 |     # assumes a numeric animate column: summary() then has Min at [1] and Max at [6]
126 |     range_ <- summary(data[[animate]])
127 |     # split by animate value; plyr:: prefix because the script never attaches plyr
128 |     data <- plyr::dlply(data, animate, function(x){
129 |       y <- toJSONArray2(x, json = FALSE)
130 |       names(y) <- lapply(y, '[[', fml$right.name)
131 |       y
132 |     })
133 |     d$set(
134 |       bodyattrs = "ng-app ng-controller='rChartsCtrl'"
135 |     )
136 |     d$addAssets(
137 |       jshead = "http://cdnjs.cloudflare.com/ajax/libs/angular.js/1.2.1/angular.min.js"
138 |     )
139 |     if (isTRUE(play)){
140 |       d$setTemplate(chartDiv = sprintf("
141 |                                        <div class='container'>
142 |                                        <button ng-click='animateMap()'>Play</button>
143 |                                        <div id='{{chartId}}' class='rChart datamaps'></div>  
144 |                                        </div>
145 |                                        <script>
146 |                                        function rChartsCtrl($scope, $timeout){
147 |                                        $scope.year = %s;
148 |                                        $scope.animateMap = function(){
149 |                                        if ($scope.year > %s){
150 |                                        return;
151 |                                        }
152 |                                        map{{chartId}}.updateChoropleth(chartParams.newData[$scope.year]);
153 |                                        $scope.year += 1
154 |                                        $timeout($scope.animateMap, 1000)
155 |                                        }
156 |                                        }
157 |                                        </script>", range_[1], range_[6])
158 |       )
159 |     } else {
160 |       d$setTemplate(chartDiv = sprintf("
161 |                                        <div class='container'>
162 |                                        <input id='slider' type='range' min=%s max=%s ng-model='year' width=200>
163 |                                        <div id='{{chartId}}' class='rChart datamaps'></div>  
164 |                                        </div>
165 |                                        <script>
166 |                                        function rChartsCtrl($scope){
167 |                                        $scope.year = %s;
168 |                                        $scope.$watch('year', function(newYear){
169 |                                        map{{chartId}}.updateChoropleth(chartParams.newData[newYear]);
170 |                                        })
171 |                                        }
172 |                                        </script>", range_[1], range_[6], range_[1])
173 |       )
174 |     }
175 |     d$set(newData = data, data = data[[1]])
176 |   } else {
177 |     d$set(data = plyr::dlply(data, fml$right.name))
178 |   }
179 |   d
180 | }


--------------------------------------------------------------------------------
/rvest.R:
--------------------------------------------------------------------------------
  1 | # Libraries ---------------------------------------------------------------
  2 | 
  3 | if( !is.element("rvest", installed.packages()[,1]) )
  4 |   install.packages("rvest")
  5 | 
  6 | library(rvest)
  7 | library(stringr)
  8 | 
  9 | # read in data ------------------------------------------------------------
 10 | 
 11 | # read in list of ids and seasons 
 12 | list <- read.csv('/Users/majerus/Desktop/thesis_projects/theather/Shabab/LORTdata_tcgR.csv')
 13 | 
 14 | # create list of ids 
 15 | ids <- unique(list$MemberID)
 16 | 
 17 | ids <- ids[5:6]
 18 | 
 19 | 
 20 | # create function to read in data-----------
 21 |   
 22 | # scrape.profile: shared worker behind read.t.data() and capacity.pull().
 23 | #   id       member id, inserted into the profile url as MemberID
 24 | #   css      CSS selector of the nodes whose text is collected
 25 | #   seasons  SeasonID values to request, one page per season
 26 | # Returns a data frame with one row per matching node and the columns
 27 | # id, rankings (the node text) and Season. Seasons whose page has no matching
 28 | # node contribute no rows; if nothing matches at all the result has zero rows
 29 | # and no id column.
 30 | scrape.profile <- function(id, css, seasons = c(17:22)){
 31 |   base_url <- "http://www.tcg.org/tools/profiles/member_profiles/profile_detail.cfm?MemberID="
 32 |   
 33 |   # iterate over the season values themselves instead of 1:length(seasons)
 34 |   results <- do.call(rbind, lapply(seasons, function(season){
 35 |     
 36 |     # read_html() is the current name of the deprecated rvest html()
 37 |     page <- read_html(paste0(base_url, id, '&SeasonID=', season))
 38 |     
 39 |     rankings <-
 40 |       page %>%
 41 |       html_nodes(css) %>%
 42 |       html_text()
 43 |     
 44 |     rankings <- as.data.frame(rankings)
 45 |     
 46 |     if (nrow(rankings) > 0) {
 47 |       rankings <- cbind(rankings, Season = season)
 48 |     }
 49 |     
 50 |     rankings
 51 |   }))
 52 |   
 53 |   if (nrow(results) > 0) {
 54 |     results <- cbind(id = id, results)
 55 |   }
 56 |   
 57 |   results
 58 | }
 59 | 
 60 | # scrape the production listing cells of every season for one member id
 61 | read.t.data <- function(id){
 62 |   scrape.profile(id, ".productions , #ProductionTitleRow td")
 63 | }
 64 | 
 65 | 
 66 | # create function to scrape seating capacity in data-----------
 67 | 
 68 | # scrape the cells of the tenth table row of every season page; the script
 69 | # later filters these for 'Seating Capacity:' entries
 70 | capacity.pull <- function(id){
 71 |   scrape.profile(id, "tr:nth-child(10) td")
 72 | }
 84 | 
 85 | 
 86 | 
 87 | # apply scraping function to list of ids 
 88 | data <- do.call(rbind, lapply(ids, read.t.data))
 89 | 
 90 | # pull capacity data
 91 | capacity <- do.call(rbind, lapply(ids, capacity.pull))
 92 | 
 93 | capacity <- subset(capacity, str_detect(capacity$rankings, 'Seating Capacity:')==TRUE)
 94 | capacity <- subset(capacity, str_detect(capacity$rankings, 'Facility Name:')==TRUE)
 95 | 
 96 | temp <- sub(".*\r\n", "", capacity$rankings)
 97 | tidy <- cbind(tidy, t(as.data.frame(str_split(tidy$extra, 'Facility & Venue:'))))
 98 | 
 99 | 
100 | temp <- str_split(capacity$rankings, '\r\n')
101 | 
102 | 
103 | # Facility Name:
104 | # write out data ----------------------------------------------------------
105 | 
106 | write.csv(data, '/Users/majerus/Desktop/thesis_projects/theather/Shabab/data.csv')
107 | 
108 | 
109 | 
110 | 
111 | # read in scraped data ----------------------------------------------------
112 | 
113 | messy <- read.csv('/Users/majerus/Desktop/thesis_projects/theather/Shabab/data.csv', row.names=1)
114 | 
115 | colnames(messy) <- c('id', 'Rankings', 'Season')
116 | 
117 | # pull out dates 
118 | row3 <- messy[seq(1, nrow(messy), 3), ]
119 | colnames(row3) <- c('id',  'dates',	'Season')
120 | 
121 | messy <- messy[-seq(1, NROW(messy), by = 3),]
122 | messy$Season <- NULL
123 | 
124 | 
125 | # pull out play names
126 | row2 <- messy[seq(1, nrow(messy), 2), ]
127 | row2$id <- NULL
128 | colnames(row2) <- c('play')
129 | 
130 | messy <- messy[-seq(1, NROW(messy), by = 2),]
131 | 
132 | # pull out extra info
133 | row1 <- messy
134 | row1$id <- NULL
135 | colnames(row1) <- c('extra')
136 | 
137 | # cbind data together 
138 | tidy <- cbind(row3, row2, row1)
139 | 
140 | tidy <- cbind(tidy, t(as.data.frame(str_split(tidy$dates, '-'))))
141 | colnames(tidy) <- c("id",  "dates",    "Season", "play",   "extra",  "start", "end" )
142 | tidy$end <- str_sub(tidy$end, start = 1, end = 9)
143 | 
144 | tidy$dates <- NULL
145 | tidy$start <- as.Date(tidy$start, "%m/%d/%y")
146 | tidy$end <- as.Date(tidy$end, "%m/%d/%y")
147 | 
148 | tidy$days <- tidy$end - tidy$start
149 | 
150 | tidy$extra <- as.character(tidy$extra)
151 | 
152 | tidy$extra <- str_replace_all(tidy$extra, "[\r\n]", '')
153 | tidy <- cbind(tidy, t(as.data.frame(str_split(tidy$extra, 'Facility & Venue:'))))
154 | 
155 | tidy$extra <- NULL
156 | 
157 | colnames(tidy) <- c('id',  'season',	'play',	'start',	'end',	'days',	'drop', 'venue')
158 | tidy$drop <- NULL
159 | 
160 | write.csv(tidy, '/Users/majerus/Desktop/thesis_projects/theather/Shabab/tidy_data.csv')
161 | 
162 | 
163 | tidy <- read.csv('/Users/majerus/Desktop/thesis_projects/theather/Shabab/tidy_data.csv')
164 | 
165 | 
166 | 
167 | 
168 | 
169 | 
170 | 
171 | 
172 | 
173 | 
174 | 
175 | 
176 | 
177 | 
178 | tidy$extra <- str_replace_all(tidy$extra, "[^[:alnum:]]", " ")
179 | tidy$extra <- str_replace_all(tidy$extra, "Playwright s", '')
180 | tidy$extra <- str_trim(tidy$extra)
181 | 
182 | 
183 | 
184 | 
185 | temp <- cbind(tidy, t(as.data.frame(str_split_fixed(tidy$extra, '  ', 2))))
186 | 
187 | 
188 | 
189 | str_split_fixed(tidy$extra, '  ', 2)
190 | 
191 | 
192 | 
193 | gsub( ".*$", "", tidy$extra)
194 | sub("  *$","", tidy$extra, perl=T)
195 | 
196 | temp <- cbind(tidy, t(as.data.frame(str_split(tidy$extra, '  '))))
197 | 
198 | str_split_fixed(tidy$extra, '  ', 2)
199 | 
200 | 
201 | 
202 | tidy$extra <- str_replace_all(tidy$extra, ' ', '')
203 | 
204 | 
205 | 
206 | 
207 | temp <- cbind(tidy, t(as.data.frame(str_split(tidy$extra, 'Lyricist'))))
208 | 
209 | 
210 | 
211 | 
212 | 
213 | 
214 | tidy$extra <- NULL
215 | 
216 | colnames(tidy) <- c('id',	'Season',	'play',	'start',	'end',	'days',	'drop', 'venue')
217 | tidy$drop <- NULL
218 | 
219 | 
220 | 
221 | # Facility & Venue:
222 | 
223 | 
224 | y=unlist(strsplit(tidy$extra,'[\r\n]'))
225 | sub('Facility & Venue:',y)
226 | 
227 | 
228 | # Stage Director(s):
229 | 
230 | 
231 | 
232 | 
233 | temp <- cbind(tidy, t(as.data.frame(str_split(tidy$extra, 'Playwright'))))
234 | 
235 | 
236 | # Lyricist(s):
237 | 
238 | 
239 | str_replace_all(x, "[\r\n]" , "")
240 | 
241 | 
242 | str_replace_all(string=a, pattern=" ", repl="")
243 | 
244 | temp <- as.data.frame(str_split(tidy$extra, '"[\r\n]"'))
245 | 
246 | 
247 |   
248 |   
249 | sapply(tidy, class)
250 | 
251 | 
252 | 
253 | k <- function(dataframe, n)dataframe[seq(n,to=nrow(dataframe),by=n),]
254 | 
255 | 
256 | 
257 | 
258 | row3 <- messy[seq(1, length(messy), 3)]
259 | 
260 | 
261 | 
262 | 
263 | 
264 | 
265 | 
266 | messy$element <- rep(c('date', 'play', 'extra'))
267 | messy$count <- rep(1:3)
268 | 
269 | 
270 | tidier <- messy %>%
271 |   gather(key, time, -id, -Season)
272 | tidier %>% head(8)
273 | 
274 | messy$id.unique <- paste(messy$id, messy$Season, messy$count, sep='')
275 | 
276 | tidy <- 
277 |   messy %>%
278 |   spread(id.unique, Rankings, fill = NA, convert = FALSE, drop = TRUE)
279 | 
280 | 
281 | Season
282 | count 
283 | 
284 | 
285 | temp <- dcast(messy, id + Season + count~element, value.var="Rankings")
286 | 
287 | duplicated(messy$id.unique)
288 | 
289 | 
290 | # , value.var="Rankings"
291 | 
292 | ls(messy)
293 | 
294 | 
295 | 
296 | temp <- 
297 | reshape(messy, direction = 'wide', idvar = c('id', 'Season', 'count'), timevar = 'element', 
298 |         v.names = 'test_result', sep = "_")
299 | 
300 | 
301 | 
302 | 
303 | 


--------------------------------------------------------------------------------
/ts_graphs.R:
--------------------------------------------------------------------------------
 1 | library(ggplot2)
 2 | library(dplyr)
 3 | library(lubridate)
 4 | library(xts)
 5 | library(stringr)
 6 | library(reshape2)
 7 | library(plyr)
 8 | library(grid)
 9 | 
10 | 
11 | data <- read.csv('/Users/majerus/Desktop/thesis_projects/makoto/data.csv')
12 | data$X <- NULL
13 | data$Carus.Spangler.Rd. <- NULL
14 | #data[is.na(data)] <- 0
15 | 
16 | data <- dplyr::rename(data, time = Date...Time)
17 | 
18 | # id 
19 | # data$id <- id(data[c("time")])
20 | data <- mutate(data, id = rownames(data))              
21 |  
22 | 
23 | 
24 | # strip the AM/PM marker and surrounding blanks from the time stamps
25 | data$time <- str_replace(data$time, 'AM', '')
26 | data$time <- str_replace(data$time, 'PM', '')
27 | data$time <- str_trim(data$time, 'both')
28 | # two-digit years for the later %y parse; 2014 and 2015 used to be rewritten to '13' as well
29 | data$time <- str_replace(data$time, '2013', '13')
30 | data$time <- str_replace(data$time, '2014', '14')
31 | data$time <- str_replace(data$time, '2015', '15')
32 | #data$time <- str_replace(data$time, '24:00', '23:59')
33 | #data$time <- strptime(data$time, '%m/%d/%y  %H:%M')
34 | 
35 | # convert every station column to numeric from its own values
36 | # (Sweet.Home.Fire.Department was previously overwritten with Sauvie.Island)
37 | stations <- c("Albany.Calapooia.School", "Beaverton.Highland.Park", "Corvallis.Circle.Blvd",
38 |               "Hillsboro.Hare.Field", "Portland.SE.Lafayette", "Salem.State.Hospital",
39 |               "Sauvie.Island", "Sweet.Home.Fire.Department")
40 | for (s in stations) {
41 |   data[[s]] <- as.numeric(data[[s]])
42 | }
43 | 
44 | long_data <- melt(data, id=c("id", "time"), na.rm=TRUE)
45 | 
46 | long_data$time <- strptime(long_data$time, '%m/%d/%y  %H:%M')
47 | 
48 | long_data$log <- log(long_data$value)
49 | 
50 | 
51 | long_data$Date<-with(long_data,as.Date(time, format = "%Y/%m/%d"))
52 | graph <- ddply(long_data,.(variable, Date),summarise, ave=mean(value))
53 | 
54 | p <- 
55 | ggplot(aes(Date, ave, colour = variable), data = graph) + 
56 |   geom_line()  +
57 |   ggtitle("Average by Day")
58 | 
59 | p <- 
60 | p + annotate("text", x = as.Date(Inf), y = -Inf, label = "Created by Rich Majerus",
61 |              hjust=1.1, vjust=-1.1, col="white", cex=6,
62 |              fontface = "bold", alpha = 0.8) +
63 |   facet_wrap( ~  variable, ncol=3) 
64 | 
65 | ggsave(p, file="/Users/majerus/Desktop/thesis_projects/makoto/ts.pdf", scale=2)
66 | 


--------------------------------------------------------------------------------
/tutorials/Histograms advanced.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Histograms II"
 3 | author: "Reed College, Instructional Technology Services"
 4 | output: html_document
 5 | ---
 6 | 
 7 | <br>
 8 | <br>
 9 | 
10 | ####Install and load ggplot
11 | ```{r, warning=FALSE, message=FALSE}
12 | # install libraries 
13 | # This demo requires the 'ggplot2' package
14 | if( !is.element("ggplot2", installed.packages()[,1]) )
15 |   install.packages("ggplot2")
16 | 
17 | # load libraries 
18 | library(ggplot2)
19 | ```
20 | 
21 | <br>
22 | <br>
23 | 
24 | ####Download and Load Data
25 | ```{r}
26 | download.file("http://www.openintro.org/stat/data/evals.RData", destfile = "evals.RData")
27 | load("evals.RData")
28 | ```
29 | 
30 | <br>
31 | <br>
32 | 
33 | ####Create Histogram Plot Function
34 | ```{r, message=FALSE}
35 | 
36 | # this function will create a histogram of every variable in your data frame
37 | # the function takes your data frame as its first argument ('x')
38 | # if you want to save the histograms as .png files define the file path for graphs_folder below
39 | # you will also need to remove the "#" from the two lines in the function that are commented out and place a "#" before print
40 | 
41 | # save graphs in this folder
42 | graphs_folder <- '/filepath/graphs/'
43 | 
44 | hist <- function(x, na.rm = TRUE, ...) {
45 |   # one histogram per column of x; na.rm and ... are accepted but not used
46 |   for (column in names(x)) {
47 |     print(ggplot(x, aes_string(x = column)) + geom_histogram(alpha=.8, fill = "darkblue") + theme_classic())
48 |     #plots <- ggplot(x, aes_string(x = column)) + geom_histogram(alpha=.8, fill = "darkblue") + theme_classic()
49 |     #ggsave(plots,filename=paste(graphs_folder, "hist_",column,".png",sep=""))
50 |   }
51 | }
52 | ```
53 | 
54 | <br>
55 | <br>
56 | 
57 | ####Run Histogram Plot Function to Create Plots for an Entire Data Frame
58 | ```{r, message=FALSE, warning=FALSE}
59 | hist(evals)
60 | ```
61 | 
62 | 
63 | 
64 | ####Create Density Plot Function
65 | ```{r, message=FALSE}
66 | 
67 | den <- function(x, na.rm = TRUE, ...) {
68 |   # numeric columns only; drop = FALSE keeps a data frame when just one column qualifies
69 |   x <- x[, vapply(x, is.numeric, logical(1)), drop = FALSE]
70 |   for (column in names(x)) {
71 |     print(ggplot(x, aes_string(x = column)) + geom_density(alpha=.8, fill = "darkgreen") + theme_classic())
72 |   }
73 | }
74 | ```
75 | 
76 | <br>
77 | <br>
78 | 
79 | ####Run Density Plot Function to Create Kernel Density Plots for All Continuous Variables in a Data Frame
80 | ```{r, message=FALSE, warning=FALSE}
81 | den(evals)
82 | ```
83 | 
84 | 
85 | 


--------------------------------------------------------------------------------
/tutorials/colors.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Package RColorBrewer with ggplot2"
  3 | author: "Reed College, Instructional Technology Services"
  4 | output: html_document
  5 | ---
  6 | 
  7 | <br />
  8 | 
  9 | #####Load Data 
 10 | ```{r}
 11 | 
 12 | download.file("http://www.openintro.org/stat/data/ames.csv", destfile = "ames.csv")
 13 | data <- read.csv("ames.csv")   # read the copy that download.file() just saved in the working directory
 14 | 
 15 | ```
 16 | 
 17 | <br />
 18 | 
 19 | #####Load Packages
 20 | 
 21 | ```{r}
 22 | 
 23 | library(plyr)
 24 | library(ggplot2)
 25 | library(ggthemes)
 26 | library(scales)
 27 | library(reshape2)
 28 | library(RColorBrewer)
 29 | ```
 30 | 
 31 | <br />
 32 | 
 33 | 
 34 | #####Calculate Mean Sale Price by Year and Building Type 
 35 | 
 36 | ```{r}
 37 | 
 38 | # show count of building types by year of sale
 39 | table(data$Yr.Sold, data$Bldg.Type)
 40 | 
 41 | # create data frame with the mean sale price for each combination of year and type
 42 | means <- ddply(data, .(Yr.Sold, Bldg.Type), summarize,     
 43 |                mean_price = mean(SalePrice))
 44 | 
 45 | # show first 6 rows of new data frame
 46 | head(means)
 47 | 
 48 | # show table of mean sale price by year and type
 49 | dcast(means, Yr.Sold ~ Bldg.Type)
 50 | 
 51 | ```
 52 | 
 53 | <br />
 54 | 
 55 | #####Plot Mean Sale Price by Year and Building Type (manually assign colors)
 56 | 
 57 | ```{r}
 58 | 
 59 | # define colors by name
 60 | 
 61 | p <-
 62 |   ggplot(means, aes(Yr.Sold, mean_price, group = Bldg.Type, colour = Bldg.Type)) + 
 63 |   geom_line(size=2) +
 64 |   scale_y_continuous("Mean Sale Price", labels = dollar) +
 65 |   scale_x_continuous("Year") +
 66 |   ggtitle("Mean Home Sale Price in Ames, IA") +
 67 |   theme_tufte() +
 68 |   theme(plot.title = element_text(size = 16, face="bold")) 
 69 | 
 70 | p +  scale_colour_manual(values = c("red","blue", "dark green", "grey", "black"))
 71 |     
 72 | 
 73 | ```
 74 | 
 75 | 
 76 | ```{r}
 77 | 
 78 | # define colors by hex code 
 79 | # see http://www.w3schools.com/tags/ref_colorpicker.asp & http://colorbrewer2.org
 80 | 
 81 | p +  scale_colour_manual(values = c("#0000FF","#197519", "#CC2900", "#4700B2", "#E6E600"))
 82 |     
 83 | 
 84 | ```
 85 | 
 86 | 
 87 | 
 88 | #####Plot Mean Sale Price by Year and Building Type (assign colors with RColorBrewer)
 89 | 
 90 | ```{r}
 91 | # use display.brewer.all() to see all options
 92 | 
 93 | p + scale_colour_brewer("Colors in Set1", palette="Set1")
 94 |     
 95 | 
 96 | ```
 97 | 
 98 | ```{r}
 99 | 
100 | p + scale_colour_brewer("Colors in Paired", palette="Paired")
101 |   
102 | ```
103 | 
104 | ```{r}
105 | 
106 | p +  scale_colour_brewer("Colors in Spectral", palette="Spectral")
107 |     
108 | ```
109 | 
110 | 
111 | ```{r}
112 | 
113 | p + scale_colour_brewer("Colors in Dark2", palette="Dark2")
114 |     
115 | 
116 | ```
117 | 
118 | 
119 | 
120 | 
121 | 
122 | 
123 | 


--------------------------------------------------------------------------------
/tutorials/create_variables.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Creating Variables in RStudio"
 3 | date: Reed College, Instructional Technology Services
 4 | output: html_document
 5 | ---
 6 | <br />          
 7 | ```{r}
 8 | # load the mtcars data
 9 | data(mtcars)
10 | ```
11 | 
12 | ```{r}
13 | head(mtcars) # Look at the first 6 rows of your data 
14 | ```
15 | <br />    
16 | 
17 | **Create a constant** 
18 | ```{r}
19 | # we can create a constant that is always '1' 
20 | mtcars$constant <- 1 
21 | head(mtcars)
22 | ```
23 | <br />    
24 | 
25 | **Create a variable from existing variables** 
26 | ```{r}
27 | # Ratio of horse power to cylinders
28 | mtcars$hp_c <- mtcars$hp/mtcars$cyl
29 | head(mtcars)
30 | ```
31 | <br />    
32 | 
33 | **Create a variable based on the values of existing variables** 
34 | ```{r}
35 | # dummy variable to indicate if a car gets more than 20 mpg
36 | # use the "ifelse" command
37 | # ifelse(condition, if meets condition give variable this value, otherwise give variable this value)
38 | mtcars$mpg20 <- ifelse(mtcars$mpg > 20, 1, 0)
39 | head(mtcars)
40 | ```
41 | 
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/tutorials/creating html tables.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Creating HTML Tables"
 3 | author: "Rich Majerus"
 4 | date: "November 11, 2014"
 5 | output: html_document
 6 | ---
 7 | 
 8 | <br />
 9 | 
10 | #####HTML Tables with xtable Package
11 | ```{r, results='asis'}
12 | library(xtable)
13 | library(plyr)
14 | 
15 | data(mtcars)
16 | 
17 | # create a data frame that contains mean mpg values by the number of cylinders
18 | summary_data <- ddply(mtcars, .(cyl), summarise, mean_mpg = mean(mpg))
19 | 
20 | # rename columns in data frame 
21 | colnames(summary_data) <- c('Cylinders', 'Mean MPG')
22 | 
23 | # Create and print xtable in html
24 | print(xtable(summary_data, 
25 |              caption="Mean MPG by Cylinders"), 
26 |               "html", include.rownames=FALSE, caption.placement='top',
27 |                html.table.attributes='align="left"')
28 | 
29 | ```
30 | <br />
31 | 
32 | 
33 | #####HTML Tables with knitr Package
34 | 
35 | ```{r, results='asis'}
36 | 
37 | library(knitr)
38 | 
39 | kable(head(mtcars), digits=2)
40 | 
41 | ```
42 | 
43 | 
44 | <br />
45 | 
46 | 
47 | #####HTML Tables with googleVis Package
48 | ```{r, results='asis', warning=FALSE}
49 | suppressMessages(library(googleVis))
50 | 
51 | cars <- cbind(car = rownames(mtcars), mtcars)
52 | 
53 | table <- gvisTable(cars, 
54 |               #formats=list(Population="#,###")  
55 |               options=list(page='enable'))
56 | 
57 | print(table)
58 | ```
59 | 
60 | 
61 | 
62 | 
63 | 


--------------------------------------------------------------------------------
/tutorials/dplyr.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "dplyr Introduction"
  3 | author: "Reed College, Instructional Technology Services"
  4 | output: html_document
  5 | ---
  6 | 
  7 | <br/>
  8 | 
  9 | ##### Load Packages and Data
 10 | 
 11 | ```{r, warning=FALSE}
 12 | # load dplyr package 
 13 | suppressMessages(library(dplyr))
 14 | 
 15 | # create example dataframe
 16 | cars <- cbind(car = rownames(mtcars), mtcars)
 17 | rownames(cars) <- NULL
 18 | 
 19 | ```
 20 | 
 21 | <br/>
 22 | 
 23 | ##### dplyr verbs 
 24 | 
 25 | ```{r}
 26 | # filter - subset rows of a data frame / filter(df, how to subset)
 27 | filter(cars, mpg > 25)
 28 | filter(cars,  mpg > 25 & hp > 75)
 29 | 
 30 | # slice - subset rows of a data frame by position / slice(df, rows to keep)
 31 | slice(cars, 1:5)
 32 | slice(cars, c(1:3, 11:13))
 33 | 
 34 | # arrange - order rows of a data frame / arrange(df, column names to order by)
 35 | head(arrange(cars, mpg))
 36 | head(arrange(cars, desc(mpg)))
 37 | head(arrange(cars, desc(cyl), desc(mpg)))
 38 | 
 39 | # select - subset columns of a data frame / select(df, names of columns to keep)
 40 | head(select(cars, car, mpg))
 41 | head(select(cars, car:hp))
 42 | head(select(cars, -(car:hp)))
 43 | 
 44 | # select and other dplyr verbs work with starts_with(), ends_with(), matches() and contains()
 45 | head(select(cars, starts_with('c')))
 46 | 
 47 | # select is often used with distinct - returns table of all unique values 
 48 | distinct(select(cars, vs, cyl))
 49 | 
 50 | # rename - rename columns of a data frame / rename(df, new name = old name)
 51 | head(rename(cars, automobile = car))
 52 | 
 53 | # mutate - create new columns / mutate(df, new column name = formula for new column)
 54 | head(mutate(cars, hp_to_wt = hp/wt))
 55 | 
 56 | ```
 57 | 
 58 | 
 59 | <br/>
 60 | 
 61 | ##### Chaining Syntax
 62 | 
 63 | ```{r}
 64 | # count number of cars with each number of cylinders and put in descending order 
 65 | # n() - counts number of rows in a group
 66 | cars %>%
 67 |   group_by(cyl) %>%
 68 |   summarise(cyl_count=n()) %>%
 69 |   arrange(desc(cyl_count))
 70 | 
 71 | # calculate mean mpg by number of cylinders 
 72 | cars %>%
 73 |   group_by(cyl) %>%
 74 |   summarise(mean_mpg = mean(mpg, na.rm = TRUE))
 75 | 
 76 | # calculate mean mpg and wt by number of cylinders 
 77 | # summarise_each - applies the same function to multiple columns
 78 | cars %>%
 79 |   group_by(cyl) %>%
 80 |   summarise_each(funs(mean(., na.rm = TRUE)), mpg, wt)
 81 |     
 82 | # calculate mean, min, max and sd of mpg and wt rates by number of cyl 
 83 | cars %>%
 84 |   group_by(cyl) %>%
 85 |   summarise_each(funs(mean(., na.rm = TRUE), 
 86 |                       min(., na.rm = TRUE), 
 87 |                       max(., na.rm = TRUE), 
 88 |                       sd(., na.rm = TRUE)), 
 89 |                       mpg, wt)
 90 | 
 91 | ```
 92 | 
 93 | <br/>
 94 | 
 95 | ##### Sampling
 96 | 
 97 | ```{r}
 98 | 
 99 | # sample 5 rows 
100 | cars %>% sample_n(5)
101 | 
102 | # sample 10% of rows  
103 | cars %>% 
104 |   sample_frac(.1, replace = FALSE)
105 | 
106 | ```
107 | 


--------------------------------------------------------------------------------
/tutorials/evals.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/tutorials/evals.RData


--------------------------------------------------------------------------------
/tutorials/excel.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Load data from multiple Excel worksheets"
 3 | author: "Reed College, Instructional Technology Services"
 4 | output: html_document
 5 | ---
 6 | <BR>
 7 | <BR>
 8 | 
 9 | #### Introduction
10 | The following code allows you to read in data from each page of an Excel workbook into a list of data frames in R. Then the code will run a function to clean the data in each of those data frames. Lastly, the data frames are joined together into one data frame for analysis. 
11 | 
12 | <BR>
13 | 
14 | #### Install and Load XLConnect Package
15 | ```{r, eval=FALSE}
16 | 
17 | # install and load packages -----------------------------------------------
18 | pkg <- c("XLConnect")
19 | 
20 | # match against package names only, not every cell of the installed.packages() matrix
21 | new.pkg <- setdiff(pkg, rownames(installed.packages()))
22 | if (length(new.pkg) > 0) {
23 |   install.packages(new.pkg)
24 | }
25 | 
26 | library(XLConnect)
27 | 
28 | ```
29 | 
30 | <BR>
31 | 
32 | #### Read in Data from Excel Worksheets
33 | ```{r, eval=FALSE}
34 | 
35 | # load excel workbook
36 | excel <- loadWorkbook("filepath/ExcelData.xlsx") # change to match your path
37 | 
38 | # get sheet names; naming the vector makes lapply() below return a named list
39 | sheet_names <- getSheets(excel)
40 | names(sheet_names) <- sheet_names
41 | 
42 | # put sheets into a list of data frames (one element per worksheet)
43 | sheet_list <- lapply(sheet_names, function(.sheet) readWorksheet(excel, sheet = .sheet))
44 | 
45 | # keep only the sheets that contain at least one row of data
46 | sheet_list2 <- sheet_list[vapply(sheet_list, nrow, integer(1)) > 0]
47 | ```
48 | 
49 | <BR>
50 | 
51 | #### Define and Run Function to Clean Data
52 | ```{r, eval=FALSE}
53 | 
54 | # code to read in each excel worksheet as individual dataframes
55 | # for (i in 2:length(sheet_list2)){assign(paste0("df", i), as.data.frame(sheet_list2[i]))}
56 | 
57 | # clean one sheet: all columns but the first become numeric (update for your data)
58 | cleaner <- function(df){
59 |   # keep only rows without any missing value
60 |   complete_rows <- rowSums(is.na(df)) == 0
61 |   df <- df[complete_rows, ]
62 |   # strip thousands-separator commas and convert to numeric
63 |   df[, -1] <- as.numeric(gsub(",", "", as.matrix(df[, -1])))
64 |   # numeric year (first 4 characters of `year`) for graphing
65 |   df$Year <- as.numeric(substr(df$year, 1, 4))
66 |   df
67 | }
68 | 
69 | # clean sheets and create one data frame
70 | # data <- do.call(rbind,lapply(seq_along(sheet_list2), function(x) cleaner(sheet_list2[[x]])))
71 | data <- do.call(rbind,lapply(names(sheet_list2), function(x) cleaner(sheet_list2[[x]])))
72 | 
73 | 
74 | ```
75 | 
76 | 
77 | 


--------------------------------------------------------------------------------
/tutorials/facets.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Graphing and Facets"
  3 | author: "Reed College, Instructional Technology Services"
  4 | output: html_document
  5 | ---
  6 | 
  7 | <br />
  8 | 
  9 | ##### Load Data
 10 | ```{r}
 11 | 
 12 | download.file("http://www.openintro.org/stat/data/ames.csv", destfile = "ames.csv")
 13 | data <- read.csv("ames.csv")   # read the copy that was just downloaded to the working directory
 14 | 
 15 | ```
 16 | 
 17 | <br />
 18 | 
 19 | ##### Load Packages
 20 | 
 21 | ```{r}
 22 | 
 23 | library(plyr)
 24 | library(ggplot2)
 25 | library(ggthemes)
 26 | library(scales)
 27 | library(reshape2)
 28 | ```
 29 | 
 30 | <br />
 31 | 
 32 | ##### Calculate Mean Sale Price by Year
 33 | 
 34 | ```{r}
 35 | 
 36 | mean <- ddply(data, .(Yr.Sold), summarize,     
 37 |                mean_price = mean(SalePrice))
 38 | 
 39 | ```
 40 | 
 41 | <br />
 42 | 
 43 | ##### Calculate Mean Sale Price by Year and Sale Condition
 44 | 
 45 | ```{r}
 46 | 
 47 | # show count of sale conditions by year of sale
 48 | table(data$Yr.Sold, data$Sale.Condition)
 49 | 
 50 | # create data frame with the mean sale price for each combination of year and condition
 51 | mean.facet <- ddply(data, .(Yr.Sold, Sale.Condition), summarize,
 52 |                     mean_price = mean(SalePrice))
 53 | 
 54 | # show first 6 rows of new data frame
 55 | head(mean.facet)
 56 | 
 57 | # show table of mean sale price by year and condition (value column named so dcast need not guess)
 58 | dcast(mean.facet, Yr.Sold ~ Sale.Condition, value.var = "mean_price")
 59 | 
 60 | ```
 61 | 
 62 | <br />
 63 | 
 64 | ##### Plot Mean Sale Price by Year
 65 | 
 66 | ```{r}
 67 | 
 68 | ggplot(mean, aes(Yr.Sold, mean_price)) + 
 69 |   geom_line(color="dark blue", size=2) +
 70 |   scale_y_continuous("Mean Sale Price", labels = dollar) +
 71 |   scale_x_continuous("Year") +
 72 |   ggtitle("Mean Home Sale Price in Ames, IA") +
 73 |   theme_tufte() +
 74 |   theme(plot.title = element_text(size = 16, face="bold"))
 75 | 
 76 | ```
 77 | 
 78 | <br />
 79 | 
 80 | ##### Plot Mean Sale Price by Year and Sale Condition
 81 | 
 82 | ```{r}
 83 | 
 84 | ggplot(mean.facet, aes(Yr.Sold, mean_price, group = Sale.Condition, colour = Sale.Condition)) + 
 85 |   geom_line(size=2) +
 86 |   scale_y_continuous("Mean Sale Price", labels = dollar) +
 87 |   scale_x_continuous("Year") +
 88 |   ggtitle("Mean Home Sale Price in Ames, IA") +
 89 |   theme_tufte() +
 90 |   theme(plot.title = element_text(size = 16, face="bold"))
 91 | 
 92 | ```
 93 | 
 94 | 
 95 | <br />
 96 | 
 97 | ##### Plot Mean Sale Price by Year and Sale Condition (Facets)
 98 | 
 99 | ```{r}
100 | 
101 | ggplot(mean.facet, aes(Yr.Sold, mean_price, group = Sale.Condition, colour = Sale.Condition)) + 
102 |   geom_line(size=2) +
103 |   facet_wrap( ~  Sale.Condition, ncol=1) +
104 |   scale_y_continuous("Mean Sale Price", labels = dollar) +
105 |   scale_x_continuous("Year") +
106 |   ggtitle("Mean Home Sale Price in Ames, IA") +
107 |   theme_tufte() +
108 |   theme(plot.title = element_text(size = 16, face="bold"))
109 | 
110 | ```
111 | 
112 | 
113 | 
114 | 
115 | ```{r}
116 | 
117 | ggplot(mean.facet, aes(Yr.Sold, mean_price, group = Sale.Condition, colour = Sale.Condition)) + 
118 |   geom_line(size=2) +
119 |   facet_wrap( ~  Sale.Condition, ncol=6) +
120 |   scale_y_continuous("Mean Sale Price", labels = dollar) +
121 |   scale_x_continuous("Year") +
122 |   ggtitle("Mean Home Sale Price in Ames, IA") +
123 |   theme_tufte() +
124 |   theme(plot.title = element_text(size = 16, face="bold"),
125 |         axis.text.x = element_text(angle = 45, hjust = 1))
126 | 
127 | ```
128 | 
129 | 
130 | 
131 | 
132 | 
133 | ```{r}
134 | 
135 | ggplot(mean.facet, aes(Yr.Sold, mean_price, group = Sale.Condition, colour = Sale.Condition)) + 
136 |   geom_line(size=2) +
137 |   facet_wrap( ~  Sale.Condition, ncol=2) +
138 |   scale_y_continuous("Mean Sale Price", labels = dollar) +
139 |   scale_x_continuous("Year") +
140 |   ggtitle("Mean Home Sale Price in Ames, IA") +
141 |   theme_tufte() +
142 |   theme(plot.title = element_text(size = 16, face="bold"))
143 | 
144 | ```
145 | 
146 | 
147 | 
148 | 
149 | 
150 | 
151 | 
152 | 
153 | 
154 | 


--------------------------------------------------------------------------------
/tutorials/geocoder.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Geocoder & Coordinate Conversion"
  3 | author: "Rich Majerus & Kristin Bott"
  4 | date: "March 4, 2015"
  5 | output: html_document
  6 | ---
  7 | <br/>  
  8 | 
  9 | ### Intro 
 10 | Some introductory text could go here....    
 11 | 
 12 | <br/>  
 13 |   
 14 | 
 15 | A word about packages here...
 16 | 
 17 | ```{r, warning=FALSE}
 18 | library(stringr)
 19 | library(httr)
 20 | library(rjson)
 21 | suppressMessages(library(dplyr))
 22 | #library(devtools)
 23 | #if (!require("leaflet")) devtools::install_github("rstudio/leaflet")
 24 | library(leaflet)
 25 | ```
 26 | 
 27 | <br/>
 28 | 
 29 | ### Geocoding function...
 30 | Some text about geocoding function here  
 31 | 
 32 | <br/>
 33 | 
 34 | ```{r, echo=FALSE}
 35 | 
 36 | # Geocode one address with the Data Science Toolkit API.
 37 | # Returns a named character vector c(address, long, lat), or NA when the
 38 | # lookup raises an error or a warning. Progress is reported via message().
 39 | geo.dsk <- function(addr){
 40 |   out <- tryCatch({
 41 |     url      <- "http://www.datasciencetoolkit.org/maps/api/geocode/json"
 42 |     # namespace-qualified calls: no require() needed, no masking of fromJSON()
 43 |     response <- httr::GET(url, query = list(sensor = "FALSE", address = addr))
 44 |     # `as = "text"` is the documented way to get the body as a string
 45 |     body <- httr::content(response, as = "text", encoding = "UTF-8")
 46 |     json <- rjson::fromJSON(body)
 47 |     # first geocoding result; errors (caught below) when there is none
 48 |     loc  <- json[["results"]][[1]]$geometry$location
 49 |     c(address = addr, long = loc$lng, lat = loc$lat)
 50 |   },
 51 |   error = function(cond) {
 52 |     message(paste("Address not geocoded:", addr))
 53 |     message("Here's the original error message:")
 54 |     # print the text only; message(cond) would re-signal the condition
 55 |     message(conditionMessage(cond))
 56 |     # Choose a return value in case of error
 57 |     NA
 58 |   },
 59 |   warning = function(cond) {
 60 |     message(paste("Address caused a warning:", addr))
 61 |     message("Here's the original warning message:")
 62 |     message(conditionMessage(cond))
 63 |     # NA rather than NULL: rbind() drops NULL results, which would leave
 64 |     # fewer rows than addresses when results are bound back onto the data
 65 |     NA
 66 |   },
 67 |   finally = {
 68 |     message(paste("Processed Address:", addr))
 69 |     message("One down...")
 70 |   })
 71 |   out
 72 | }
 73 | 
 74 | 
 75 | ```
 76 | 
 77 | <br/>
 78 | 
 79 | Here is an example...
 80 | 
 81 | <br/>
 82 | 
 83 | ```{r}
 84 | 
 85 | geo.dsk("Reed College, Portland, OR")
 86 | 
 87 | ```
 88 | 
 89 | 
 90 | <br/>
 91 | 
 92 | This works on data frames too!!
 93 | 
 94 | <br/>
 95 | 
 96 | ```{r}
 97 | 
 98 | name   <- c('Carleton College', 'Pomona College', 'Reed College')
 99 | street <- c('300 North College St', '333 N College Way', '3203 SE Woodstock Blvd') 
100 | city   <- c("Northfield", "Claremont", "Portland") 
101 | state  <- c('MN', 'CA', 'OR') 
102 | zip    <- c('55057', '91711', '97202')
103 | 
104 | data   <- data.frame(name, street, city, state, zip)      
105 | 
106 | 
107 | # create location variable 
108 | 
109 | data$location <- paste(str_trim(as.character(data$street)),
110 |                        str_trim(as.character(data$city)),
111 |                        str_trim(as.character(data$state)),
112 |                        str_trim(as.character(data$zip)), sep=' ')
113 | 
114 | 
115 | # geocode data and bind coordinates onto data
116 | 
117 | result <- cbind(name= data$name,
118 |                 as.data.frame(do.call(rbind,
119 |                               lapply(as.character(data$location), geo.dsk))))
120 | 
121 | print(result)
122 | 
123 | ```
124 | 
125 | 
126 | <br/>
127 | 
128 | ### Coordinate Conversion function...
129 | 
130 | If we want these data frames in another coordinate system we can simply convert them...
131 | 
132 | <br/>
133 | 
134 | ```{r}
135 | 
136 | # convert longitude/latitude in degrees to a 'POINT(x y)' text string
137 | degrees2meters <- function(lon, lat) {
138 |   easting <- lon * 20037508.34 / 180
139 |   # latitude term first, rescaled on the next line
140 |   northing <- log(tan((90 + lat) * pi / 360)) / (pi / 180)
141 |   northing <- northing * 20037508.34 / 180
142 |   paste0("POINT(", easting, " ", northing, ")")
143 | }
144 | 
145 | 
146 | 
147 | ```
148 | 
149 | <br/>
150 | Here is the first example converted....
151 | <br/>
152 | 
153 | ```{r}
154 | 
155 | # convert longitude/latitude in degrees to a 'POINT(x y)' text string
156 | degrees2meters <- function(lon, lat) {
157 |   easting <- lon * 20037508.34 / 180
158 |   # latitude term first, rescaled on the next line
159 |   northing <- log(tan((90 + lat) * pi / 360)) / (pi / 180)
160 |   northing <- northing * 20037508.34 / 180
161 |   paste0("POINT(", easting, " ", northing, ")")
162 | }
163 | 
164 | 
165 | degrees2meters(-122.629179, 45.479171)
166 | 
167 | 
168 | ```
169 | 
170 | <br/>
171 | This will work on data frames too! 
172 | <br/>
173 | 
174 | ```{r}
175 | 
176 | # as.character() first: if long/lat are factors, as.numeric() alone returns level codes
177 | result.converted <- data.frame(name = result$name,
178 |                                coords = degrees2meters(as.numeric(as.character(result$long)),
179 |                                                        as.numeric(as.character(result$lat))))
180 | 
181 | 
182 | print(result.converted)
183 | 
184 | 
185 | ```
186 | <br/>
187 | 
188 | ### Last Step...make a map
189 | 
190 | <br/>
191 | ```{r}
192 | 
193 | # create map; geo.dsk() returns coordinates as text, so convert them to numeric for leaflet
194 | lng <- as.numeric(as.character(result$long))
195 | lat <- as.numeric(as.character(result$lat))
196 | leaflet(result) %>%
197 |   addTiles() %>%
198 |   setView(-93.65, 42.0285, zoom = 3) %>%
199 |   addCircles(lng, lat) %>%
200 |   addPopups(lng, lat, paste0(result$name, "!"))
201 | 
202 | ```
203 | 
204 | 
205 | 
206 | 
207 | 
208 | 


--------------------------------------------------------------------------------
/tutorials/geocoding.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Geocoding"
  3 | author: "Instructional Technology Services, Reed College"
  4 | output: html_document
  5 | ---
  6 | 
  7 | <br/>
  8 | 
  9 | ```{r, warning=FALSE}
 10 | 
 11 | # load and install packages
 12 | 
 13 | pkg <- c("httr", "rjson", "dplyr", "stringr", "devtools", "leaflet")
 14 | new.pkg <- setdiff(pkg, rownames(installed.packages()))  # match package names only, not every matrix cell
 15 | if (length(new.pkg) > 0) {
 16 |   install.packages(new.pkg)
 17 | }
 18 | 
 19 | suppressMessages(library(httr))
 20 | suppressMessages(library(rjson))
 21 | suppressMessages(library(dplyr))
 22 | suppressMessages(library(stringr))
 23 | suppressMessages(library(devtools))
 24 | 
 25 | suppressMessages(if (!require("leaflet")) devtools::install_github("rstudio/leaflet"))
 26 | suppressMessages(library(leaflet))
 27 | ```
 28 | 
 29 | <br/>
 30 | 
 31 | ```{r}
 32 | 
 33 | # create sample data frame of addresses to geocode
 34 | 
 35 | name   <- c('Carleton College', 'Pomona College', 'Reed College')
 36 | street <- c('300 North College St', '333 N College Way', '3203 SE Woodstock Blvd') 
 37 | city   <- c("Northfield", "Claremont", "Portland") 
 38 | state  <- c('MN', 'CA', 'OR') 
 39 | zip    <- c('55057', '91711', '97202')
 40 | data   <- data.frame(name, street, city, state, zip)      
 41 | 
 42 | 
 43 | # create location variable 
 44 | 
 45 | data$location <- paste(str_trim(as.character(data$street)),
 46 |                        str_trim(as.character(data$city)),
 47 |                        str_trim(as.character(data$state)),
 48 |                        str_trim(as.character(data$zip)), sep=' ')
 49 | 
 50 | 
 51 | ```
 52 | 
 53 | <br/>
 54 | 
 55 | ```{r}
 56 | 
 57 | # create geocode function with tryCatch
 58 | # geocoding api is from http://www.datasciencetoolkit.org/
 59 | # returns a named character vector c(address, long, lat), or NA when the
 60 | # lookup raises an error or a warning; progress is reported via message()
 61 | geo.dsk <- function(addr){
 62 |   out <- tryCatch({
 63 |     url <- "http://www.datasciencetoolkit.org/maps/api/geocode/json"
 64 |     # namespace-qualified calls: no require() needed, no masking of fromJSON()
 65 |     response <- httr::GET(url, query = list(sensor = "FALSE", address = addr))
 66 |     # `as = "text"` is the documented way to get the body as a string
 67 |     body <- httr::content(response, as = "text", encoding = "UTF-8")
 68 |     json <- rjson::fromJSON(body)
 69 |     # first geocoding result; errors (caught below) when there is none
 70 |     loc <- json[["results"]][[1]]$geometry$location
 71 |     c(address = addr, long = loc$lng, lat = loc$lat)
 72 |   },
 73 |   error = function(cond) {
 74 |     message(paste("Address not geocoded:", addr))
 75 |     message("Here's the original error message:")
 76 |     # print the text only; message(cond) would re-signal the condition
 77 |     message(conditionMessage(cond))
 78 |     # Choose a return value in case of error
 79 |     NA
 80 |   },
 81 |   warning = function(cond) {
 82 |     message(paste("Address caused a warning:", addr))
 83 |     message("Here's the original warning message:")
 84 |     message(conditionMessage(cond))
 85 |     # NA rather than NULL: rbind() drops NULL results, which would leave
 86 |     # fewer rows than addresses when results are bound back onto the data
 87 |     NA
 88 |   },
 89 |   finally = {
 90 |     message(paste("Processed Address:", addr))
 91 |     message("One down...")
 92 |   })
 93 |   out
 94 | }
 95 | 
 96 | ```
 97 | 
 98 | <br/>
 99 | 
100 | ```{r}
101 | 
102 | # geocode data and bind coordinates onto data
103 | 
104 | result <- 
105 |           cbind(data,
106 |             as.data.frame(
107 |                   do.call(rbind,
108 |                           lapply(as.character(data$location), geo.dsk))))
109 | 
110 | ```
111 | 
112 | <br/>
113 | 
114 | ```{r}
115 | 
116 | # create map; geo.dsk() returns coordinates as text, so convert them to numeric for leaflet
117 | lng <- as.numeric(as.character(result$long))
118 | lat <- as.numeric(as.character(result$lat))
119 | leaflet(result) %>%
120 |   addTiles() %>%
121 |   setView(-93.65, 42.0285, zoom = 3) %>%
122 |   addCircles(lng, lat) %>%
123 |   addPopups(lng, lat, paste0(result$name, "!"))
124 | 
125 | ```
126 | 
127 | 


--------------------------------------------------------------------------------
/tutorials/histograms.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Creating Histograms in RStudio"
 3 | date: Reed College, Instructional Technology Services
 4 | output: html_document
 5 | ---
 6 | <br />          
 7 | ```{r}
 8 | # load the mtcars data
 9 | data(mtcars)
10 | ```
11 | 
12 | Create a histogram of the mpg variable 
13 | ```{r}
14 | hist(mtcars$mpg) 
15 | ```
16 | 


--------------------------------------------------------------------------------
/tutorials/histograms_I.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Histograms"
  3 | author: "Reed College, Instructional Technology Services"
  4 | output: html_document
  5 | ---
  6 | 
  7 | <br>
  8 | <br>
  9 | 
 10 | #### Create Data Frame of Majors and FTE by Department at Reed College
 11 | ```{r}
 12 | 
 13 | Departments = c('Art' , 'Music', 'Theatre', 'Anthropology', 'Economics',
 14 |                 'History', 'Political Science', 'Sociology', 'Chinese',  'Classics',
 15 |                 'English', 'French', 'German', 'Russian', 'Spanish',
 16 |                 'Biology', 'Chemistry', 'Mathematics', 'Physics', 'Linguistics',
 17 |                 'Philosophy', 'Psychology', 'Religion')
 18 | 
 19 | Majors = c(58, 21, 16, 52, 56,
 20 |           57, 68, 28, 6, 20, 
 21 |           150, 5, 2, 7, 3, 
 22 |           153.5, 74, 72.5, 125, 45,
 23 |           75, 98, 25)
 24 | 
 25 | FTE = c(7.8, 4, 6.25, 5, 5.6, 
 26 |         8.7, 5.5, 3, 3, 4, 
 27 |         12, 5, 3, 3, 5, 
 28 |         9, 6.8, 8, 6, 4, 
 29 |         5.7, 7.7, 4)
 30 | 
 31 | data <- data.frame(Departments, Majors, FTE)
 32 | 
 33 | # Data does not include 94 interdisciplinary majors and 40 undecided majors.  
 34 | # Majors like bio/chem are split between the two departments 
 35 | # General Lit majors are included with English 
 36 | # Dance majors and faculty are included with Theatre
 37 | # Major Data: http://www.reed.edu/ir/ir_internal_web/intendedmajors.html and FTE Data: http://www.reed.edu/ir/facfte.html
 38 | 
 39 | ```
 40 | 
 41 | <br>
 42 | <br>
 43 | 
 44 | #### Create Histogram using Base R Commands
 45 | ```{r}
 46 | hist(data$Majors)
 47 | ```
 48 | 
 49 | <br>
 50 | <br>
 51 | 
 52 | #### Add Additional Elements to Base Histogram
 53 | ```{r}
 54 | hist(data$Majors,
 55 |      xlab = "Number of Majors", ylab = "Frequency",  main = "Histogram of Majors", pch = 16, # Add labels
 56 |      breaks=12,  # set number of bins
 57 |      col = "dark blue", lwd = 2) # change color and width of line
 58 | 
 59 | 
 60 | ```
 61 | 
 62 | <br>
 63 | <br>
 64 | 
 65 | #### Create Kernel Density using Base R Commands
 66 | ```{r}
 67 | plot(density(data$Majors), xlim = c(0, 200))
 68 | 
 69 | ```
 70 | 
 71 | <br>
 72 | <br>
 73 | 
 74 | #### Add Additional Elements to Base Density Plot
 75 | ```{r}
 76 | plot(density(data$Majors),
 77 |      xlim = c(0, 200),
 78 |      xlab = "Number of Majors", ylab = "Density",  main = "Density Plot of Majors", # Add labels
 79 |      col = "dark blue", lwd = 4) # change color and width of line
 80 | 
 81 | ```
 82 | 
 83 | 
 84 | <br>
 85 | <br>
 86 | 
 87 | #### Using ggplot2 to Make a Histogram
 88 | ```{r, message=FALSE}
 89 | # This demo requires the 'ggplot2' package
 90 | if( !is.element("ggplot2", installed.packages()[,1]) )
 91 |   install.packages("ggplot2")
 92 | 
 93 | suppressPackageStartupMessages(library(ggplot2))
 94 | 
 95 | ## Base histogram plot in ggplot 
 96 | ggplot(data, aes(x=Majors)) + geom_histogram()
 97 | ```
 98 | 
 99 | 
100 | <br>
101 | <br>
102 | 
103 | #### Apply Theme to Histogram Plot
104 | ```{r, message=FALSE}
105 | ggplot(data, aes(x=Majors)) + 
106 |   geom_histogram() +
107 |   theme_classic()
108 | ```
109 | 
110 | <br>
111 | <br>
112 | 
113 | #### Add Additional Elements to Histogram
114 | ```{r, message=FALSE}
115 | 
116 | ggplot(data, aes(x=Majors)) + 
117 |   geom_histogram(color="dark blue", size=1, fill="light blue", binwidth=15) +  # change color and adjust bindwidth
118 |   ggtitle("Histogram of Reed College Majors") + # add a title to the plot 
119 |   theme_classic()
120 | 
121 | ```
122 | 
123 | <br>
124 | <br>
125 | 
126 | 
127 | #### Using ggplot2 to Make a Density Plot
128 | ```{r}
129 | 
130 | ggplot(data, aes(x=Majors)) + 
131 |   geom_density(color="dark blue", size=1, fill="light blue") + # change to geom_density for density plot 
132 |   ggtitle("Kernal Density of Reed College Majors") + 
133 |   theme_classic()
134 | 
135 | ```
136 | 
137 | <br>
138 | <br>
139 | 
140 | 
141 | 
142 | #### Make Your Histogram Interactive with googleVis
143 |  
144 | ```{r, warning=FALSE}
145 | ## This demo requires the 'googleVis' package 
146 | if( !is.element("googleVis", installed.packages()[,1]) )
147 |   install.packages("googleVis")
148 | 
149 | suppressPackageStartupMessages(library(googleVis))
150 | 
151 | 
152 | # make a new data frame with only columns to plot 
153 | keep <- c('Departments', 'Majors')
154 | data2 <- data[keep]
155 | 
156 | # create interactive histogram plot using googleVis
157 | Hist <- gvisHistogram(data2, options=list(
158 |   legend="{ position: 'right', maxLines: 2 }",
159 |   colors="['#1A8763']",
160 |   width=750, height=500))
161 |                                         
162 | ```
163 | 
164 | ```{r, results = 'asis'}
165 | # print the interactive histogram (use 'plot(Hist)' to view in RStudio)
166 | print(Hist, 'chart') 
167 | 
168 | ```
169 | 
170 | 
171 | 


--------------------------------------------------------------------------------
/tutorials/histograms_pdf.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Histograms in R"
 3 | author: "Reed College, Instructional Technology Services"
 4 | output: pdf_document
 5 | ---
 6 | 
 7 | **Create Histogram  using Base R Commands**
 8 | ```{r}
 9 | cars <- mtcars
10 | hist(cars$mpg)
11 | ```
12 | \newpage
13 | 
14 | **Add Additional Elements to Base Histogram**
15 | ```{r}
16 | hist(cars$mpg,
17 |      xlab = "MPG", ylab = "Frequency",  main = "Histogram of MPG", # Add labels
18 |      breaks=12,  # set number of bins
19 |      col = "dark blue") # change color 
20 | 
21 | 
22 | ```
23 | \newpage
24 | 
25 | **Create Kernel Density using Base R Commands**
26 | ```{r}
27 | plot(density(cars$mpg))
28 | 
29 | ```
30 | \newpage
31 | 
32 | **Add Additional Elements to Base Density Plot**
33 | ```{r}
34 | plot(density(cars$mpg),
35 |      xlab = "MPG", ylab = "Density",  main = "Density Plot of MPG", # Add labels
36 |      col = "dark blue", lwd = 4) # change color and width of line
37 | 
38 | ```
39 | \newpage
40 | 
41 | **Using ggplot2 to Make a Histogram**
42 | ```{r, message=FALSE}
43 | # This demo requires the 'ggplot' package 
44 | if( !is.element("ggplot2", installed.packages()[,1]) )
45 |   install.packages("ggplot2")
46 | 
47 | suppressPackageStartupMessages(library(ggplot2))
48 | 
49 | ## Base histogram plot in ggplot 
50 | ggplot(cars, aes(x=mpg)) + geom_histogram()
51 | ```
52 | \newpage
53 | 
54 | 
55 | **Apply Theme to  Histogram Plot**
56 | ```{r, message=FALSE}
57 | ggplot(cars, aes(x=mpg)) + 
58 |   geom_histogram() +
59 |   theme_classic()
60 | ```
61 | \newpage
62 | 
63 | **Add Additional Elements to Histogram**
64 | ```{r, message=FALSE}
65 | 
66 | ggplot(cars, aes(x=mpg)) + 
67 |   geom_histogram(color="dark blue", size=1, fill="light blue", binwidth=2) +  # change color and adjust binwidth (mpg spans roughly 10 to 34)
68 |   ggtitle("Histogram of MPG") + # add a title to the plot 
69 |   theme_classic()
70 | 
71 | ```
72 | \newpage
73 | 
74 | **Using ggplot to Make a Density Plot**
75 | ```{r}
76 | 
77 | ggplot(cars, aes(x=mpg)) + 
78 |   geom_density(color="dark blue", size=1, fill="light blue") + # change to geom_density for density plot 
79 |   ggtitle("Kernal Density of MPG") + 
80 |   theme_classic()
81 | 
82 | ```
83 | \newpage
84 | 
85 | 
86 | 


--------------------------------------------------------------------------------
/tutorials/histograms_pdf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/tutorials/histograms_pdf.pdf


--------------------------------------------------------------------------------
/tutorials/line graphs advanced.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Line Graphs"
 3 | author: "Reed College, Instructional Technology Services"
 4 | output: html_document
 5 | ---
 6 | 
 7 | <br>
 8 | <br>
 9 | 
10 | #### Download and Load Data
11 | ```{r}
12 | download.file("http://www.openintro.org/stat/data/ames.csv", destfile = "ames.csv")
13 | data <- read.csv("ames.csv")   # read the copy that was just downloaded to the working directory
14 | ```
15 | 
16 | <br>
17 | <br>
18 | 
19 | #### Calculate the Mean of each Continuous Variable by Year
20 | ```{r}
21 | # This calculation requires the 'plyr' package
22 | if( !is.element("plyr", installed.packages()[,1]) )
23 |   install.packages("plyr")
24 | 
25 | library(plyr)
26 | 
27 | # list class of each variable
28 | sapply(data, class)
29 | 
30 | # keep only numeric variables (is.numeric() is TRUE for integer columns as well)
31 | data_continuous <- data[, vapply(data, is.numeric, logical(1)), drop = FALSE]
32 | 
33 | # calculate mean for every column in the data frame by year 
34 | means <- ddply(data_continuous, .(Yr.Sold), numcolwise(mean), na.rm = TRUE)
35 | 
36 | 
37 | ```
38 | 
39 | <br>
40 | <br>
41 | 
42 | #### Create Line Graph Plot Function
43 | ```{r, message=FALSE}
44 | 
45 | # This demo requires the 'ggplot' package 
46 | if( !is.element("ggplot2", installed.packages()[,1]) )
47 |   install.packages("ggplot2")
48 | 
49 | suppressPackageStartupMessages(library(ggplot2))
50 | 
51 | # line(): print one line graph per column of `x` (a data frame); `time` is a string
52 | # naming the x-axis variable for aes_string(). `na.rm` and `...` are accepted but unused.
53 | line <- function(x, time, na.rm = TRUE, ...) {
54 |   for (column in names(x)) {
55 |     plot_obj <- ggplot(x, aes_string(x = time, y = column)) +
56 |       geom_line(size = 2, color = "darkblue") + theme_classic()
57 |     print(plot_obj)
58 |   }
59 | }
60 | ```
61 | 
62 | <br>
63 | <br>
64 | 
65 | #### Run Line Graph Function to Create Plots for an Entire Data Frame
66 | ```{r}
67 | line(x=means, time="means$Yr.Sold")
68 | ```
69 | 


--------------------------------------------------------------------------------
/tutorials/line graphs.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Line Graphs"
  3 | author: "Reed College, Instructional Technology Services"
  4 | output: html_document
  5 | ---
  6 | 
  7 | <br>
  8 | <br>
  9 | 
 10 | #### Download and Load Data
 11 | ```{r}
 12 | download.file("http://www.openintro.org/stat/data/ames.csv", destfile = "ames.csv")
 13 | data <- read.csv("ames.csv")   # read the copy that was just downloaded to the working directory
 14 | ```
 15 | 
 16 | <br>
 17 | <br>
 18 | 
 19 | #### Calculate Mean Sale Price by Year
 20 | ```{r}
 21 | # This calculation requires the 'plyr' package
 22 | if( !is.element("plyr", installed.packages()[,1]) )
 23 |   install.packages("plyr")
 24 | 
 25 | library(plyr)
 26 | 
 27 | mean <- ddply(data, .(Yr.Sold), summarize,     
 28 |                mean_price = mean(SalePrice))
 29 | ```
 30 | 
 31 | <br>
 32 | <br>
 33 | 
 34 | 
 35 | 
 36 | #### Create Line Graph using Base R Commands
 37 | ```{r}
 38 | plot(mean$Yr.Sold, mean$mean_price, type = "o")
 39 | ```
 40 | 
 41 | <br>
 42 | <br>
 43 | 
 44 | #### Add Additional Elements to Base Line Graph
 45 | ```{r}
 46 | 
 47 | plot(mean$Yr.Sold, mean$mean_price, type = "o",
 48 |      xlab = "Year", ylab = "Mean Sale Price",  main = "Line Graph of Mean Home Sale Price in Ames, IA", pch = 16, 
 49 |      col = "dark blue", lwd = 3, cex = 2) 
 50 | 
 51 | 
 52 | ```
 53 | 
 54 | <br>
 55 | <br>
 56 | 
 57 | 
 58 | #### Using ggplot2 to Make a Line Graph
 59 | ```{r, message=FALSE}
 60 | # This demo requires the 'ggplot' package
 61 | if( !is.element("ggplot2", installed.packages()[,1]) )
 62 |   install.packages("ggplot2")
 63 | 
 64 | suppressPackageStartupMessages(library(ggplot2))
 65 | 
 66 | ggplot(mean, aes(Yr.Sold, mean_price)) + 
 67 |   geom_line()
 68 | ```
 69 | 
 70 | 
 71 | <br>
 72 | <br>
 73 | 
 74 | #### Apply Theme to Line Graph
 75 | ```{r, message=FALSE}
 76 | 
 77 | if( !is.element("ggthemes", installed.packages()[,1]) )
 78 |   install.packages("ggthemes")
 79 | 
 80 | if( !is.element("scales", installed.packages()[,1]) )
 81 |   install.packages("scales")
 82 | 
 83 | suppressPackageStartupMessages(library(ggthemes))
 84 | suppressPackageStartupMessages(library(scales))
 85 | 
 86 | ggplot(mean, aes(Yr.Sold, mean_price)) + 
 87 |   geom_line() +
 88 |   theme_tufte()
 89 | ```
 90 | 
 91 | <br>
 92 | <br>
 93 | 
 94 | #### Add Additional Elements to Line Graph
 95 | ```{r, message=FALSE}
 96 | 
 97 | ggplot(mean, aes(Yr.Sold, mean_price)) + 
 98 |   geom_line(color="dark blue", size=2) +
 99 |   scale_y_continuous("Mean Sale Price", labels = dollar) +
100 |   scale_x_continuous("Year") +
101 |   ggtitle("Mean Home Sale Price in Ames, IA") +
102 |   theme_tufte() +
103 |   theme(plot.title = element_text(size = 16, face="bold"))
104 | 
105 | ```
106 | 
107 | 
108 | #### Make Your Line Graph Interactive with googleVis
109 | ```{r, warning=FALSE}
110 | ## This demo requires the 'googleVis' package 
111 | if( !is.element("googleVis", installed.packages()[,1]) )
112 |   install.packages("googleVis")
113 | 
114 | suppressPackageStartupMessages(library(googleVis))
115 | suppressPackageStartupMessages(library(scales))
116 | 
117 | # add a tooltip column holding the mean price formatted as dollars
118 | mean$pop.html.tooltip=dollar_format()(mean$mean_price)
119 | 
120 | # create interactive scatter plot using googleVis
121 | line <- gvisScatterChart(mean,                                                           
122 |                          options=list(tooltip="{isHtml:'True'}",
123 |                          legend="none", lineWidth=5, pointSize=3,                                                     
124 |                          vAxis="{title:'Mean Sale Price'}",                         
125 |                          hAxis="{title:'Year'}",                     
126 |                          width=750, height=500))                            
127 | 
128 | ```
129 | 
130 | <br>
131 | <br>
132 | 
133 | 
134 | ```{r, results = 'asis'}
135 | # print the interactive chart (use 'plot(line)' to view in RStudio)
136 | print(line, 'chart') 
137 | 
138 | ```
139 | 
140 | <br>
141 | <br>
142 | 
143 | 
144 | #### Add an Edit Button to Your Line Graph
145 | ```{r, warning=FALSE}
146 | ## This demo requires the 'googleVis' package 
147 | if( !is.element("googleVis", installed.packages()[,1]) )
148 |   install.packages("googleVis")
149 | 
150 | suppressPackageStartupMessages(library(googleVis))
151 | 
152 | # create interactive scatter chart with an edit button using googleVis
153 | line2 <- gvisScatterChart(mean,                                                           
154 |                           options=list(tooltip="{isHtml:'True'}",
155 |                           legend="none", lineWidth=5, pointSize=3,                                                     
156 |                           vAxis="{title:'Mean Sale Price'}",                         
157 |                           hAxis="{title:'Year'}",                     
158 |                           width=750, height=500, 
159 |                           gvis.editor="Edit Graph"))                  
160 |                                         
161 | ```
162 | 
163 | <br>
164 | <br>
165 | 
166 | ```{r, results = 'asis'}
167 | # plot interactive scatter (use 'plot(line2)' to view in RStudio)
168 | print(line2, 'chart') 
169 | 
170 | ```
171 | 
172 | 
173 | 


--------------------------------------------------------------------------------
/tutorials/load_data.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Loading Data"
 3 | author: "Reed College, Instructional Technology Services"
 4 | output: html_document
 5 | ---
 6 | 
 7 | R comes with a number of example data sets.  You can view these data sets in RStudio by typing 'data()'.
 8 | ```{r}
 9 | summary(mtcars) # R has automatically loaded the mtcars data frame for us
10 | ```
11 |       
12 | <BR>
13 | <BR>
14 | If you want to analyze other data in R there are several options for reading your data into R-Studio.  Among the most common are...
15 | <BR>
16 | <BR>
17 |  
18 | **From a .csv file:**
19 | ```{r, eval=FALSE}
20 | # use the read.csv command 
21 | cars <- read.csv('filepath/filename.csv', row.names=1) 
22 | # row.names=1 tells R that the first column of our data contains the row names
23 | 
24 | ```
25 | <BR>
26 | <BR>
27 | **From a Stata data file:** 
28 | ```{r, eval=FALSE}
29 | # use the read.dta command that is part of the foreign package 
30 | # if you have not already installed the foreign package you can type install.packages("foreign") to install it
31 | 
32 | library(foreign)
33 | mydata <- read.dta("filepath/filename.dta")
34 | 
35 | ```
36 | <BR>
37 | <BR>
38 | **From an Excel data file:**
39 | ```{r, eval=FALSE}
40 | # use the read.xlsx command that is part of the xlsx package 
41 | # if you have not already installed the xlsx package you can type install.packages("xlsx") to install it 
42 | # alternatively you can save your file in .csv format in Excel
43 | 
44 | library(xlsx)
45 | mydata <- read.xlsx("filepath/filename.xlsx", 2) # the 2 tells R to read in the second page in the Excel workbook
46 | 
47 | ```
48 | 
49 | 


--------------------------------------------------------------------------------
/tutorials/loops_with_ggplot2.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Using Loops with ggplot2"
  3 | author: "Rich Majerus"
  4 | output: html_document
  5 | ---
  6 | 
  7 | <br/>
  8 | <br/>
  9 | 
 10 | 
 11 | ```{r, warning=FALSE}
 12 | library(stringr)
 13 | library(reshape2)
 14 | library(ggplot2)
 15 | library(ggthemes)
 16 | library(pander)
 17 | 
 18 | # update this file path to point toward appropriate folders on your computer
 19 | 
 20 | # folder where you want the graphs to be saved:
 21 | results <- "/Users/majerus/Desktop/NJAIS/results/"  
 22 | 
 23 | # folder where the data is saved:
 24 | labor <- "/Users/majerus/Desktop/NJAIS/data/dept_labor/age_lvl/"
 25 | 
 26 | # create list of all .csv files in folder
 27 | # (pattern is a regular expression, so match the literal ".csv" extension at the end of the name)
 28 | file_list <- list.files(path=labor, pattern="\\.csv$")
 29 | # read each .csv, add a year column from the first 4 characters of its file name, and rbind into data.labor
 30 | data.labor <- 
 31 |   do.call("rbind", 
 32 |           lapply(file_list, 
 33 |                  function(x) 
 34 |                    cbind(year = as.numeric(str_sub(x, 1, 4)),
 35 |                          read.csv(paste(labor, x, sep=''), 
 36 |                             stringsAsFactors = FALSE))))
 37 | 
 38 | # remove commas from numeric variables
 39 | data.labor[,c(3:12)] <- lapply(
 40 |                           data.labor[,c(3:12)], 
 41 |                           function(x) {as.numeric( 
 42 |                             gsub(",", "", x))})
 43 | 
 44 | # drop 2010 so that the data and projections occur in 5-year intervals 
 45 | data.labor <- subset(data.labor, data.labor$year!=2010)
 46 | 
 47 | # rename cols 
 48 | colnames(data.labor) <- c("Year",   "County", "Total",  
 49 |                           "Under 5",  '5 to 9 years', '10 to 14 years', '15 to 19 years', 
 50 |                           "X20.24", "X25.29", "X30.34", "X35.39", "X40.44")
 51 | 
 52 | # select columns of interest
 53 | keep <- c("Year", "County", "Total", 'Under 5', 
 54 |           '5 to 9 years', '10 to 14 years', '15 to 19 years')
 55 | 
 56 | data.labor <- data.labor[keep]
 57 | 
 58 | # melt data to long format 
 59 | data.labor.long <- melt(data.labor, id.vars=c("County", "Year"), variable.name="category")
 60 | 
 61 | # remove total projections and state level projections from data
 62 | data.labor.long <- subset(data.labor.long, data.labor.long$category!='Total')
 63 | data.labor.long <- subset(data.labor.long, data.labor.long$County!='New Jersey')
 64 |  
 65 | # county.graph: print one faceted population line chart per county in df
 66 | county.graph <- function(df, na.rm = TRUE, ...){
 67 |   # df: long data frame with County, Year, category and value columns
 68 |   # na.rm: ignore missing values when computing each county's y-axis limit
 69 |   # ...: accepted in the signature but not used in the body
 70 |   county_list <- unique(df$County)   # counties to loop over
 71 |   # create for loop to produce ggplot2 graphs 
 72 |   for (i in seq_along(county_list)) { 
 73 |     # rows for this county, shared by the plot data and the y-axis limit
 74 |     county_df <- subset(df, df$County==county_list[i])
 75 |     plot <- 
 76 |       ggplot(county_df,
 77 |              aes(Year, value/1000, group = County, colour = category)) + 
 78 |       
 79 |       geom_line(size=2) +
 80 |       facet_wrap( ~  category, ncol=2) +
 81 |       
 82 |       theme_pander() +
 83 |       theme(legend.position="none") + 
 84 |       
 85 |       scale_y_continuous("County Population within Age Categories (thousands)", 
 86 |                          limits=c(0, max(county_df$value, na.rm = na.rm)/1000)) +
 87 |       scale_x_continuous("Year") +
 88 |       
 89 |       ggtitle(paste(county_list[i], ' County, New Jersey \n', 
 90 |                     "County Population Projection within Age Categories (thousands) \n",
 91 |                     sep=''))
 92 |     
 93 |     # save plots as .png
 94 |     # ggsave(plot, file=paste(results, 
 95 |     #                        'projection_graphs/county_graphs/',
 96 |     #                        county_list[i], ".png", sep=''), scale=2)
 97 |    
 98 |     # save plots as .pdf
 99 |     # ggsave(plot, file=paste(results, 
100 |     #                        'projection_graphs/county_graphs/',
101 |     #                        county_list[i], ".pdf", sep=''), scale=2)
102 |     
103 |     # print plots to screen
104 |     print(plot)
105 |   }
106 | }
107 | 
108 | # run graphing function on long df
109 | county.graph(data.labor.long)
110 | ```
111 | 
112 | 


--------------------------------------------------------------------------------
/tutorials/markdown_formatting.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Markdown Formatting"
 3 | date: Reed College, Instructional Technology Services
 4 | output: html_document
 5 | ---
 6 | <br> <br> 
 7 | 
 8 | #### **Bold Text**
 9 | ```{r}
10 | # **text** or __text__ makes your text between the ** or __ bold 
11 | ```
12 | For example, __this is bold text that I made using two underscores__  
13 | <br> <br> 
14 | 
15 | #### *Italic Text*
16 | ```{r}
17 | # *text* or _text_ makes your text between the * or _ italic 
18 | ```
19 | For example, _this is italic text that I made using one underscore_  
20 | <br> <br> 
21 | 
22 | #### Line breaks
23 | ```{r}
24 | # You can include a line break by ending a line with two (or more) spaces and a return.
25 | # Alternatively, you can use html <br> 
26 | # If we put two (or more) spaces and a return after the comma in the preceding example we get the following: 
27 | ```
28 | For example,  
29 | _this is italic text that I made using one underscore_
30 | <br> <br> 
31 | 
32 | 
33 | #### Inline R Code
34 | ```{r}
35 | # You can insert R code and calculations directly into your text 
36 | # For example, the following line outside of a code chunk is displayed below. 
37 | # The average mpg for cars in the data set is `r round(mean(mtcars$mpg), 2)` miles per gallon.  
38 | ```
39 | 
40 | The average mpg for cars in the data set is `r round(mean(mtcars$mpg), 2)` miles per gallon.  
41 | <br> <br> 
42 | 
43 | 
44 | #### Page Breaks 
45 | In html documents you will not need page breaks.
46 | ```{r}
47 | # When you are producing pdf documents, you can use the latex command "\pagebreak" to insert a page break
48 | ```
49 | <br> <br> 
50 | 
51 | 
52 | #### Headings
53 | ```{r}
54 | # You can create headings with "#"
55 | # The following text starts with #Heading and ends with ######Heading
56 | 
57 | ```
58 |   
59 | # Heading
60 | ## Heading
61 | ### Heading
62 | #### Heading
63 | ##### Heading
64 | ###### Heading
65 | <br> <br> 
66 | 
67 | 


--------------------------------------------------------------------------------
/tutorials/matriculants.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Reed College Matriculants"
 3 | author: "Rich Majerus"
 4 | date: "March 11, 2015"
 5 | output: html_document
 6 | ---
 7 | ```{r, echo=FALSE, warning=FALSE}
 8 | # install packages 
 9 | pkg <- c("rvest", "dplyr",  "reshape2", "googleVis",  "magrittr", "ggplot2", "ggthemes", "RColorBrewer")
10 | 
11 | new.pkg <- pkg[!(pkg %in% installed.packages())]
12 | 
13 | if (length(new.pkg)) {
14 |   install.packages(new.pkg)
15 | }
16 | 
17 | # load packages 
18 | suppressMessages(library(rvest))
19 | suppressMessages(library(dplyr))
20 | suppressMessages(library(reshape2))
21 | suppressMessages(library(googleVis))
22 | suppressMessages(library(ggplot2))
23 | suppressMessages(library(ggthemes))
24 | suppressMessages(library(RColorBrewer))
25 | 
26 | # download and parse the html page
27 | # (html() is deprecated in rvest in favour of read_html())
28 | webpage <- read_html("http://www.reed.edu/ir/geographic_states.html")
29 | # the data we want is in the first table on this page
30 | # the html_table() command coerces the data into a data frame
31 | webpage %>%
32 |   html_nodes("table") %>%
33 |   .[[1]] %>%
34 |   html_table()
35 | 
36 | # repeat above code but store results in a data frame
37 | data <- 
38 |   webpage %>%
39 |   html_nodes("table") %>%
40 |   .[[1]] %>%
41 |   html_table()
42 | 
43 | # we can now work with this data from the web as a data frame in R
44 | # remove total row from data 
45 | data <- 
46 |   data %>% 
47 |   filter(State!='Total')
48 | 
49 | # reshape data for plotting 
50 | data_long <- melt(data, id='State')
51 | 
52 | # rename columns in long data frame 
53 | colnames(data_long) <- c('State', 'Year', 'Matriculants')
54 | 
55 | # select states that we want to graph
56 | keep <- c('Iowa', 'Minnesota', 'South Dakota',  'Wisconsin')
57 | ```
58 | 
59 | ```{r}
60 | # plot selected states
61 | ggplot(
62 |   subset(data_long, data_long$State %in% keep),
63 |   aes(Year, Matriculants, group = State, colour = State)) + 
64 |   geom_line(size=2) 
65 | ```
66 | 
67 | ```{r}
68 | # plot selected states (facets)
69 | ggplot(
70 |   subset(data_long, data_long$State %in% keep),
71 |   aes(Year, Matriculants, group = State, colour = State)) + 
72 |   geom_line(size=2) +
73 |   facet_wrap( ~  State, 
74 |               ncol=2) 
75 | ```
76 |               
77 | ```{r}
78 | # plot selected states (facets) with title, theme and different colors
79 | ggplot(
80 |   subset(data_long, data_long$State %in% keep),
81 |   aes(Year, Matriculants, group = State, colour = State)) + 
82 |   geom_line(size=2) +
83 |   facet_wrap( ~  State, 
84 |               ncol=nrow(subset(data_long, data_long$State %in% keep))/16) +
85 |   ggtitle("Reed College Matriculants by State") +
86 |   theme_tufte() +
87 |   theme(plot.title = element_text(size = 16, face="bold")) + 
88 |   scale_colour_brewer("Colors in Set1", palette="Set1")
89 | ```
90 | 
91 | 


--------------------------------------------------------------------------------
/tutorials/missing_data.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Missing Data"
  3 | author: "Reed College, Instructional Technology Services"
  4 | output: html_document
  5 | ---
  6 | 
  7 | <br>
  8 | <br>
  9 | 
 10 | ####Create Missing Values in mtcars data 
 11 | ```{r}
 12 | # For this example we will pretend we are missing mpg data for Merc 280, Dodge Challenger and Ferrari Dino in mtcars
 13 | 
 14 | # flag the three cars by row name and overwrite their mpg with the code -99
 15 | mtcars$mpg <- ifelse(
 16 |   rownames(mtcars) %in% c('Merc 280', 'Dodge Challenger', 'Ferrari Dino'),
 17 |   -99, mtcars$mpg)
 18 | ```                       
 19 | 
 20 | <br>
 21 | <br>
 22 | 
 23 | ####Change Missing Value Code to NA
 24 | ```{r}
 25 | mtcars[mtcars==-99] <- NA
 26 | ```                       
 27 | 
 28 | <br>
 29 | <br>
 30 | 
 31 | ####Identify Missing Values in Data Frame
 32 | ```{r}
 33 | # list total number of missing values by variable
 34 | colSums(is.na(mtcars))
 35 | ```
 36 | 
 37 | ```{r}
 38 | # list names of cars with missing mpg
 39 | rownames(mtcars)[is.na(mtcars$mpg)]
 40 | ```
 41 | 
 42 | <br>
 43 | <br>
 44 | 
 45 | ####Calculate Mean MPG
 46 | ```{r}
 47 | mean(mtcars$mpg)   # missing values mess up even simple calculations
 48 | ```
 49 | 
 50 | ```{r}
 51 | mean(mtcars$mpg, na.rm = TRUE)   # we can get around this by telling R to ignore missing values 
 52 | ```
 53 | 
 54 | <br>
 55 | <br>
 56 | 
 57 | ####Mean Imputation 
 58 | ```{r}
 59 | # Mean Imputation
 60 | mtcars.imputed <- mtcars
 61 | mtcars.imputed$mpg <- ifelse(is.na(mtcars$mpg), mean(mtcars$mpg, na.rm = TRUE), mtcars$mpg)
 62 | 
 63 | colSums(is.na(mtcars.imputed)) # no more missing data 
 64 | summary(mtcars.imputed$mpg)
 65 | ```
 66 | 
 67 | <br>
 68 | <br>
 69 | 
 70 | ####Mean Imputation over every Column 
 71 | ```{r}
 72 | # mean.imputation: fill the NAs in every column of df with that column's mean (apply() returns a matrix)
 73 | mean.imputation <- function(df,...) {
 74 |   apply(df, 2, function(column) ifelse(is.na(column), mean(column, na.rm = TRUE), column))
 75 | }
 76 | 
 77 | mtcars.imputed <- mean.imputation(mtcars) 
 78 | 
 79 | colSums(is.na(mtcars.imputed)) # no more missing data 
 80 | ```
 81 | 
 82 | <br>
 83 | <br>
 84 | 
 85 | ####Percentile Imputation 
 86 | ```{r}
 87 | #  Imputation
 88 | mtcars.imputed <- mtcars
 89 | mtcars.imputed$mpg[is.na(mtcars.imputed$mpg)] <- quantile(mtcars.imputed$mpg, .95, na.rm = TRUE) # impute missing with 95th percentile
 90 | 
 91 | colSums(is.na(mtcars.imputed)) # no more missing data 
 92 | summary(mtcars.imputed$mpg)
 93 | 
 94 | ```
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 


--------------------------------------------------------------------------------
/tutorials/read_and_summarize_multiple_txt.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Read Multiple .txt files and Write Summary Stats"
 3 | output: html_document
 4 | ---
 5 | 
 6 | <br/>
 7 | 
 8 | #### Install, Update and Load Packages
 9 | ```{r, warning=FALSE}
10 | pkg <- c("stringr", "reshape2",  "dplyr", "ggplot2",  "magrittr")
11 | 
12 | new.pkg <- pkg[!(pkg %in% installed.packages())]
13 | 
14 | if (length(new.pkg)) {
15 |   install.packages(new.pkg)
16 | }
17 | 
18 | library(stringr)
19 | library(reshape2)
20 | suppressPackageStartupMessages(library(dplyr))
21 | library(ggplot2)
22 | 
23 | 
24 | ```
25 | <br/>
26 | 
27 | 
28 | #### Read in Data 
29 | ```{r}
30 | # update this file path to point toward appropriate folder on your computer
31 | folder <- "/Users/majerus/Desktop/thesis_projects/linguistics/Yevgeniy/exp1/"      
32 | file_list <- list.files(path=folder, pattern="*.txt")                              
33 | 
34 | # read in each .txt file in file_list and rbind them into a data frame called data 
35 | data <- 
36 |   do.call("rbind", 
37 |           lapply(file_list, 
38 |                  function(x) 
39 |                  read.table(paste(folder, x, sep=''), 
40 |                             header = TRUE, 
41 |                             stringsAsFactors = FALSE)))
42 | 
43 | 
44 | ```
45 | <br/>
46 | 
47 | #### Clean Data 
48 | ```{r}
49 | clean.data <- function(df){
50 |   # returns df plus s1-s3 (stimulus split on ','), answer, correct (1/0) and numeric reactionTime
51 |   cleaned <- cbind(df, colsplit(df$stimulus, ',', names =  c('s1','s2', 's3')))
52 |   cleaned$answer <- ifelse(str_count(cleaned$stimulus, 'A') == 2, 'A', 'B')
53 |   cleaned$correct <- ifelse(cleaned$response == cleaned$answer, 1, 0)
54 |   cleaned$reactionTime <- as.numeric(cleaned$reactionTime)
55 |   cleaned
56 | }
57 | data <- clean.data(data)
58 | ```
59 | <br/>
60 | 
61 | #### Save Cleaned and Combined Data
62 | ```{r}
63 | write.csv(data, paste(folder,'cleaned_data.csv', sep = ''), row.names = FALSE)
64 | ```
65 | <br/>
66 | 
67 | #### Create Data Frame of Summary Statistics 
68 | ```{r}
69 | summary_stats <- 
70 |   data %>%
71 |   group_by(subject, correct, answer) %>%
72 |   summarise(count = n(),
73 |             mean_reactionTime = mean(reactionTime, na.rm = TRUE),
74 |             sd_reactionTime = sd(reactionTime, na.rm = TRUE),
75 |             min_reactionTime= min(reactionTime, na.rm = TRUE),
76 |             max_reactionTime = max(reactionTime, na.rm = TRUE))
77 | ```
78 | <br/>
79 | 
80 | #### Save Summary Statistics
81 | ```{r}
82 | write.csv(summary_stats, paste(folder,'summary_stats.csv', sep = ''), row.names = FALSE)
83 | ```
84 | 
85 | 


--------------------------------------------------------------------------------
/tutorials/reading_and_writing.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Reading and Writing .csv Files in RStudio"
 3 | date: Reed College, Instructional Technology Services
 4 | output: pdf_document
 5 | ---
 6 | <br /> 
 7 | 
 8 | **Save a data frame to .csv file using the write.csv command**
 9 | ```{r}
10 | data(mtcars) 
11 | 
12 | # use the write.csv command followed by the file path (i.e. where you would like to save the file)
13 | # row.names=TRUE (spelled out, since T can be reassigned) writes the car names as the first column
14 | write.csv(mtcars, '/Users/majerus/Desktop/R/intro/data/cars.csv', row.names=TRUE)
15 | # remove the cars data from the workspace
16 | rm(mtcars) 
17 | ```
18 | <br />  
19 | 
20 | 
21 | **Load data from a .csv file using the read.csv command**
22 | ```{r}
23 | # use the read.csv command followed by the file path
24 | # row.names=1 tells R that the data in the first column are the names of the rows
25 | cars <- read.csv('/Users/majerus/Desktop/R/intro/data/cars.csv', row.names=1)
26 | 
27 | ```
28 | <br />  
29 | 
30 | **Loading multiple .csv files as separate data frames**
31 | ```{r}
32 | # Read every .csv in `folder` into its own data frame named after the file.
33 | folder <- "/Users/majerus/Desktop/R/intro/data/"      # path to folder that holds multiple .csv files
34 | file_list <- list.files(path=folder, pattern="\\.csv$") # pattern is a regex: match names ending in .csv
35 | 
36 | # seq_along() gives an empty loop when no files match (1:length() would iterate over 1, 0)
37 | for (i in seq_along(file_list)){
38 |   assign(file_list[i], 
39 |   read.csv(paste(folder, file_list[i], sep=''))
40 | )}
41 | 
42 | ```
43 | <br />  
44 | 
45 | **Loading multiple .csv files into the same data frame**
46 | ```{r}
47 | # Stack every .csv in `folder` into a single data frame.
48 | folder <- "/Users/majerus/Desktop/R/intro/data/"      # path to folder that holds multiple .csv files
49 | file_list <- list.files(path=folder, pattern="\\.csv$") # pattern is a regex: match names ending in .csv
50 | 
51 | # read in each .csv file in file_list and rbind them into a data frame called data 
52 | data <- 
53 |   do.call("rbind", 
54 |           lapply(file_list, 
55 |                  function(x) 
56 |                  read.csv(paste(folder, x, sep=''), 
57 |                  stringsAsFactors = FALSE)))
58 | 
59 | 
60 | ```
61 | 
62 | 
63 | **Load data from a Stata data file**
64 | ```{r, eval=FALSE}
65 | # use the read.dta command that is part of the foreign package 
66 | # type install.packages("foreign") to install the foreign package
67 | 
68 | library(foreign)
69 | mydata <- read.dta("filepath/filename.dta")
70 | 
71 | ```
72 | <BR>
73 | <BR>
74 | 
75 | **Load data from an Excel data file**
76 | ```{r, eval=FALSE}
77 | # use the read.xlsx command that is part of the xlsx package 
78 | # type install.packages("xlsx") to install the xlsx package 
79 | 
80 | library(xlsx)
81 | mydata <- read.xlsx("filepath/filename.xlsx", 2) 
82 | # the 2 tells R to read in the second page in the Excel workbook
83 | 
84 | ```
85 | 
86 | 
87 | 
88 | 
89 | 
90 | 


--------------------------------------------------------------------------------
/tutorials/reading_and_writing.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/tutorials/reading_and_writing.pdf


--------------------------------------------------------------------------------
/tutorials/regex.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "regex"
 3 | author: "Reed College, Instructional Technology Services"
 4 | output: html_document
 5 | ---
 6 | 
 7 | <br/>
 8 | 
 9 | 
10 | 
11 | #### Regex Classes 
12 | 
13 | [:alnum:]
14 | Alphanumeric characters: [:alpha:] and [:digit:].
15 | 
16 | [:alpha:]
17 | Alphabetic characters: [:lower:] and [:upper:].
18 | 
19 | [:blank:]
20 | Blank characters: space and tab, and possibly other locale-dependent characters such as non-breaking space.
21 | 
22 | [:cntrl:]
23 | Control characters. In ASCII, these characters have octal codes 000 through 037, and 177 (DEL). In another character set, these are the equivalent characters, if any.
24 | 
25 | [:digit:]
26 | Digits: 0 1 2 3 4 5 6 7 8 9.
27 | 
28 | [:graph:]
29 | Graphical characters: [:alnum:] and [:punct:].
30 | 
31 | [:lower:]
32 | Lower-case letters in the current locale.
33 | 
34 | [:print:]
35 | Printable characters: [:alnum:], [:punct:] and space.
36 | 
37 | [:punct:]
38 | Punctuation characters:
39 | ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~.
40 | 
41 | [:space:]
42 | Space characters: tab, newline, vertical tab, form feed, carriage return, space and possibly other locale-dependent characters.
43 | 
44 | [:upper:]
45 | Upper-case letters in the current locale.
46 | 
47 | [:xdigit:]
48 | Hexadecimal digits:
49 | 0 1 2 3 4 5 6 7 8 9 A B C D E F a b c d e f.
50 | 
51 | See http://stat.ethz.ch/R-manual/R-devel/library/base/html/regex.html for more information. 
52 | 
53 | <br/>
54 | 
55 | ##### Using Regular Expressions in R 
56 | 
57 | ```{r}
58 | 
59 | ```
60 | 
61 | <br/>
62 | 
63 | ##### Sampling
64 | 
65 | ```{r}
66 | 
67 | ```
68 | 


--------------------------------------------------------------------------------
/tutorials/reordering_geom_bar.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Reordering Bars by Frequency in ggplot2"
 3 | output: html_document
 4 | ---
 5 | 
 6 | <br/>
 7 | 
 8 | ```{r, warning=FALSE}
 9 | 
10 | suppressMessages(library(ggthemes))
11 | suppressMessages(library(ggplot2))
12 | suppressMessages(library(dplyr))
13 | 
14 | cars <- mtcars
15 | # average mpg for each carb value
16 | data <- 
17 |   cars %>%
18 |   group_by(carb) %>%
19 |   summarise(mpg=mean(mpg))
20 | # first plot: carb is still numeric here
21 | ggplot(data=data, aes(x=carb, y=mpg)) +
22 |   geom_bar(stat="identity")
23 | # make carb a factor whose levels are ordered by decreasing mean mpg
24 | data$carb <- factor(data$carb, levels = data$carb[order(-data$mpg)])
25 | # second plot: same call, now with carb as the reordered factor
26 | ggplot(data=data, aes(x=carb, y=mpg)) +
27 |   geom_bar(stat="identity")
28 | # fill is a constant colour, so it is set in geom_bar() rather than mapped inside aes()
29 | ggplot(data=data, aes(x=carb, y=mpg)) +
30 |   geom_bar(stat="identity", fill="darkblue") +
31 |   theme_tufte()
32 | 
33 | 
34 | ```
35 | 
36 | 


--------------------------------------------------------------------------------
/tutorials/reproducible_research.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Reproducible Research"
 3 | author: "Rich Majerus"
 4 | output:
 5 |   pdf_document:
 6 |     fig_caption: true
 7 | ---
 8 | 
 9 | ```{r, echo=FALSE}
10 | # define paths and files (this is all you need to change to run this code on your machine)
11 | folder <- '/Users/majerus/Desktop/2014 projects/blog/post1_logs/'
12 | file <- 'state_enrollment_reed.csv' 
13 | 
14 | # load libraries 
15 | library(ggplot2)
16 | library(ggthemes)
17 | library(RColorBrewer)
18 | library(xtable)
19 | 
20 | # load data 
21 | state <- read.csv(paste(folder, file, sep=''))
22 | 
23 | # remove Washington, D.C. from data for mapping 
24 | state <- subset(state, state$State!='Washington, DC')
25 | 
26 | # create variable that is log of 2013 data 
27 | state$l2013.t <- log(ifelse(state$X2013!=0, state$X2013, 1))
28 | 
29 | # rename columns for mapping
30 | colnames(state) <- c('state', 'yr_2007', 'yr_2008', 'yr_2009', 'yr_2010', 'yr_2011', 'yr_2012', 'yr_2013', 'log_2013')
31 | state$state <- tolower(state$state)
32 | 
33 | # read in mapping data 
34 | state_df <- map_data("state")
35 | 
36 | # join reed data and mapping data at the state level 
37 | choropleth <- merge(state_df, state, by.x = "region", by.y = "state")
38 | 
39 | # order data to properly display on map 
40 | choropleth <- choropleth[order(choropleth$order), ]
41 | 
42 | # create map of 2013 enrollment 
43 | map13 <- ggplot(choropleth, aes(long, lat, group = group)) +
44 |          geom_polygon(aes(fill = yr_2013)) +   
45 |          coord_fixed() +       
46 |          theme_tufte() +
47 |          scale_fill_gradientn(colours=brewer.pal(9,"Greens"), name = " ") +
48 |          scale_x_continuous("Longitude") + 
49 |          scale_y_continuous("Latitude") 
50 | 
51 | # create map of log of 2013 enrollment 
52 | map13l <- ggplot(choropleth, aes(long, lat, group = group)) +
53 |           geom_polygon(aes(fill = log_2013))+
54 |           coord_fixed() +   
55 |           theme_tufte() +
56 |           scale_fill_gradientn(colours=brewer.pal(9,"Greens"), name = " ") +
57 |           scale_x_continuous("Longitude") + 
58 |           scale_y_continuous("Latitude") 
59 | 
60 | ```
61 | 
62 | You can write your entire paper (text, code, analysis, graphics, etc.) all in R Markdown.  As an example, the following maps show the geographic distribution of Reed College's enrolling students. You can reproduce this example in RStudio with this [code](https://files.reed.edu/?path=%2Fafs%2Freed.edu%2Fuser%2Fm%2Fa%2Fmajerus%2FPublic%2Ftutorials%2Fcode%2Freproducible_research.Rmd) and this [data](https://files.reed.edu/?path=%2Fafs%2Freed.edu%2Fuser%2Fm%2Fa%2Fmajerus%2FPublic%2Ftutorials%2Fdata%2Fstate_enrollment_reed.csv).   Additionally, you can access a dynamic version of this analysis [online](http://blogs.reed.edu/ed-tech/2014/08/choropleth-maps/). Figure 1 shows the raw matriculant data from 2013 mapped by state. The darker a state's shading, the more matriculants from that state.  
63 | 
64 | ```{r, echo=FALSE, fig.cap="Domestic Geographic Distribution of 2013 Entering Class"}
65 | map13
66 | ```
67 | 
68 | However, we may be interested in learning more about the variation in matriculants across all states rather than identifying the states that account for the greatest number of matriculants. One way to approach this task is to map the log of matriculants. Log transforming a variable that contains exceptionally large values (i.e., a right skewed variable) pulls those large values closer to the mean and yields a more symmetrically distributed variable.  As for the map, log transforming the number of matriculants increases the variation in the color gradient across states and enables us to better visualize the distribution of Reed's matriculants across the entire country.
69 | 
70 | ```{r, echo=FALSE, fig.cap="Domestic Geographic Distribution of 2013 Entering Class (Log Transformed)"}
71 | map13l
72 | ```
73 | 
74 | 


--------------------------------------------------------------------------------
/tutorials/reproducible_research.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/tutorials/reproducible_research.pdf


--------------------------------------------------------------------------------
/tutorials/reproducible_research.tex:
--------------------------------------------------------------------------------
  1 | \documentclass[]{article}
  2 | \usepackage[T1]{fontenc}
  3 | \usepackage{lmodern}
  4 | \usepackage{amssymb,amsmath}
  5 | \usepackage{ifxetex,ifluatex}
  6 | \usepackage{fixltx2e} % provides \textsubscript
  7 | % use upquote if available, for straight quotes in verbatim environments
  8 | \IfFileExists{upquote.sty}{\usepackage{upquote}}{}
  9 | \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
 10 |   \usepackage[utf8]{inputenc}
 11 | \else % if luatex or xelatex
 12 |   \ifxetex
 13 |     \usepackage{mathspec}
 14 |     \usepackage{xltxtra,xunicode}
 15 |   \else
 16 |     \usepackage{fontspec}
 17 |   \fi
 18 |   \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase}
 19 |   \newcommand{\euro}{€}
 20 | \fi
 21 | % use microtype if available
 22 | \IfFileExists{microtype.sty}{\usepackage{microtype}}{}
 23 | \usepackage[margin=1in]{geometry}
 24 | \usepackage{graphicx}
 25 | % Redefine \includegraphics so that, unless explicit options are
 26 | % given, the image width will not exceed the width of the page.
 27 | % Images get their normal width if they fit onto the page, but
 28 | % are scaled down if they would overflow the margins.
 29 | \makeatletter
 30 | \def\ScaleIfNeeded{%
 31 |   \ifdim\Gin@nat@width>\linewidth
 32 |     \linewidth
 33 |   \else
 34 |     \Gin@nat@width
 35 |   \fi
 36 | }
 37 | \makeatother
 38 | \let\Oldincludegraphics\includegraphics
 39 | {%
 40 |  \catcode`\@=11\relax%
 41 |  \gdef\includegraphics{\@ifnextchar[{\Oldincludegraphics}{\Oldincludegraphics[width=\ScaleIfNeeded]}}%
 42 | }%
 43 | \ifxetex
 44 |   \usepackage[setpagesize=false, % page size defined by xetex
 45 |               unicode=false, % unicode breaks when used with xetex
 46 |               xetex]{hyperref}
 47 | \else
 48 |   \usepackage[unicode=true]{hyperref}
 49 | \fi
 50 | \hypersetup{breaklinks=true,
 51 |             bookmarks=true,
 52 |             pdfauthor={Rich Majerus},
 53 |             pdftitle={Reproducible Research},
 54 |             colorlinks=true,
 55 |             citecolor=blue,
 56 |             urlcolor=blue,
 57 |             linkcolor=magenta,
 58 |             pdfborder={0 0 0}}
 59 | \urlstyle{same}  % don't use monospace font for urls
 60 | \setlength{\parindent}{0pt}
 61 | \setlength{\parskip}{6pt plus 2pt minus 1pt}
 62 | \setlength{\emergencystretch}{3em}  % prevent overfull lines
 63 | \setcounter{secnumdepth}{0}
 64 | 
 65 | \title{Reproducible Research}
 66 | \author{Rich Majerus}
 67 | \date{August 27, 2014}
 68 | 
 69 | \begin{document}
 70 | 
 71 | \begin{center}
 72 | \huge Reproducible Research \\[0.2cm]
 73 | \large \emph{Rich Majerus}\\[0.1cm]
 74 | \large \emph{August 27, 2014} \\
 75 | \normalsize
 76 | \end{center}
 77 | 
 78 | 
 79 | \begin{verbatim}
 80 | ## % latex table generated in R 3.1.0 by xtable 1.7-3 package
 81 | ## % Thu Aug 28 09:13:08 2014
 82 | ## \begin{table}[ht]
 83 | ## \centering
 84 | ## \begin{tabular}{rlrrrrrrr}
 85 | ##   \hline
 86 | ##  & State & 2007 & 2008 & 2009 & 2010 & 2011 & 2012 & 2013 \\ 
 87 | ##   \hline
 88 | ## 1 & alabama &   0 &   5 &   1 &   0 &   0 &   0 &   1 \\ 
 89 | ##   2 & alaska &   2 &   0 &   3 &   1 &   3 &   3 &   2 \\ 
 90 | ##   3 & arizona &  12 &   7 &   3 &   3 &   5 &   8 &   5 \\ 
 91 | ##   4 & arkansas &   0 &   0 &   0 &   2 &   0 &   0 &   0 \\ 
 92 | ##   5 & california &  71 &  65 &  94 &  96 &  97 &  85 &  87 \\ 
 93 | ##   6 & colorado &   8 &  14 &   5 &  11 &  13 &   7 &   7 \\ 
 94 | ##   7 & connecticut &   5 &   7 &   5 &   9 &  13 &   7 &   3 \\ 
 95 | ##   8 & delaware &   0 &   1 &   1 &   0 &   0 &   0 &   0 \\ 
 96 | ##   10 & florida &   8 &   9 &   5 &  10 &   4 &   4 &   9 \\ 
 97 | ##   11 & georgia &   1 &   7 &   4 &   3 &   7 &   2 &   0 \\ 
 98 | ##   12 & hawaii &   5 &   1 &   3 &   1 &   2 &   2 &   3 \\ 
 99 | ##   13 & idaho &   1 &   2 &   2 &   3 &   3 &   2 &   4 \\ 
100 | ##   14 & illinois &  12 &   5 &   5 &  12 &   4 &   3 &  11 \\ 
101 | ##   15 & indiana &   1 &   2 &   2 &   3 &   2 &   1 &   0 \\ 
102 | ##   16 & iowa &   1 &   1 &   2 &   2 &   1 &   2 &   2 \\ 
103 | ##   17 & kansas &   0 &   0 &   1 &   1 &   1 &   0 &   0 \\ 
104 | ##   18 & kentucky &   0 &   0 &   1 &   0 &   0 &   1 &   0 \\ 
105 | ##   19 & louisiana &   0 &   4 &   1 &   2 &   0 &   2 &   3 \\ 
106 | ##   20 & maine &   2 &   1 &   2 &   3 &   4 &   2 &   2 \\ 
107 | ##   21 & maryland &   5 &   3 &   7 &   7 &   2 &   5 &   5 \\ 
108 | ##   22 & massachusetts &  15 &  20 &  23 &  18 &  19 &  15 &  17 \\ 
109 | ##   23 & michigan &   3 &   2 &   4 &   2 &   3 &   3 &   2 \\ 
110 | ##   24 & minnesota &   9 &  11 &   9 &   6 &   6 &   2 &   7 \\ 
111 | ##   25 & mississippi &   0 &   0 &   0 &   0 &   0 &   1 &   0 \\ 
112 | ##   26 & missouri &   5 &   2 &   7 &   4 &   3 &   5 &   0 \\ 
113 | ##   27 & montana &   2 &   1 &   1 &   2 &   1 &   1 &   1 \\ 
114 | ##   28 & nebraska &   1 &   1 &   0 &   1 &   1 &   0 &   0 \\ 
115 | ##   29 & nevada &   3 &   0 &   2 &   0 &   2 &   3 &   1 \\ 
116 | ##   30 & new hampshire &   4 &   1 &   5 &   3 &   2 &   6 &   8 \\ 
117 | ##   31 & new jersey &  13 &   8 &   7 &   8 &   3 &   9 &   7 \\ 
118 | ##   32 & new mexico &   2 &   4 &   2 &   6 &   5 &   6 &   7 \\ 
119 | ##   33 & new york &  26 &  27 &  25 &  23 &  27 &  21 &  24 \\ 
120 | ##   34 & north carolina &   3 &   4 &   3 &   3 &   3 &   1 &   2 \\ 
121 | ##   35 & north dakota &   0 &   0 &   0 &   0 &   0 &   0 &   0 \\ 
122 | ##   36 & ohio &   7 &   3 &   1 &   2 &   3 &   1 &   2 \\ 
123 | ##   37 & oklahoma &   1 &   0 &   1 &   1 &   0 &   1 &   5 \\ 
124 | ##   38 & oregon &  20 &  28 &  30 &  24 &  26 &  28 &  28 \\ 
125 | ##   39 & pennsylvania &   8 &   5 &   4 &   6 &   4 &   6 &   8 \\ 
126 | ##   40 & rhode island &   4 &   2 &   3 &   1 &   2 &   0 &   1 \\ 
127 | ##   41 & south carolina &   3 &   0 &   0 &   1 &   1 &   0 &   1 \\ 
128 | ##   42 & south dakota &   0 &   0 &   0 &   1 &   1 &   0 &   0 \\ 
129 | ##   43 & tennessee &   2 &   2 &   2 &   1 &   3 &   1 &   4 \\ 
130 | ##   44 & texas &  16 &  14 &  16 &  19 &  12 &  14 &  14 \\ 
131 | ##   45 & utah &   1 &   1 &   0 &   2 &   1 &   4 &   4 \\ 
132 | ##   46 & vermont &   1 &   2 &   5 &   1 &   3 &   1 &   5 \\ 
133 | ##   47 & virginia &   2 &   1 &   5 &   4 &  12 &   1 &   5 \\ 
134 | ##   48 & washington &  28 &  22 &  32 &  30 &  32 &  19 &   8 \\ 
135 | ##   49 & west virginia &   0 &   0 &   0 &   1 &   1 &   1 &   0 \\ 
136 | ##   50 & wisconsin &   2 &   4 &   0 &   2 &   2 &   5 &   5 \\ 
137 | ##   51 & wyoming &   0 &   1 &   1 &   1 &   0 &   0 &   0 \\ 
138 | ##    \hline
139 | ## \end{tabular}
140 | ## \end{table}
141 | \end{verbatim}
142 | 
143 | You can write your entire paper (text, code, analysis, graphics, etc.)
144 | all in R Markdown. As an example, here is a short analysis of the
145 | geographic distribution of Reed College's enrolling students. The
146 | \href{http://www.reed.edu/ir/geographic_states.html}{Institutional
147 | Research Office webpage} has information about the geographic
148 | distribution of Reed's entering classes from 2007-2013.
149 | 
150 | Figure 1 shows the raw matriculant data from 2013 mapped by state. The
151 | darker a state's shading, the more matriculants from that state. Mousing
152 | over a state will reveal the exact number of students who matriculated
153 | from a certain state.
154 | 
155 | \begin{figure}[htbp]
156 | \centering
157 | \includegraphics{./reproducible_research_files/figure-latex/unnamed-chunk-3.pdf}
158 | \caption{Domestic Geographic Distribution of 2013 Entering Class}
159 | \end{figure}
160 | 
161 | However, we may be interested in learning more about the variation in
162 | matriculants across all states rather than identifying the states that
163 | account for the greatest number of matriculants. One way to approach
164 | this task is to map the log of matriculants or to take the log
165 | transformation of the variable of interest. Log transforming a variable
166 | that contains exceptionally large values (i.e., a right skewed variable)
167 | pulls those large values closer to the mean and yields a more
168 | symmetrically distributed variable. As for the map, log transforming the
169 | number of matriculants increases the variation in the color gradient
170 | across states and enables us to better visualize the distribution of
171 | Reed's matriculants across the entire country (as you can see in Figure
172 | 2 below).
173 | 
174 | \begin{figure}[htbp]
175 | \centering
176 | \includegraphics{./reproducible_research_files/figure-latex/unnamed-chunk-4.pdf}
177 | \caption{Domestic Geographic Distribution of 2013 Entering Class (Log
178 | Transformed)}
179 | \end{figure}
180 | 
181 | \end{document}
182 | 


--------------------------------------------------------------------------------
/tutorials/rvest.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "rvest Introduction"
 3 | author: "Reed College, Instructional Technology Services"
 4 | output: html_document
 5 | ---
 6 | 
 7 | We can use the rvest package to scrape information from the internet into R.  
 8 | For example, this [page](http://www.reed.edu/ir/geographic_states.html) on Reed College's Institutional Research website contains a large table with data that we may want to analyze.  Instead of trying to copy this data into Excel or having to manually recreate it, we can use rvest to pull the information directly into R.
 9 | 
10 | 
11 | ```{r, warning=FALSE}
12 | 
13 | # install any required packages that are not already installed
14 | pkg <- c("rvest", "dplyr",  "reshape2", "googleVis",  "magrittr")
15 | 
16 | new.pkg <- pkg[!(pkg %in% rownames(installed.packages()))]  # compare against package names only
17 | 
18 | if (length(new.pkg) > 0) {
19 |   install.packages(new.pkg)
20 | }
21 | 
22 | 
23 | # load packages 
24 | suppressMessages(library(rvest))
25 | suppressMessages(library(dplyr))
26 | suppressMessages(library(reshape2))
27 | suppressMessages(library(googleVis))
28 | 
29 | # helpful resources for using rvest 
30 |   # vignette("selectorgadget")
31 |   # http://blog.rstudio.org/2014/11/24/rvest-easy-web-scraping-with-r/
32 | ```
33 | 
34 | **Read in data**
35 | ```{r}
36 | 
37 | # download and parse the html page (read_html() replaces rvest's deprecated html())
38 | webpage <- read_html("http://www.reed.edu/ir/geographic_states.html")
39 | 
40 | # the data we want is in the first table on this page
41 | # the html_table() command coerces the data into a data frame
42 | webpage %>%
43 |   html_nodes("table") %>%
44 |   .[[1]] %>%
45 |   html_table()
46 | ```
47 | 
48 | ```{r, warning=FALSE}
49 | # repeat above code but store results in a data frame
50 | data <- 
51 | webpage %>%
52 |   html_nodes("table") %>%
53 |   .[[1]] %>%
54 |   html_table()
55 | ```
56 | 
57 | 
58 | ```{r, warning=FALSE}
59 | # we can now work with this data from the web as a data frame in R
60 | # drop the 'Total' row so that only individual states remain
61 | data <- 
62 |   data %>% 
63 |   filter(State != 'Total')
64 | 
65 | # reshape data from wide (one column per year) to long for plotting 
66 | data_long <- melt(data, id.vars = 'State')  # spell out id.vars rather than rely on partial matching
67 | 
68 | # rename columns in long data frame 
69 | colnames(data_long) <- c('State', 'Year', 'Matriculants')
70 | 
71 | # melt() stores the year labels as a factor; convert via character to get the numeric year
72 | data_long$Year <- as.numeric(as.character(data_long$Year))
73 | data_long$year <- data_long$Year     # lower-case copy, passed to gvisMotionChart() as its third argument
74 | data_long$state <- data_long$State   # lower-case copy, passed to gvisMotionChart() as its second argument
75 | 
76 | 
77 | 
78 | 
79 | ```
80 | 
81 | ```{r, results='asis'}
82 | # plot data 
83 | gvisMotionChart(data_long, "state", "year",
84 |                 yvar="Matriculants", xvar="Year",
85 |                 colorvar="State")
86 | 
87 | 
88 | 
89 | ```
90 | 
91 | 
92 | 


--------------------------------------------------------------------------------
/tutorials/scatter plots advanced.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Scatter Plots II"
 3 | author: "Reed College, Instructional Technology Services"
 4 | output: html_document
 5 | ---
 6 | 
 7 | <br>
 8 | <br>
 9 | 
10 | #### Install and load ggplot2
11 | ```{r, warning=FALSE, message=FALSE}
12 | # install libraries 
13 | # This demo requires the 'ggplot2' package 
14 | if( !is.element("ggplot2", installed.packages()[,1]) )
15 |   install.packages("ggplot2")
16 | 
17 | # load libraries 
18 | library(ggplot2)
19 | ```
20 | 
21 | <br>
22 | <br>
23 | 
24 | #### Download and Load Data
25 | ```{r}
26 | download.file("http://www.openintro.org/stat/data/evals.RData", destfile = "evals.RData")
27 | load("evals.RData")
28 | ```
29 | 
30 | <br>
31 | <br>
32 | 
33 | #### Create Scatter Plot Function
34 | ```{r, message=FALSE}
35 | 
36 | # this function will create a plot of every variable in your data frame against your dependent variable
37 | # the function takes two arguments: x = the name of your data frame and dv = the name of your dependent variable
38 | # if you want to save the scatter plots as .png files define the file path for graphs_folder below 
39 | # you will also need to remove the "#" from the two lines in the function that are commented out and place a "#" before print
40 | 
41 | # save graphs in this folder
42 | graphs_folder <- 'filepath/graphs/'
43 | 
44 | scatter <- function(x, dv, na.rm = TRUE, ...){
45 |   # print one scatter plot with a fitted lm line for every column of x, each plotted against dv (a column name given as a string)
46 |   for (column in names(x)) {
47 |   print(ggplot(x, aes_string(x = column, y = dv)) + geom_point(color = "dark blue") + theme_classic() + stat_smooth(method = "lm", se = FALSE, size = 2, color = "dark red"))}
48 |     # plots <- ggplot(x, aes_string(x = column, y = dv)) + geom_point(color="blue") + theme_classic() + stat_smooth(method = "lm", se = FALSE, size = 2)
49 |     # ggsave(plots,filename=paste(graphs_folder, "scatter_",column,".png",sep=""))}
50 | }
51 | 
52 | ```
53 | 
54 | <br>
55 | <br>
56 | 
57 | #### Run Scatter Plot Function to Create Scatter Plots for an Entire Data Frame
58 | ```{r, message=FALSE, warning=FALSE}
59 | scatter(x = evals, dv = "score") 
60 | 
61 | ```
62 | 


--------------------------------------------------------------------------------
/tutorials/scatter plots.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Scatter Plots"
  3 | author: "Reed College, Instructional Technology Services"
  4 | output: html_document
  5 | ---
  6 | 
  7 | <br>
  8 | <br>
  9 | 
 10 | #### Create Data Frame of Majors and FTE by Department at Reed College
 11 | ```{r}
 12 | # one row per department: number of majors and faculty FTE
 13 | Departments <- c("Art", "Music", "Theatre", "Anthropology", "Economics",
 14 |                  "History", "Political Science", "Sociology", "Chinese", "Classics",
 15 |                  "English", "French", "German", "Russian", "Spanish",
 16 |                  "Biology", "Chemistry", "Mathematics", "Physics", "Linguistics",
 17 |                  "Philosophy", "Psychology", "Religion")
 18 | 
 19 | Majors <- c(58, 21, 16, 52, 56,
 20 |             57, 68, 28, 6, 20,
 21 |             150, 5, 2, 7, 3,
 22 |             153.5, 74, 72.5, 125, 45,
 23 |             75, 98, 25)
 24 | 
 25 | FTE <- c(7.8, 4, 6.25, 5, 5.6,
 26 |          8.7, 5.5, 3, 3, 4,
 27 |          12, 5, 3, 3, 5,
 28 |          9, 6.8, 8, 6, 4,
 29 |          5.7, 7.7, 4)
 30 | 
 31 | data <- data.frame(Departments, Majors, FTE)
 32 | 
 33 | # Data does not include 94 interdisciplinary majors and 40 undecided majors.  
 34 | # Majors like bio/chem are split between the two departments 
 35 | # General Lit majors are included with English 
 36 | # Dance majors and faculty are included with Theatre
 37 | # Major Data: http://www.reed.edu/ir/ir_internal_web/intendedmajors.html and FTE Data: http://www.reed.edu/ir/facfte.html
 38 | 
 39 | ```
 40 | 
 41 | <br>
 42 | <br>
 43 | 
 44 | #### Create Scatter Plot using Base R Commands
 45 | ```{r}
 46 | plot(data$Majors, data$FTE)
 47 | ```
 48 | 
 49 | <br>
 50 | <br>
 51 | 
 52 | #### Add Additional Elements to Base Scatter Plot
 53 | ```{r}
 54 | plot(data$Majors, data$FTE, 
 55 |      xlab = "Majors", ylab = "FTE",  main = "Reed College Majors and FTE by Department", pch = 16, # add axis labels and a title; pch sets the point symbol
 56 |      xlim = c(0, 160), ylim = c(0, 15),  # set limits on x-axis and y-axis
 57 |      col = "dark blue", cex = 2) # change color and size of points
 58 | 
 59 | 
 60 | ```
 61 | 
 62 | <br>
 63 | <br>
 64 | 
 65 | 
 66 | #### Add A Fitted Line
 67 | ```{r}
 68 | plot(data$Majors, data$FTE, 
 69 |      xlab = "Majors", ylab = "FTE",  main = "Reed College Majors and FTE by Department", pch = 16, # add axis labels and a title; pch sets the point symbol
 70 |      xlim = c(0, 160), ylim = c(0, 15),  # set limits on x-axis and y-axis
 71 |      col = "dark blue", cex = 2) # change color and size of points
 72 | 
 73 | abline(lm(FTE ~ Majors, data = data), col = "dark red", lwd = 2)  # add fitted regression line (y~x) 
 74 | 
 75 | ```
 76 | 
 77 | 
 78 | <br>
 79 | <br>
 80 | 
 81 | 
 82 | #### Using ggplot2 to Make a Scatter Plot
 83 | ```{r}
 84 | # This demo requires the 'ggplot2' package 
 85 | if( !is.element("ggplot2", installed.packages()[,1]) )
 86 |   install.packages("ggplot2")
 87 | 
 88 | suppressPackageStartupMessages(library(ggplot2))
 89 | 
 90 | ## Base scatter plot in ggplot 
 91 | ggplot(data, aes(x=Majors, y=FTE)) + 
 92 |   geom_point(shape=1)
 93 | 
 94 | ```
 95 | 
 96 | 
 97 | <br>
 98 | <br>
 99 | 
100 | #### Apply Theme to Scatter Plot
101 | ```{r}
102 | ggplot(data, aes(x=Majors, y=FTE)) + 
103 |   geom_point(shape=1)+
104 |   theme_bw()
105 | 
106 | 
107 | 
108 | ```
109 | 
110 | <br>
111 | <br>
112 | 
113 | #### Add Additional Elements to Base Scatter Plot
114 | ```{r}
115 | ggplot(data, aes(x=Majors, y=FTE)) +  
116 |   geom_point(color="dark blue", size=3) +   # change the color and size of points 
117 |   geom_smooth(method=lm, se=FALSE, color="dark red") + # add a fitted line 
118 |   scale_y_continuous(limits = c(0, 12)) + # apply limits to the y-axis
119 |   ggtitle("Reed College Majors and FTE by Department") + # add a title to the plot 
120 |   theme_bw()
121 | 
122 | 
123 | ```
124 | 
125 | <br>
126 | <br>
127 | 
128 | #### Make Your Scatter Plot Interactive with googleVis
129 |  
130 | ```{r, warning=FALSE}
131 | ## This demo requires the 'googleVis' package 
132 | if( !is.element("googleVis", installed.packages()[,1]) )
133 |   install.packages("googleVis")
134 | 
135 | suppressPackageStartupMessages(library(googleVis))
136 | 
137 | # make a new data frame with only two columns to scatter plot 
138 | keep <- c('Majors', 'FTE')
139 | data2 <- data[keep]
140 | 
141 | # add names to new data frame as factor 
142 | data2$pop.html.tooltip=data$Departments
143 | 
144 | # create interactive scatter plot using googleVis
145 | Scatter1 <- gvisScatterChart(data2,
146 |                             options=list(tooltip="{isHtml:'True'}",              # Define tooltip
147 |                               legend="none", lineWidth=0, pointSize=5,
148 |                               vAxis="{title:'Faculty (Total FTE)'}",             # y-axis label
149 |                               hAxis="{title:'Majors (declared and intended)'}",  # x-axis label
150 |                               width=750, height=500))                            # plot dimensions
151 | ```
152 | 
153 | ```{r, results = 'asis'}
154 | # plot interactive scatter (use 'plot(Scatter1)' to view in RStudio)
155 | print(Scatter1, 'chart') 
156 | 
157 | ```
158 | 
159 | 
160 | <br>
161 | <br>
162 | 
163 | 
164 | #### Add Additional Elements to Interactive Scatter Plot
165 | ```{r, warning=FALSE}
166 | 
167 | # create interactive scatter plot using googleVis (adds zooming and crosshairs)
168 | Scatter2 <- gvisScatterChart(data2,
169 |                             options=list(
170 |                               explorer="{actions: ['dragToZoom', 
171 |                                           'rightClickToReset'],
172 |                                            maxZoomIn:0.05}",
173 |                               #chartArea="{width:'85%',height:'80%'}",
174 |                               tooltip="{isHtml:'True'}",
175 |                               crosshair="{trigger:'both'}",
176 |                               legend="none", lineWidth=0, pointSize=5,
177 |                               vAxis="{title:'Faculty (Total FTE)'}",             # y-axis label
178 |                               hAxis="{title:'Majors (declared and intended)'}",  # x-axis label
179 |                               width=750, height=500))                            # plot dimensions
180 | ```
181 | 
182 | 
183 | ```{r, results = 'asis'}
184 | print(Scatter2, 'chart') 
185 | 
186 | ```
187 | 
188 | Left-click and drag to select an area of the chart to zoom in on. 
189 |   
190 | 


--------------------------------------------------------------------------------
/tutorials/summary_statistics.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Summarizing your Data in RStudio"
 3 | author: "Reed College, Instructional Technology Services"
 4 | output: pdf_document
 5 | ---
 6 |    
 7 | **Load the mtcars data**   
 8 | ```{r}
 9 | data(mtcars)
10 | ```
11 | 
12 | **List summary statistics of each variable** 
13 | ```{r}
14 | summary(mtcars)
15 | ```
16 | 
17 | **List summary statistics for one variable** 
18 | ```{r}
19 | summary(mtcars$mpg)
20 | ```
21 | 
22 | **List summary statistics for several variables**
23 | ```{r}
24 | summary(mtcars[,c(1,2,4:6)])
25 | ```
26 | 
27 | **Using the describe command**
28 | ```{r}
29 | # install.packages("psych")
30 | library(psych)
31 | ```
32 | 
33 | **Describe each variable**
34 | ```{r}
35 | describe(mtcars)
36 | ```
37 | 
38 | **Describe one variable** 
39 | ```{r}
40 | describe(mtcars$mpg)
41 | ```
42 | 
43 | **Describe several variables** 
44 | ```{r}
45 | describe(mtcars[,c(1, 4:6)])
46 | ```
47 | 
48 | **Describe variables by a grouping variable**
49 | ```{r}
50 | describeBy(mtcars$mpg, mtcars$cyl)
51 | ```
52 | 


--------------------------------------------------------------------------------
/tutorials/summary_statistics.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/tutorials/summary_statistics.pdf


--------------------------------------------------------------------------------
/tutorials/ttests_pdf.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "T-Tests in R"
 3 | author: "Reed College, Instructional Technology Services"
 4 | output: pdf_document
 5 | ---
 6 | 
 7 | **Load data**
 8 | ```{r}
 9 | data(mtcars)
10 | ```
11 | 
12 | **One sample t-test**
13 | ```{r}
14 | t.test(mtcars$mpg, mu=50) # H0: mu = 50
15 | ```
16 | \newpage
17 | 
18 | **Independent two sample t-test by groups**
19 | ```{r}
20 | t.test(mtcars$mpg ~ mtcars$am) 
21 | ```
22 | \newpage
23 | 
24 | **Independent 2-group t-test**
25 | ```{r}
26 | mpg1 <- sample(mtcars$mpg, 10, replace = FALSE)  # draw 10 mpg values without replacement
27 | mpg2 <- sample(mtcars$mpg, 10, replace = FALSE)  # a second, separately drawn sample of 10
28 | 
29 | t.test(mpg1, mpg2) 
30 | 
31 | # possible options: 
32 |   # paired = TRUE
33 |   # var.equal = TRUE (pooled variance estimate)
34 |   # alternative="less" or alternative="greater" (one tail tests)
35 | ```
36 | 
37 | 


--------------------------------------------------------------------------------
/tutorials/ttests_pdf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/tutorials/ttests_pdf.pdf


--------------------------------------------------------------------------------