├── assment_one.Rmd
├── auto_reporting.R
├── auto_reporting.Rmd
├── iterative_reporting
├── markdown_multiple_reports.Rmd
├── markdown_multiple_reports.html
├── r-tmp
│ ├── loop.R
│ ├── report.Rmd
│ └── reports
│ │ ├── test_report_doc1_2018-04-05.docx
│ │ └── test_report_doc2_2018-04-05.docx
├── r_script.R
└── rmarkdown_script.Rmd
├── olympics.csv
├── portland mapping
├── .Rapp.history
├── GISwR.pdf
├── ggmaps.R
└── mapping in r.R
├── rater reliability .R
├── read in ipeds data.R
├── read_googlesheet.R
├── read_multiple_txt_files.R
├── rename_colnames_base_on_crosswalk
├── rmaps.R
├── rvest.R
├── ts_graphs.R
└── tutorials
├── Histograms advanced.Rmd
├── Histograms_advanced.html
├── ames.csv
├── colors.Rmd
├── colors.html
├── create_variables.Rmd
├── create_variables.html
├── creating html tables.Rmd
├── creating_html_tables.html
├── dplyr.Rmd
├── dplyr.html
├── evals.RData
├── excel.Rmd
├── excel.html
├── facets.Rmd
├── facets.html
├── geocoder.Rmd
├── geocoder.html
├── geocoding.Rmd
├── geocoding.html
├── histograms.Rmd
├── histograms.html
├── histograms_I.Rmd
├── histograms_I.html
├── histograms_pdf.Rmd
├── histograms_pdf.pdf
├── line graphs advanced.Rmd
├── line graphs.Rmd
├── line_graphs.html
├── line_graphs_advanced.html
├── load_data.Rmd
├── load_data.html
├── loops_with_ggplot2.Rmd
├── loops_with_ggplot2.html
├── markdown_formatting.Rmd
├── markdown_formatting.html
├── matriculants.Rmd
├── matriculants.html
├── missing_data.Rmd
├── missing_data.html
├── my-report.html
├── read_and_summarize_multiple_txt.Rmd
├── read_and_summarize_multiple_txt.html
├── reading_and_writing.Rmd
├── reading_and_writing.html
├── reading_and_writing.pdf
├── regex.Rmd
├── regex.html
├── reordering_geom_bar.Rmd
├── reordering_geom_bar.html
├── reproducible_research.Rmd
├── reproducible_research.pdf
├── reproducible_research.tex
├── rvest.Rmd
├── rvest.html
├── scatter plots advanced.Rmd
├── scatter plots.Rmd
├── scatter_plots.html
├── scatter_plots_advanced.html
├── summary_statistics.Rmd
├── summary_statistics.html
├── summary_statistics.pdf
├── ttests_pdf.Rmd
└── ttests_pdf.pdf
/assment_one.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Assessment I: Sports Analytics Jan Plan"
3 | name: ""
4 | output: html_document
5 | ---
6 |
7 | ```{r setup, include=FALSE}
8 | knitr::opts_chunk$set(echo = TRUE)
9 | ```
10 |
11 | ## Section 1: Loading Packages and Data
12 |
13 | ##### Load the tidyverse library
14 | ```{r}
15 | ```
16 |
17 | ##### Load the Olympic medals data into a data frame called olympics
18 |
19 | ```{r, echo=FALSE}
20 |
21 | ```
22 |
23 | Here's some info about variables in the data:
24 |
25 | - ID - Unique number for each athlete
26 | - Name - Athlete's name
27 | - Sex - M or F
28 | - Age - Integer
29 | - Height - In centimeters
30 | - Weight - In kilograms
31 | - Team - Team name
32 | - NOC - National Olympic Committee 3-letter code
33 | - Games - Year and season
34 | - Year - Integer
35 | - Season - Summer or Winter
36 | - City - Host city
37 | - Sport - Sport
38 | - Event - Event
39 | - Medal - Gold, Silver, Bronze, or NA
40 |
41 | ## Section 2: Describing the data
42 |
43 | ##### How many rows are in the data?
44 |
45 | ##### How many columns are in the data?
46 |
47 |
48 |
49 | ## Section 3: Country (NOC) Medal Counts
50 |
51 | ##### Which country (NOC) has won the most medals?
52 | ```{r}
53 |
54 | ```
55 |
56 | ##### Which country has won the most medals in the winter games?
57 | ```{r}
58 |
59 | ```
60 |
61 | ## Section 4: Sport Medal Counts
62 |
63 | ##### Which sport has awarded the fewest medals?
64 | ```{r}
65 |
66 | ```
67 |
68 | ##### In how many Olympic Games were medals awarded in the sport of Tug-Of-War?
69 | ```{r}
70 |
71 | ```
72 |
73 |
74 | ## Section 5: BMI
75 |
76 | ##### Which gold medal winner has the highest BMI? (BMI = kg/m^2)
77 | ```{r}
78 |
79 | ```
80 |
81 |
82 | ## Section 6: Graphing
83 |
84 | ##### Create a boxplot of the age of medal winners by sex
85 | ```{r}
86 |
87 | ```
88 |
89 | ##### Who is that person who won a medal when they were 10?
90 | ```{r}
91 |
92 | ```
93 |
94 | ##### Facet the boxplot by Medal
95 | ```{r}
96 |
97 | ```
98 |
99 |
100 | ## Section 7: Bonus
101 |
102 | ##### Which cities have hosted the games multiple times?
103 | ```{r}
104 |
105 | ```
106 |
107 |
108 |
--------------------------------------------------------------------------------
/auto_reporting.R:
--------------------------------------------------------------------------------
1 | # Reference
2 | # http://www.r-bloggers.com/how-to-source-an-r-script-automatically-on-a-mac-using-automator-and-ical/
3 | # http://www.engadget.com/2013/03/18/triggering-applescripts-from-calendar-alerts-in-mountain-lion/
4 | # http://willchernoff.com/2013/04/23/periodically-run-an-r-script-as-a-background-process-using-launchd-under-osx/
5 |
6 | library(knitr)
7 | library(markdown)
8 | library(rmarkdown)
9 | library(stringr)
10 | library(ggmap)
11 |
12 | # Folder that holds the report template and the per-cylinder output folders.
13 | report_dir <- '/Users/majerus/Desktop/R/auto_reporting/test/reports'
14 |
15 | ## knitr loop
16 | # Keep two example cars and strip the spaces from their names so the names
17 | # can be used in output file names.
18 | mtcars <- mtcars[c(1, 5), ]
19 | rownames(mtcars) <- str_replace_all(rownames(mtcars), ' ', '')
20 |
21 | # Fetch the base map once, outside the loop.
22 | # NOTE(review): recent ggmap versions appear to need a registered Google API
23 | # key (register_google()) before this call works -- confirm for your setup.
24 | map <- get_map(location = "United States", source = 'google',
25 |                maptype = 'terrain', color = 'bw', zoom = 4)
26 |
27 | # Render one PDF per car. `car` is the loop variable; render() evaluates the
28 | # template in this environment by default, so the template can read it.
29 | for (car in unique(rownames(mtcars))) {
30 |   # the output folder is chosen by cylinder count; anything other than 4 or 6
31 |   # goes to cyl8, exactly as before
32 |   cyl <- mtcars$cyl[rownames(mtcars) == car]
33 |   cyl_dir <- if (cyl == 4) "cyl4" else if (cyl == 6) "cyl6" else "cyl8"
34 |
35 |   # the template is addressed by its full path instead of via setwd(), so the
36 |   # script no longer changes the session's working directory
37 |   render(input = file.path(report_dir, 'r_script_pdf.Rmd'),
38 |          # "_" between car and date matches the naming used by the HTML loop
39 |          output_file = paste0("test_report_", car, "_", Sys.Date(), ".pdf"),
40 |          output_dir = file.path(report_dir, cyl_dir))
41 |
42 |   # rmarkdown::render(input = "/Users/majerus/Desktop/R/auto_reporting/test/r_script_pdf.Rmd",
43 |   #                   output_format = "pdf_document",
44 |   #                   output_file = paste("test_report_", car, Sys.Date(), ".pdf", sep=''),
45 |   #                   output_dir = "/Users/majerus/Desktop/R/auto_reporting/test/reports")
46 | }
47 |
48 |
49 |
50 | # quit(save="no")
51 |
52 | # rmarkdown::render('/Users/majerus/Desktop/R/auto_reporting/test/r_script.Rmd',
53 | # output_file = paste("report_", Sys.Date(), ".html", sep=''),
54 | # output_dir = '/Users/majerus/Desktop/R/auto_reporting/test/reports')
55 | #
56 |
57 |
58 |
59 | #
60 | # knit("/Users/majerus/Desktop/R/auto_reporting/test/r_script.Rmd")
61 | #
62 | # rmarkdown::render(input = "/Users/majerus/Desktop/R/auto_reporting/test/r_script.Rmd",
63 | # output_format = "pdf_document",
64 | # output_file = paste("test_report_", Sys.Date(), ".pdf", sep=''),
65 | # output_dir = "/Users/majerus/Desktop/R/auto_reporting/test/")
66 | #
67 | #
68 | # # render(input, output_format = NULL, output_file = NULL, output_dir = NULL,
69 | # # output_options = NULL, intermediates_dir = NULL,
70 | # # runtime = c("auto", "static", "shiny"),
71 | # # clean = TRUE, envir = parent.frame(), quiet = FALSE,
72 | # # encoding = getOption("encoding"))
73 | #
74 | #
75 | #
76 | # ## for html
77 | # markdownToHTML("r_script.md",
78 | # paste("/Users/majerus/Desktop/R/auto_reporting/test/reports/test_report_", Sys.Date(), ".html", sep='')
79 | # #stylesheet="C:/Users/Rich/Dropbox/tca/Admissions/FM/Project12/yield_model_2014/Dashboard/dashboardcss.css"
80 | # )
81 |
--------------------------------------------------------------------------------
/auto_reporting.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Example Car Sales Report"
3 | author: "Rich Majerus"
4 | output: pdf_document
5 | ---
6 |
7 | ```{r setup, include=FALSE}
8 | # set global chunk options
9 | # opts_chunk$set(cache=FALSE)
10 | library(ggplot2)
11 | library(dplyr)
12 | library(ggmap)
13 | library(knitr)
14 | library(markdown)
15 | library(rmarkdown)
16 | library(xtable)
17 | ```
18 |
19 | ```{r, echo=FALSE}
20 | #car <- 'Mazda RX4' # for testing
21 | # `car` is not defined in this file; it must exist in the rendering environment
22 | cars <- mtcars[rownames(mtcars) == car, ]
23 |
24 | # create daily data: repeat the car's row a random 1-10 times
25 | n_sold <- sample(1:10, 1)
26 | cars <- do.call("rbind", replicate(n_sold, cars, simplify = FALSE))
27 |
28 | # one random location per row, drawn in a single vectorised call
29 | cars$lat <- round(runif(nrow(cars), 30, 46), 3)
30 | cars$lon <- round(runif(nrow(cars), -115, -80), 3)
31 | ```
32 |
33 | Today is `r Sys.Date()`.
34 |
35 | Today we sold `r nrow(cars)` `r car`.
36 |
37 |
38 |
39 | Today we sold cars to people in the following locations:
40 |
41 |
42 |
43 | ```{r, echo=FALSE, message = FALSE}
44 | # map of cars sold
45 | #map <- # now included in r source file so it is only run once
46 | #get_map(location="United States",
47 | #source= 'google', maptype = 'terrain', color='bw', zoom=4)
48 |
49 | ggmap(map) +
50 | geom_point(aes(x = lon, y = lat), data = cars, alpha = 1, color="darkred", size = 3) +
51 | scale_size_area(20)
52 |
53 |
54 | ```
55 |
56 |
57 |
58 | Here is a data table of the cars we sold.
59 |
60 |
61 |
62 | ```{r xtable, echo=FALSE, results='asis', message=FALSE}
63 | # data table of cars sold
64 |
65 | table <- xtable(cars[,c(1:2, 12:13)])
66 | print(table, type="latex", comment = FALSE)
67 |
68 |
69 | ```
70 |
71 |
72 |
73 | \newpage
74 |
75 | Here is a plot of mpg vs. weight for the sold cars:
76 |
77 |
78 |
79 | ```{r, echo=FALSE}
80 |
81 | # plot
82 |
83 | ggplot(cars, aes(mpg, wt)) +
84 | geom_point(position = position_jitter(w = 0.1, h = 0.1))
85 |
86 |
87 | ```
88 |
89 |
90 |
--------------------------------------------------------------------------------
/iterative_reporting/markdown_multiple_reports.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Create Multiple Reports with RMarkdown"
3 | author: "Rich Majerus"
4 | output: html_document
5 | ---
6 |
7 |
8 |
9 | ## Introduction
10 | Using two files (an r script and an rmarkdown script) we can create a series of reports. The following example creates five reports using the mtcars data. The rmarkdown file is called by the rscript one time for each unique car name in the mtcars data. An example of the reports produced by these files can be found here.
11 |
12 | The rscript and rmarkdown script referenced in this tutorial can be found here along with the rmarkdown file that creates this tutorial.
13 |
14 |
15 |
16 | ## File 1: R Script
17 |
18 | ```{r, eval=FALSE}
19 |
20 | # References for automation
21 | # http://www.r-bloggers.com/how-to-source-an-r-script-automatically-on-a-mac-using-automator-and-ical/
22 | # http://www.engadget.com/2013/03/18/triggering-applescripts-from-calendar-alerts-in-mountain-lion/
23 |
24 | # File 1: Should be an R-Script
25 | # contains a loop that iteratively calls an Rmarkdown file (i.e. File 2)
26 |
27 | # load packages
28 | library(knitr)
29 | library(markdown)
30 | library(rmarkdown)
31 | library(ggmap) # provides get_map(), which is called below
32 |
33 | # use first 5 rows of mtcars as example data
34 | mtcars <- mtcars[1:5, ]
35 |
36 | # create map to plot data on (this is outside the loop so it is only called once)
37 | map <- get_map(location = "United States", source = 'google',
38 |                maptype = 'terrain', color = 'bw', zoom = 4)
39 |
40 | # for each type of car in the data create a report
41 | # these reports are saved in output_dir with the name specified by output_file
42 | for (car in unique(rownames(mtcars))){
43 | rmarkdown::render('/Users/majerus/Desktop/R/auto_reporting/test/r_script.Rmd', # file 2
44 | output_file = paste("report_", car, '_', Sys.Date(), ".html", sep=''),
45 | output_dir = '/Users/majerus/Desktop/R/auto_reporting/test/reports')
46 |
47 | # for pdf reports
48 | # rmarkdown::render(input = "/Users/majerus/Desktop/R/auto_reporting/test/r_script_pdf.Rmd",
49 | # output_format = "pdf_document",
50 | # output_file = paste("test_report_", car, Sys.Date(), ".pdf", sep=''),
51 | # output_dir = "/Users/majerus/Desktop/R/auto_reporting/test/reports")
52 |
53 | }
54 | ```
55 |
56 |
57 |
58 | ## File 2: Rmarkdown
59 |
60 | ```{r, eval=FALSE}
61 | # load packages
62 | library(ggplot2)
63 | library(dplyr)
64 | library(ggmap)
65 | library(knitr)
66 | library(markdown)
67 | library(rmarkdown)
68 | library(xtable)
69 | ```
70 |
71 | ```{r, eval=FALSE}
72 |
73 | # limit data to car name that is currently specified by the loop
74 | cars <- mtcars[rownames(mtcars)==car,]
75 |
76 | # create example data for each car
77 | x <- sample(1:10, 1)
78 | cars <- do.call("rbind", replicate(x, cars, simplify = FALSE))
79 |
80 | # create hypothetical lat and lon for each row in cars
81 | cars$lat <- sapply(rownames(cars), function(x) round(runif(1, 30, 46), 3))
82 | cars$lon <- sapply(rownames(cars), function(x) round(runif(1, -115, -80),3))
83 |
84 | ```
85 |
86 | Today is `r Sys.Date()`.
87 |
88 |
89 |
90 | Today we sold cars to people in the following locations:
91 |
92 | ```{r, eval=FALSE}
93 |
94 | # print map with car locations
95 | ggmap(map) +
96 | geom_point(aes(x = lon, y = lat), data = cars,
97 | alpha = 1, color="darkred", size = 3)
98 |
99 |
100 | ```
101 |
102 |
103 |
104 | Here is a data table of the cars we sold.
105 |
106 |
107 |
108 | ```{r xtable, eval=FALSE}
109 |
110 | # data table of cars sold
111 | table <- xtable(cars[,c(1:2, 12:13)])
112 | print(table, type="latex", comment = FALSE)
113 |
114 |
115 | ```
116 |
117 |
118 |
119 | \newpage
120 |
121 | Here is a plot of mpg vs. weight for the sold cars:
122 |
123 |
124 | ```{r, eval=FALSE}
125 | # plot of mpg vs. wt for cars sold
126 |
127 | ggplot(cars, aes(mpg, wt)) +
128 | geom_point(position = position_jitter(w = 0.1, h = 0.1))
129 |
130 |
131 | ```
132 |
133 |
134 |
135 |
136 |
137 |
--------------------------------------------------------------------------------
/iterative_reporting/r-tmp/loop.R:
--------------------------------------------------------------------------------
1 | # load libraries
2 | library(tidyverse)
3 | library(rmarkdown)
4 | library(randomNames)
5 |
6 | # create sample data
7 | # ten random names, five assigned to each of two documents
8 | tmp <- tibble(
9 |   doc = rep(c("doc1", "doc2"), each = 5),
10 |   name = randomNames(10)
11 | )
12 |
13 | # Render report.Rmd once per document; the template reads `i` and `tmp`.
14 | for (i in unique(tmp$doc)) {
15 |   rmarkdown::render(
16 |     # the file is report.Rmd; "report.rmd" fails on case-sensitive file systems
17 |     input = file.path(getwd(), "report.Rmd"),
18 |     output_file = paste0("test_report_", i, "_", Sys.Date(), ".docx"),
19 |     output_dir = file.path(getwd(), "reports")
20 |   )
21 | }
22 |
23 |
--------------------------------------------------------------------------------
/iterative_reporting/r-tmp/report.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "report"
3 | author: "Rich Majerus"
4 | date: "4/5/2018"
5 | output: word_document
6 | ---
7 |
8 |
9 | Doc: `r i`
10 | Names: `r paste(tmp$name[tmp$doc == i], collapse = ", ")`
11 |
12 |
--------------------------------------------------------------------------------
/iterative_reporting/r-tmp/reports/test_report_doc1_2018-04-05.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/iterative_reporting/r-tmp/reports/test_report_doc1_2018-04-05.docx
--------------------------------------------------------------------------------
/iterative_reporting/r-tmp/reports/test_report_doc2_2018-04-05.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/iterative_reporting/r-tmp/reports/test_report_doc2_2018-04-05.docx
--------------------------------------------------------------------------------
/iterative_reporting/r_script.R:
--------------------------------------------------------------------------------
1 | # Reference
2 | # http://www.r-bloggers.com/how-to-source-an-r-script-automatically-on-a-mac-using-automator-and-ical/
3 | # http://www.engadget.com/2013/03/18/triggering-applescripts-from-calendar-alerts-in-mountain-lion/
4 |
5 | library(knitr)
6 | library(markdown)
7 | library(rmarkdown)
8 | library(ggmap) # provides get_map(), which is called below
9 |
10 | ## knitr loop
11 | # use the first two cars of mtcars as example data
12 | mtcars <- mtcars[1:2, ]
13 |
14 | # fetch the base map once, before the loop, so it is not requested per report
15 | map <- get_map(location = "United States", source = 'google',
16 |                maptype = 'terrain', color = 'bw', zoom = 4)
17 |
18 | # render one HTML report per car name into output_dir
19 | for (car in unique(rownames(mtcars))) {
20 |   rmarkdown::render('/Users/majerus/Desktop/R/auto_reporting/test/r_script.Rmd',
21 |                     output_file = paste0("report_", car, '_', Sys.Date(), ".html"),
22 |                     output_dir = '/Users/majerus/Desktop/R/auto_reporting/test/reports')
23 |   # for pdf reports
24 |   # rmarkdown::render(input = "/Users/majerus/Desktop/R/auto_reporting/test/r_script_pdf.Rmd",
25 |   #                   output_format = "pdf_document",
26 |   #                   output_file = paste("test_report_", car, Sys.Date(), ".pdf", sep=''),
27 |   #                   output_dir = "/Users/majerus/Desktop/R/auto_reporting/test/reports")
28 | }
29 |
30 |
31 |
--------------------------------------------------------------------------------
/iterative_reporting/rmarkdown_script.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Example Car Sales Report"
3 | author: "Rich Majerus"
4 | output: html_document
5 | ---
6 |
7 | ```{r, echo=FALSE}
8 | suppressWarnings(suppressPackageStartupMessages(library(ggplot2)))
9 | suppressWarnings(suppressPackageStartupMessages(library(dplyr)))
10 | suppressWarnings(suppressPackageStartupMessages(library(leaflet)))
11 | suppressWarnings(suppressPackageStartupMessages(library(DT)))
12 | suppressWarnings(suppressPackageStartupMessages(library(stringr)))
13 | library(knitr)
14 | library(markdown)
15 | library(rmarkdown)
16 | ```
17 |
18 | ```{r, echo=FALSE}
19 | # `car` is not defined in this file; it must exist in the rendering environment
20 | cars <- mtcars[rownames(mtcars) == car, ]
21 |
22 | # create daily data: repeat the car's row a random 1-10 times
23 | n_sold <- sample(1:10, 1)
24 | cars <- do.call("rbind", replicate(n_sold, cars, simplify = FALSE))
25 |
26 | # one random location per row, drawn in a single vectorised call
27 | cars$lat <- round(runif(nrow(cars), 30, 46), 3)
28 | cars$lon <- round(runif(nrow(cars), -115, -80), 3)
29 | ```
30 |
31 | Today is `r Sys.Date()`.
32 |
33 | Today we sold `r nrow(cars)` `r car`.
34 |
35 |
36 |
37 | Today we sold cars to people in the following locations:
38 | ```{r, echo=FALSE}
39 | # map of cars sold
40 | leaflet(cars) %>%
41 | addTiles() %>%
42 | setView(-93.65, 42.0285, zoom = 3) %>%
43 | addCircles(cars$lon, cars$lat)
44 | ```
45 |
46 |
47 |
48 | Here is a data table of the cars we sold.
49 | ```{r, echo=FALSE}
50 | # data table of cars sold
51 | datatable(cars[,c(1:2, 12:13)])
52 | ```
53 |
54 |
55 |
56 | Here is a plot of mpg vs. weight for the sold cars:
57 | ```{r, echo=FALSE}
58 |
59 | # plot
60 | ggplot(cars, aes(mpg, wt)) + geom_point(position = position_jitter(w = 0.1, h = 0.1)) + stat_smooth(method="lm", se=TRUE)
61 |
62 | ```
63 |
64 |
65 |
--------------------------------------------------------------------------------
/portland mapping/.Rapp.history:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/portland mapping/.Rapp.history
--------------------------------------------------------------------------------
/portland mapping/GISwR.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/portland mapping/GISwR.pdf
--------------------------------------------------------------------------------
/portland mapping/ggmaps.R:
--------------------------------------------------------------------------------
1 | # open street map of reed
2 |
3 | gps <- read.csv("/Users/majerus/Desktop/2014 projects/portland mapping/elwyn.csv",
4 | header = TRUE)
5 |
6 | library(ggmap)
7 |
8 | # Centre for every map request below: mean track longitude, fixed latitude.
9 | # NOTE(review): the latitude is hard-coded (33.824) rather than derived from
10 | # gps$Latitude the way the longitude is -- confirm this is intended.
11 | map_centre <- c(lon = mean(gps$Longitude), lat = 33.824)
12 |
13 | # Each request below assigns to mapImageData and so replaces the previous
14 | # result; only the last map fetched is still held afterwards.
15 |
16 | ## Google Maps
17 |
18 | # satellite
19 | mapImageData <- get_map(location = map_centre,
20 |                         color = "color", # or bw
21 |                         source = "google",
22 |                         maptype = "satellite",
23 |                         zoom = 17)
24 |
25 | # terrain
26 | mapImageData <- get_map(location = map_centre,
27 |                         color = "color", # or bw
28 |                         source = "google",
29 |                         maptype = "terrain",
30 |                         zoom = 17)
31 |
32 | # roadmap
33 | mapImageData <- get_map(location = map_centre,
34 |                         color = "color", # or bw
35 |                         source = "google",
36 |                         maptype = "roadmap",
37 |                         zoom = 17)
38 |
39 | # hybrid
40 | mapImageData <- get_map(location = map_centre,
41 |                         color = "color", # or bw
42 |                         source = "google",
43 |                         maptype = "hybrid",
44 |                         zoom = 17)
45 |
46 | ## open street map
47 | mapImageData <- get_map(location = map_centre,
48 |                         color = "color", # or bw
49 |                         source = "osm",
50 |                         zoom = 17)
51 |
52 | ## stamen
53 |
54 | # terrain
55 | mapImageData <- get_map(location = map_centre,
56 |                         color = "color", # or bw
57 |                         source = "stamen",
58 |                         maptype = "terrain",
59 |                         zoom = 17)
60 |
61 |
62 |
63 |
64 | pathcolor <- "#F8971F"
65 |
66 | ggmap(mapImageData,
67 | extent = "device", # "panel" keeps in axes, etc.
68 | ylab = "Latitude",
69 | xlab = "Longitude",
70 | legend = "right") +
71 |
72 | geom_path(aes(x = Longitude, # path outline
73 | y = Latitude),
74 | data = gps,
75 | colour = "black",
76 | size = 2) +
77 |
78 | geom_path(aes(x = Longitude, # path
79 | y = Latitude),
80 | colour = pathcolor,
81 | data = gps,
82 | size = 1.4) # +
83 | # labs(x = "Longitude",
84 | # y = "Latitude") # if you do extent = "panel"
85 |
86 |
87 | mapImageData <- get_map(location = c(lon = -122.630091,
88 | lat = 45.480740),
89 | color = "color", # or bw
90 | source = "google",
91 | maptype = "satellite",
92 | zoom = 17)
93 | # Scratch code: draw a scatterplot, label points with identify(), trace lines with locator().
94 | # NOTE(review): `mydata` and `data` are not created anywhere in this script -- confirm where they come from.
95 | ls(data)
96 | attach(mydata) # NOTE(review): presumably so the bare x and y below resolve to mydata columns -- confirm
97 | plot(x, y) # scatterplot
98 | identify(x, y, labels=row.names(mydata)) # identify points
99 | coords <- locator(type="l") # add lines
100 | coords # display list
101 | # Same steps again, this time referring to the columns through data$ explicitly.
102 | ls(data)
103 | attach(mydata)
104 | plot(data$admit_rate, data$grad_rate) # scatterplot
105 | identify(data$admit_rate, data$grad_rate, labels=row.names(data)) # identify points
106 | coords <- locator(type="l") # add lines
107 | coords # display list
108 |
109 |
110 |
--------------------------------------------------------------------------------
/portland mapping/mapping in r.R:
--------------------------------------------------------------------------------
1 | # http://www.r-bloggers.com/the-openstreetmap-package-opens-up/
2 | # http://rpubs.com/RobinLovelace/12696
3 | library(osmar)
4 | library(OpenStreetMap)
5 |
6 | # OSM data for a 1000 x 1000 box around the centre point below
7 | centre <- c(lon = -122.688068, lat = 45.521032)
8 | src <- osmsource_api()
9 | bb <- center_bbox(centre[["lon"]], centre[["lat"]], 1000, 1000)
10 | ptown <- get_osm(bb, source = src)
11 | plot(ptown)
12 | # mark the box centre; the old point (-1.53492, 53.81934) is outside this box
13 | points(centre[["lon"]], centre[["lat"]], col = "red", lwd = 5)
14 | # ways tagged bicycle = yes, drawn in red on top of the full plot
15 | bikePaths <- find(ptown, way(tags(k == "bicycle" & v == "yes")))
16 | bikePaths <- find_down(ptown, way(bikePaths))
17 | bikePaths <- subset(ptown, ids = bikePaths)
18 | plot(ptown)
19 | plot_ways(bikePaths, add = TRUE, col = "red", lwd = 3)
20 | library(ggmap)
21 |
22 | # Three store locations (name, longitude, latitude) to plot.
23 | stores <- data.frame(
24 |   name = c("Commercial", "Union", "Bedford"),
25 |   longitude = c(-70.25042295455933, -70.26050806045532, -70.27726650238037),
26 |   latitude = c(43.657471302616806, 43.65663299041943, 43.66091757424481)
27 | )
28 | # Four-number location handed to get_map() below.
29 | location <- c(-70.2954, 43.64278, -70.2350, 43.68093)
30 |
31 | # Fetch the OSM map, then build the plot layer by layer:
32 | # base map, store points, store labels shifted slightly to the right.
33 | portland <- get_map(location = location, source = "osm")
34 | portlandMap <- ggmap(portland) +
35 |   geom_point(data = stores, aes(x = longitude, y = latitude), size = 5)
36 | portlandMap +
37 |   geom_text(data = stores, aes(label = name, x = longitude+.001, y = latitude), hjust = 0)
38 |
--------------------------------------------------------------------------------
/rater reliability .R:
--------------------------------------------------------------------------------
1 |
2 | # libraries ---------------------------------------------------------------
3 |
4 | # install irr library if not already installed
5 | if (!requireNamespace("irr", quietly = TRUE)) { # checks for irr without listing every installed package
6 |   install.packages("irr")
7 | }
8 | #Load the irr library
9 | library(irr)
10 |
11 | # read in data and clean dataframe ----------------------------------------
12 |
13 | # read in full data file (update file path to match data location on your computer)
14 | original.data <- read.csv('/Users/majerus/Desktop/linguistics_data.csv')
15 |
16 | # drop summary statistics that are included in original file
17 | data <- subset(original.data, !is.na(original.data$Test.Number))
18 | data$Average.Score..RA <- NULL
19 | data$Average.Score..Participants <- NULL
20 | data$Difference..RA...Participants. <- NULL
21 | data$X <- NULL
22 |
23 | # drop extra vars
24 | data$Speaker <- NULL
25 | data$Speaker.From <- NULL
26 | data$Bows..Horizon <- NULL
27 | data$Test.Number <- NULL
28 |
29 | # make File name variable the rownames so that it is preserved as columns names once df is transposed
30 | rownames(data) <- data$File.Name
31 | data$File.Name <- NULL
32 |
33 | # calculate inter-rater reliability between Molly and Dean using Cohen's Kappa----------------
34 |
35 | ratings <- as.data.frame(cbind(Dean = data$RA..Dean, Molly = data$RA..Molly))
36 |
37 | # kappa2(ratings, weight = c("unweighted", "equal", "squared"), sort.levels = FALSE)
38 | kappa2(ratings)
39 |
40 |
41 |
42 | # calculate inter-rater reliability between all raters -------------------
43 |
44 | # transpose df: rows become the rating columns, columns become the files
45 | data.t <- as.data.frame(t(data))
46 |
47 | # check class of each variable
48 | sapply(data.t, class)
49 |
50 | # convert data.t to a numeric matrix; called ratings.matrix rather than
51 | # `matrix` so it does not reuse the name of base matrix()
52 | ratings.matrix <- data.matrix(data.t)
53 |
54 | # Krippendorff's alpha
55 |
56 | # kripp.alpha(x, method=c("nominal","ordinal","interval","ratio")) (need to select right data level for method)
57 | kripp.alpha(ratings.matrix, method = "ratio")
58 | kripp.alpha(ratings.matrix, method = "nominal")
59 |
60 |
61 | # Light's Kappa
62 |
63 | # transform data to factors for Light's Kappa, which requires categorical data;
64 | # lapply() keeps every column a factor (sapply() collapses them into a matrix)
65 | sapply(data, class)
66 | data.factors <- as.data.frame(lapply(data, as.factor))
67 | kappam.light(data.factors)
68 |
69 | # Fleiss' Kappa
70 |
71 | # Fleiss' Kappa for m raters with categorical data
72 | kappam.fleiss(data.factors, detail = TRUE, exact = FALSE)
73 | kappam.fleiss(data.factors, detail = TRUE)
74 |
75 |
76 |
--------------------------------------------------------------------------------
/read in ipeds data.R:
--------------------------------------------------------------------------------
1 | # install devtools (straight quotes: the curly quotes used before are a syntax error)
2 | install.packages("devtools")
3 | library(devtools)
4 | # install the ipeds package from GitHub using the "user/repo" form
5 | install_github("jbryer/ipeds")
6 | library(ipeds)
7 |
8 | ls('package:ipeds') # list the objects the attached package provides
9 |
10 | data(surveys)
11 | names(surveys)
12 |
13 | downloadAllSurveys(2013)
14 |
15 |
--------------------------------------------------------------------------------
/read_googlesheet.R:
--------------------------------------------------------------------------------
1 | # survey url: https://docs.google.com/forms/d/1zLVTb8dix0tiWr0sVuRQAGsfjdMfQ5A5PmK_wDn9e7U/viewform?usp=send_form
2 |
3 | library(XML)
4 | library(httr)
5 |
6 | url <- "https://docs.google.com/spreadsheets/d/1CVQqfIEkbt9KUi3oxgE8_iQQnbN7CBIDqXqtNYSfsiw/pubhtml?gid=594213668&single=true"
7 |
8 | # Read one sheet of a published Google spreadsheet into a data frame.
9 | #   url:   address of the published (pubhtml) spreadsheet page
10 | #   sheet: index of the HTML table on that page to return (default 1)
11 | # Returns the table with its first row used as column names, the first
12 | # column dropped and rows with an empty first remaining column removed.
13 | readSpreadsheet <- function(url, sheet = 1){
14 |   r <- GET(url)
15 |   stop_for_status(r) # fail on an HTTP error instead of parsing an error page
16 |   # parse the page text with XML so that readHTMLTable() gets a document it accepts
17 |   html <- htmlParse(content(r, as = "text"), asText = TRUE)
18 |   sheets <- readHTMLTable(html, header = FALSE, stringsAsFactors = FALSE)
19 |   df <- sheets[[sheet]]
20 |   names(df) <- t(df[1, ])
21 |   df <- df[-1, -1, drop = FALSE]
22 |   df <- df[df[, 1] != "", , drop = FALSE] ## only select rows with time stamps
23 |   row.names(df) <- seq_len(nrow(df)) # seq_len() also handles zero rows
24 |   df
25 | }
26 |
27 | df <- readSpreadsheet(url)
28 |
29 |
30 |
--------------------------------------------------------------------------------
/read_multiple_txt_files.R:
--------------------------------------------------------------------------------
1 |
2 | # install, update and load packages -----------------------------------------------
3 |
4 | pkg <- c("stringr", "reshape2", "dplyr", "ggplot2", "magrittr")
5 |
6 | # compare against installed package names only, not every cell of the matrix
7 | new.pkg <- setdiff(pkg, rownames(installed.packages()))
8 | if (length(new.pkg) > 0) {
9 |   install.packages(new.pkg)
10 | }
11 |
12 | library(stringr)
13 | library(reshape2)
14 | library(dplyr)
15 | library(ggplot2)
16 |
17 |
18 | # Read in data ------------------------------------------------------------
19 | # update this file path to point toward appropriate folder on your computer
20 | folder <- "/Users/majerus/Desktop/thesis_projects/linguistics/Yevgeniy/exp1/" # path to folder that holds multiple .txt files
21 | # `pattern` is a regular expression, not a glob: match names ending in ".txt"
22 | file_list <- list.files(path = folder, pattern = "\\.txt$")
23 |
24 | # read each .txt file in file_list, tag its rows with the file name, and
25 | # rbind everything into one data frame called data
26 | read_one <- function(x) {
27 |   cbind(file = x, read.table(paste0(folder, x), header = TRUE, stringsAsFactors = FALSE))
28 | }
29 | data <- do.call("rbind", lapply(file_list, read_one))
30 |
31 |
32 |
33 | # Clean data --------------------------------------------------------------
34 |
35 | clean.data <- function(df){
36 | df <- cbind(df, colsplit(df$stimulus, ',', names = c('s1','s2', 's3')))
37 | df$answer <- ifelse(str_count(df$stimulus, 'A') == 2, 'A', 'B')
38 | df$correct <- ifelse(df$response == df$answer, 1, 0)
39 | df$reactionTime <- as.numeric(df$reactionTime)
40 | return(df)
41 | }
42 |
43 | data <- clean.data(data)
44 |
45 |
46 | # Write out data ----------------------------------------------------------
47 |
48 | write.csv(data, paste(folder,'cleaned_data.csv', sep = ''), row.names = FALSE)
49 |
50 |
51 | # Create data frame of summary statistics ---------------------------------
52 |
53 | summary_stats <-
54 | data %>%
55 | group_by(subject, correct, answer) %>%
56 | summarise(count = n(),
57 | mean_reactionTime = mean(reactionTime, na.rm = TRUE),
58 | sd_reactionTime = sd(reactionTime, na.rm = TRUE),
59 | min_reactionTime= min(reactionTime, na.rm = TRUE),
60 | max_reactionTime = max(reactionTime, na.rm = TRUE))
61 |
62 |
63 |
64 | # Write out data frame of summary statistics ------------------------------
65 |
66 | write.csv(summary_stats, paste(folder,'summary_stats.csv', sep = ''), row.names = FALSE)
67 |
68 |
69 |
--------------------------------------------------------------------------------
/rename_colnames_base_on_crosswalk:
--------------------------------------------------------------------------------
1 | # Rename columns of `data` using the crosswalk `col_cross`: a column whose name
2 | # appears in col_cross$original gets the value from the first crosswalk column.
3 | # Names are matched once up front, so columns missing from the crosswalk are kept.
4 | hit <- match(colnames(data), col_cross$original)
5 | for (j in which(!is.na(hit))) {
6 |   print(paste(colnames(data)[j], "renamed to", col_cross[[1]][hit[j]]))
7 |   colnames(data)[j] <- as.character(col_cross[[1]][hit[j]])
8 | }
--------------------------------------------------------------------------------
/rmaps.R:
--------------------------------------------------------------------------------
1 | # http://rmaps.github.io/blog/posts/animated-choropleths/
2 | #require(devtools)
3 | #install_github('ramnathv/rCharts@dev')
4 | #install_github('ramnathv/rMaps')
5 |
6 | library(rMaps)
7 | library(rCharts)
8 | library(reshape2)
9 |
10 | # change file path to match location on your machine
11 | folder <- '/Users/majerus/Desktop/2014 projects/blog/post1_logs/'
12 |
13 | # change file name to match name on your machine
14 | file <- 'state_enrollment_reed.csv'
15 |
16 | # read in enrollment data
17 | state <- read.csv(paste(folder, file, sep=''))
18 |
19 | # rename columns for reshape
20 | colnames(state) <- c('State', '2007', '2008', '2009', '2010', '2011', '2012', '2013')
21 |
22 | # reshape data from wide to long
23 | state_long <- melt(state)
24 |
25 | # rename columns
26 | colnames(state_long) <- c('State', 'Year', 'Students')
27 |
28 | # check class of each variable
29 | sapply(state_long, class)
30 |
31 | # convert year to numeric
32 | state_long$Year <- as.integer(as.character(state_long$Year))
33 |
34 | # convert state to character
35 | state_long$State <- as.character(state_long$State)
36 |
37 | # convert students to numeric
38 | state_long$Students <- as.numeric(state_long$Students)
39 |
40 | # change state names to abbreviations
41 | state_long$abr <- state.abb[match(as.character(state_long$State), state.name)]
42 |
43 | # log
44 | state_long$Students_log <- ifelse(state_long$Students==0, 0, log(state_long$Students))
45 |
46 |
47 |
48 |
49 | # no students from
50 | map <-
51 | ichoropleth(Students ~ abr,
52 | data = state_long,
53 | ncuts = 1,
54 | animate = 'Year',
55 | play = TRUE,
56 | legend = FALSE
57 | )
58 |
59 | map$save('/Users/majerus/Desktop/2014 projects/blog/post1_logs/rmaps/no_students.html', cdn = TRUE)
60 |
61 |
62 |
63 | ichoropleth(Students ~ abr,
64 | data = state_long,
65 | ncuts = 5,
66 | animate = 'Year',
67 | play = TRUE,
68 | legend = FALSE
69 | )
70 | # NOTE: MYchoropleth() is defined near the end of this file; run that definition before these calls
71 | slider <-
72 | MYchoropleth(Students ~ abr,
73 | data = state_long,
74 | animate = 'Year',
75 | legend = TRUE
76 | )
77 | slider$save('/Users/majerus/Desktop/2014 projects/blog/post1_logs/rmaps/slider.html', cdn = TRUE)
78 |
79 | # second version: play = TRUE selects the other chart template inside MYchoropleth(); legend is off
80 | play <-
81 | MYchoropleth(Students ~ abr,
82 | data = state_long,
83 | animate = 'Year',
84 | legend = FALSE,
85 | play=TRUE
86 | )
87 | play$save('/Users/majerus/Desktop/2014 projects/blog/post1_logs/rmaps/play.html', cdn = TRUE)
88 |
89 |
90 |
91 |
92 |
93 | hist(state_long$Students_log)
94 |
95 |
96 |
97 | # MYchoropleth: build a Datamaps choropleth from a formula `value ~ region`. The left-hand values
98 | # are cut into the fixed bins (-1,0], (0,5], (5,10], (10,25], (25,50], (50,100] and each bin gets one
99 | # fill colour. When `animate` names a column, the data are split by that column and a chart template
100 | # is chosen by `play`. `pal` and `ncuts` are accepted but not used. Returns the Datamaps object.
101 | MYchoropleth <- function(x, data, pal = "Blues", ncuts = 5, animate = NULL, play = FALSE, map = 'usa', legend = TRUE, labels = TRUE, ...){
102 |   d <- Datamaps$new()
103 |   fml <- lattice::latticeParseFormula(x, data = data)
104 |   data <- transform(data,
105 |     fillKey = cut(
106 |       fml$left,
107 |       c(-1,0, 5,10,25,50,100),  # values above 100 fall outside every bin and get an NA fillKey
108 |       ordered_result = TRUE
109 |     )
110 |   )
111 |   fillColors <- c('white', RColorBrewer::brewer.pal(5, 'YlOrRd'))  # six colours for the six bins
112 |   d$set(
113 |     scope = map,
114 |     fills = as.list(setNames(fillColors, levels(data$fillKey))),
115 |     legend = legend,
116 |     labels = labels,
117 |     ...
118 |   )
119 |   if (!is.null(animate)){
120 |     range_ <- summary(data[[animate]])  # elements 1 and 6 are the minimum and maximum
121 |     data <- plyr::dlply(data, animate, function(x){  # plyr:: because this script never attaches plyr
122 |       y <- toJSONArray2(x, json = FALSE)
123 |       names(y) <- lapply(y, '[[', fml$right.name)
124 |       return(y)
125 |     })
126 |     d$set(
127 |       bodyattrs = "ng-app ng-controller='rChartsCtrl'"
128 |     )
129 |     d$addAssets(
130 |       jshead = "http://cdnjs.cloudflare.com/ajax/libs/angular.js/1.2.1/angular.min.js"
131 |     )
132 |     if (isTRUE(play)){
133 |       d$setTemplate(chartDiv = sprintf("
134 |
138 | ", range_[1], range_[6])
151 |       )
152 |     # NOTE(review): both sprintf() templates look truncated in this listing (no %s placeholders remain); restore them before use
153 |     } else {
154 |       d$setTemplate(chartDiv = sprintf("
155 |
159 | ", range_[1], range_[6], range_[1])
167 |       )
168 |     }
169 |     d$set(newData = data, data = data[[1]])
170 |
171 |   } else {
172 |     d$set(data = plyr::dlply(data, fml$right.name))
173 |   }
174 |   return(d)
175 | }
176 |
177 |
178 |
179 |
180 |
--------------------------------------------------------------------------------
/rvest.R:
--------------------------------------------------------------------------------
1 | # Libraries ---------------------------------------------------------------
2 |
3 | if( !is.element("rvest", installed.packages()[,1]) )
4 | install.packages("rvest")
5 |
6 | library(rvest)
7 | library(stringr)
8 |
9 | # read in data ------------------------------------------------------------
10 |
11 | # read in list of ids and seasons
12 | list <- read.csv('/Users/majerus/Desktop/thesis_projects/theather/Shabab/LORTdata_tcgR.csv')
13 |
14 | # create list of ids
15 | ids <- unique(list$MemberID)
16 |
17 | ids <- ids[5:6]
18 |
19 |
20 | # create function to read in data-----------
21 |
22 | read.t.data <- function(id){
23 |   # Scrape the production listing of TCG member `id` for SeasonIDs 17 to 22.
24 |   # Returns a data frame of scraped node text (`rankings`) with `Season`, plus `id` when any row was found.
25 |   seasons <- 17:22
26 |
27 |   per_season <- lapply(seasons, function(season){
28 |     # read_html() is the current rvest reader; html() is deprecated
29 |     page <- read_html(paste0("http://www.tcg.org/tools/profiles/member_profiles/profile_detail.cfm?MemberID=", id, '&SeasonID=', season))
30 |
31 |     rankings <-
32 |       page %>%
33 |       html_nodes(".productions , #ProductionTitleRow td") %>%
34 |       html_text()
35 |
36 |     rankings <- as.data.frame(rankings)
37 |
38 |     if(nrow(rankings)>0) {
39 |       rankings <- cbind(rankings, Season = season)
40 |     }
41 |
42 |     rankings
43 |   })
44 |   results <- do.call(rbind, per_season)
45 |
46 |   if(nrow(results)>0) {
47 |     results <- cbind(id=id, results)
48 |   }
49 |   results
50 | }
51 |
52 |
53 | # create function to scrape seating capacity in data-----------
54 |
55 | capacity.pull <- function(id){
56 |   # Scrape the cells of the tenth table row ("tr:nth-child(10) td") of TCG member `id` for SeasonIDs 17 to 22.
57 |   # Returns a data frame of scraped node text (`rankings`) with `Season`, plus `id` when any row was found.
58 |   seasons <- 17:22
59 |
60 |   per_season <- lapply(seasons, function(season){
61 |     # read_html() is the current rvest reader; html() is deprecated
62 |     page <- read_html(paste0("http://www.tcg.org/tools/profiles/member_profiles/profile_detail.cfm?MemberID=", id, '&SeasonID=', season))
63 |
64 |     rankings <-
65 |       page %>%
66 |       html_nodes("tr:nth-child(10) td") %>%
67 |       html_text()
68 |
69 |     rankings <- as.data.frame(rankings)
70 |
71 |     if(nrow(rankings)>0) {
72 |       rankings <- cbind(rankings, Season = season)
73 |     }
74 |
75 |     rankings
76 |   })
77 |   results <- do.call(rbind, per_season)
78 |
79 |   if(nrow(results)>0) {
80 |     results <- cbind(id=id, results)
81 |   }
82 |   results
83 | }
84 |
85 |
86 |
87 | # apply scraping function to list of ids
88 | data <- do.call(rbind, lapply(ids, read.t.data))
89 |
90 | # pull capacity data
91 | capacity <- do.call(rbind, lapply(ids, capacity.pull))
92 |
93 | capacity <- subset(capacity, str_detect(capacity$rankings, 'Seating Capacity:'))
94 | capacity <- subset(capacity, str_detect(capacity$rankings, 'Facility Name:'))
95 |
96 | temp <- sub(".*\r\n", "", capacity$rankings)
97 | # disabled: `tidy` is not created until the tidying section further down, which runs this same statement
98 | # tidy <- cbind(tidy, t(as.data.frame(str_split(tidy$extra, 'Facility & Venue:'))))
99 |
100 | temp <- str_split(capacity$rankings, '\r\n')
101 |
102 |
103 | # Facility Name:   (stray field label; as bare text it is a syntax error)
104 | # write out data ----------------------------------------------------------
105 |
106 | write.csv(data, '/Users/majerus/Desktop/thesis_projects/theather/Shabab/data.csv')
107 |
108 |
109 |
110 |
111 | # read in scraped data ----------------------------------------------------
112 |
113 | messy <- read.csv('/Users/majerus/Desktop/thesis_projects/theather/Shabab/data.csv', row.names=1)
114 |
115 | colnames(messy) <- c('id', 'Rankings', 'Season')
116 |
117 | # pull out dates
118 | row3 <- messy[seq(1, nrow(messy), 3), ]
119 | colnames(row3) <- c('id', 'dates', 'Season')
120 |
121 | messy <- messy[-seq(1, NROW(messy), by = 3),]
122 | messy$Season <- NULL
123 |
124 |
125 | # pull out play names
126 | row2 <- messy[seq(1, nrow(messy), 2), ]
127 | row2$id <- NULL
128 | colnames(row2) <- c('play')
129 |
130 | messy <- messy[-seq(1, NROW(messy), by = 2),]
131 |
132 | # pull out extra info
133 | row1 <- messy
134 | row1$id <- NULL
135 | colnames(row1) <- c('extra')
136 |
137 | # cbind data together
138 | tidy <- cbind(row3, row2, row1)
139 |
140 | tidy <- cbind(tidy, t(as.data.frame(str_split(tidy$dates, '-'))))
141 | colnames(tidy) <- c("id", "dates", "Season", "play", "extra", "start", "end" )
142 | tidy$end <- str_sub(tidy$end, start = 1, end = 9)
143 |
144 | tidy$dates <- NULL
145 | tidy$start <- as.Date(tidy$start, "%m/%d/%y")
146 | tidy$end <- as.Date(tidy$end, "%m/%d/%y")
147 |
148 | tidy$days <- tidy$end - tidy$start
149 |
150 | tidy$extra <- as.character(tidy$extra)
151 |
152 | tidy$extra <- str_replace_all(tidy$extra, "[\r\n]", '')
153 | tidy <- cbind(tidy, t(as.data.frame(str_split(tidy$extra, 'Facility & Venue:'))))
154 |
155 | tidy$extra <- NULL
156 |
157 | colnames(tidy) <- c('id', 'season', 'play', 'start', 'end', 'days', 'drop', 'venue')
158 | tidy$drop <- NULL
159 |
160 | write.csv(tidy, '/Users/majerus/Desktop/thesis_projects/theather/Shabab/tidy_data.csv')
161 |
162 |
163 | tidy <- read.csv('/Users/majerus/Desktop/thesis_projects/theather/Shabab/tidy_data.csv')
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 | tidy$extra <- str_replace_all(tidy$extra, "[^[:alnum:]]", " ")
179 | tidy$extra <- str_replace_all(tidy$extra, "Playwright s", '')
180 | tidy$extra <- str_trim(tidy$extra)
181 |
182 |
183 |
184 |
185 | temp <- cbind(tidy, t(as.data.frame(str_split_fixed(tidy$extra, ' ', 2))))
186 |
187 |
188 |
189 | str_split_fixed(tidy$extra, ' ', 2)
190 |
191 |
192 |
193 | gsub( ".*$", "", tidy$extra)
194 | sub(" *$","", tidy$extra, perl=T)
195 |
196 | temp <- cbind(tidy, t(as.data.frame(str_split(tidy$extra, ' '))))
197 |
198 | str_split_fixed(tidy$extra, ' ', 2)
199 |
200 |
201 |
202 | tidy$extra <- str_replace_all(tidy$extra, ' ', '')
203 |
204 |
205 |
206 |
207 | temp <- cbind(tidy, t(as.data.frame(str_split(tidy$extra, 'Lyricist'))))
208 |
209 |
210 |
211 |
212 |
213 |
214 | tidy$extra <- NULL
215 |
216 | colnames(tidy) <- c('id', 'Season', 'play', 'start', 'end', 'days', 'drop', 'venue')
217 | tidy$drop <- NULL
218 |
219 |
220 |
221 | # Facility & Venue:   (field labels from the scraped text are kept as comments so the script parses)
222 |
223 |
224 | y <- unlist(strsplit(tidy$extra,'[\r\n]'))
225 | sub('Facility & Venue:', '', y)  # sub() requires a replacement; this drops the label from each piece
226 |
227 |
228 | # Stage Director(s):
229 |
230 |
231 |
232 |
233 | temp <- cbind(tidy, t(as.data.frame(str_split(tidy$extra, 'Playwright'))))
234 |
235 |
236 | # Lyricist(s):
237 |
238 |
239 | str_replace_all(x, "[\r\n]" , "")
240 |
241 |
242 | str_replace_all(string=a, pattern=" ", replacement="")
243 |
244 | temp <- as.data.frame(str_split(tidy$extra, '"[\r\n]"'))
245 |
246 |
247 |
248 |
249 | sapply(tidy, class)
250 |
251 |
252 |
253 | k <- function(dataframe, n)dataframe[seq(n,to=nrow(dataframe),by=n),]
254 |
255 |
256 |
257 |
258 | row3 <- messy[seq(1, length(messy), 3)]
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 | messy$element <- rep(c('date', 'play', 'extra'))
267 | messy$count <- rep(1:3)
268 |
269 |
270 | tidier <- messy %>%
271 | gather(key, time, -id, -Season)
272 | tidier %>% head(8)
273 |
274 | messy$id.unique <- paste(messy$id, messy$Season, messy$count, sep='')
275 |
276 | tidy <-
277 | messy %>%
278 | spread(id.unique, Rankings, fill = NA, convert = FALSE, drop = TRUE)
279 |
280 |
281 | # Season   (stray column names and argument fragment below are commented out so the script parses)
282 | # count
283 |
284 |
285 | temp <- dcast(messy, id + Season + count~element, value.var="Rankings")
286 |
287 | duplicated(messy$id.unique)
288 |
289 |
290 | # , value.var="Rankings"
291 |
292 | ls(messy)
293 |
294 |
295 |
296 | temp <-
297 | reshape(messy, direction = 'wide', idvar = c('id', 'Season', 'count'), timevar = 'element',
298 | v.names = 'test_result', sep = "_")
299 |
300 |
301 |
302 |
303 |
--------------------------------------------------------------------------------
/ts_graphs.R:
--------------------------------------------------------------------------------
1 | library(ggplot2)
2 | library(dplyr)
3 | library(lubridate)
4 | library(xts)
5 | library(stringr)
6 | library(reshape2)
7 | library(plyr)
8 | library(grid)
9 |
10 |
11 | data <- read.csv('/Users/majerus/Desktop/thesis_projects/makoto/data.csv')
12 | data$X <- NULL
13 | data$Carus.Spangler.Rd. <- NULL
14 | #data[is.na(data)] <- 0
15 |
16 | data <- dplyr::rename(data, time = Date...Time)
17 |
18 | # id
19 | # data$id <- id(data[c("time")])
20 | data <- mutate(data, id = rownames(data))
21 |
22 |
23 |
24 | data$time <- str_replace(data$time, 'AM', '')
25 | data$time <- str_replace(data$time, 'PM', '')
26 | data$time <- str_trim(data$time, 'both')
27 | data$time <- str_replace(data$time, '2013', '13')  # shorten each year to its own two digits for the %y parse
28 | data$time <- str_replace(data$time, '2014', '14')
29 | data$time <- str_replace(data$time, '2015', '15')
30 | # NOTE(review): AM/PM are stripped above, so 12-hour afternoon times would parse as morning with %H - confirm the source is 24-hour
31 | #data$time <- str_replace(data$time, '24:00', '23:59')
32 |
33 | #data$time <- strptime(data$time, '%m/%d/%y %H:%M')
34 |
35 | # convert every station column to numeric, each from its own column;
36 | # as.character() first so that factor columns yield their values rather than level codes
37 | stations <- c("Albany.Calapooia.School", "Beaverton.Highland.Park", "Corvallis.Circle.Blvd",
38 |               "Hillsboro.Hare.Field", "Portland.SE.Lafayette", "Salem.State.Hospital",
39 |               "Sauvie.Island", "Sweet.Home.Fire.Department")
40 | data[stations] <- lapply(data[stations], function(x) as.numeric(as.character(x)))
41 |
42 |
43 |
44 | long_data <- melt(data, id=c("id", "time"), na.rm=TRUE)
45 |
46 | # POSIXct rather than POSIXlt (what strptime() returns): plyr cannot split data frames holding POSIXlt columns
47 | long_data$time <- as.POSIXct(long_data$time, format = '%m/%d/%y %H:%M', tz = "UTC")
48 | long_data$log <- log(long_data$value)
49 |
50 | # calendar day of each reading; parsed and extracted in UTC so the day never shifts
51 | long_data$Date <- as.Date(long_data$time)
52 | graph <- ddply(long_data,.(variable, Date),summarise, ave=mean(value))
53 |
54 | p <-
55 | ggplot(aes(Date, ave, colour = variable), data = graph) +
56 | geom_line() +
57 | ggtitle("Average by Day")
58 |
59 | p <-
60 | p + annotate("text", x = as.Date(Inf), y = -Inf, label = "Created by Rich Majerus",
61 | hjust=1.1, vjust=-1.1, col="white", cex=6,
62 | fontface = "bold", alpha = 0.8) +
63 | facet_wrap( ~ variable, ncol=3)
64 |
65 | ggsave(p, file="/Users/majerus/Desktop/thesis_projects/makoto/ts.pdf", scale=2)
66 |
--------------------------------------------------------------------------------
/tutorials/Histograms advanced.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Histograms II"
3 | author: "Reed College, Instructional Technology Services"
4 | output: html_document
5 | ---
6 |
7 |
8 |
9 |
10 | #### Install and load ggplot2
11 | ```{r, warning=FALSE, message=FALSE}
12 | # install libraries
13 | # This demo requires the 'ggplot' package
14 | if( !is.element("ggplot2", installed.packages()[,1]) )
15 | install.packages("ggplot2")
16 |
17 | # load libraries
18 | library(ggplot2)
19 | ```
20 |
21 |
22 |
23 |
24 | #### Download and Load Data
25 | ```{r}
26 | download.file("http://www.openintro.org/stat/data/evals.RData", destfile = "evals.RData")
27 | load("evals.RData")
28 | ```
29 |
30 |
31 |
32 |
33 | #### Create Histogram Plot Function
34 | ```{r, message=FALSE}
35 |
36 | # this function will create a histogram of every variable in your data frame
37 | # the function takes one argument ('x') which is your data frame
38 | # if you want to save the histograms as .png files define the file path for graphs_folder below
39 | # you will also need to remove the "#" from the two lines in the function that are commented out and place a "#" before print
40 |
41 | # save graphs in this folder
42 | graphs_folder <- '/filepath/graphs/'
43 |
44 | hist <- function(x, na.rm = TRUE, ...) {
45 |   # one histogram per column of x; each plot is printed as soon as it is built
46 |   for (col_name in names(x)) {
47 |     print(ggplot(x, aes_string(x = col_name)) + geom_histogram(alpha=.8, fill = "darkblue") + theme_classic()) }
48 |     #plots <- ggplot(x, aes_string(x = col_name)) + geom_histogram(alpha=.8, fill = "darkblue") + theme_classic()
49 |     #ggsave(plots,filename=paste(graphs_folder, "hist_",col_name,".png",sep=""))}
50 | }
51 |
52 | ```
53 |
54 |
55 |
56 |
57 | #### Run Histogram Plot Function to Create Plots for an Entire Data Frame
58 | ```{r, message=FALSE, warning=FALSE}
59 | hist(evals)
60 | ```
61 |
62 |
63 |
64 | #### Create Density Plot Function
65 | ```{r, message=FALSE}
66 |
67 | den <- function(x, na.rm = TRUE, ...) {
68 |   # keep numeric columns only (is.numeric() already covers integers); drop = FALSE keeps a data frame when a single column qualifies
69 |   x <- x[, vapply(x, is.numeric, logical(1)), drop = FALSE]
70 |   for (col_name in names(x)) {  # one printed density plot per remaining column
71 |     print(ggplot(x, aes_string(x = col_name)) + geom_density(alpha=.8, fill = "darkgreen") + theme_classic())}
72 | }
73 |
74 | ```
75 |
76 |
77 |
78 |
79 | #### Run Density Plot Function to Create Kernel Density Plots for All Continuous Variables in a Data Frame
80 | ```{r, message=FALSE, warning=FALSE}
81 | den(evals)
82 | ```
83 |
84 |
85 |
--------------------------------------------------------------------------------
/tutorials/colors.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Package RColorBrewer with ggplot2"
3 | author: "Reed College, Instructional Technology Services"
4 | output: html_document
5 | ---
6 |
7 |
8 |
9 | #####Load Data
10 | ```{r}
11 |
12 | download.file("http://www.openintro.org/stat/data/ames.csv", destfile = "ames.csv")
13 | data <- read.csv("ames.csv") # reads the copy just downloaded into the working directory
14 |
15 | ```
16 |
17 |
18 |
19 | #####Load Packages
20 |
21 | ```{r}
22 |
23 | library(plyr)
24 | library(ggplot2)
25 | library(ggthemes)
26 | library(scales)
27 | library(reshape2)
28 | library(RColorBrewer)
29 | ```
30 |
31 |
32 |
33 |
34 | #####Calculate Mean Sale Price by Year and Building Type
35 |
36 | ```{r}
37 |
38 | # show count of building types by year of sale
39 | table(data$Yr.Sold, data$Bldg.Type)
40 |
41 | # create data frame with the mean sale price for each combination of year and type
42 | means <- ddply(data, .(Yr.Sold, Bldg.Type), summarize,
43 | mean_price = mean(SalePrice))
44 |
45 | # show first 6 rows of new data frame
46 | head(means)
47 |
48 | # show table of mean sale price by year and type
49 | dcast(means, Yr.Sold ~ Bldg.Type)
50 |
51 | ```
52 |
53 |
54 |
55 | #####Plot Mean Sale Price by Year and Building Type (manually assign colors)
56 |
57 | ```{r}
58 |
59 | # define colors by name
60 |
61 | p <-
62 | ggplot(means, aes(Yr.Sold, mean_price, group = Bldg.Type, colour = Bldg.Type)) +
63 | geom_line(size=2) +
64 | scale_y_continuous("Mean Sale Price", labels = dollar) +
65 | scale_x_continuous("Year") +
66 | ggtitle("Mean Home Sale Price in Ames, IA") +
67 | theme_tufte() +
68 | theme(plot.title = element_text(size = 16, face="bold"))
69 |
70 | p + scale_colour_manual(values = c("red","blue", "dark green", "grey", "black"))
71 |
72 |
73 | ```
74 |
75 |
76 | ```{r}
77 |
78 | # define colors by hex code
79 | # see http://www.w3schools.com/tags/ref_colorpicker.asp & http://colorbrewer2.org
80 |
81 | p + scale_colour_manual(values = c("#0000FF","#197519", "#CC2900", "#4700B2", "#E6E600"))
82 |
83 |
84 | ```
85 |
86 |
87 |
88 | ##### Plot Mean Sale Price by Year and Building Type (assign colors with RColorBrewer)
89 |
90 | ```{r}
91 | # use display.brewer.all() to see all options
92 |
93 | p + scale_colour_brewer("Colors in Set1", palette="Set1")
94 |
95 |
96 | ```
97 |
98 | ```{r}
99 |
100 | p + scale_colour_brewer("Colors in Paired", palette="Paired")
101 |
102 | ```
103 |
104 | ```{r}
105 |
106 | p + scale_colour_brewer("Colors in Spectral", palette="Spectral")
107 |
108 | ```
109 |
110 |
111 | ```{r}
112 |
113 | p + scale_colour_brewer("Colors in Dark2", palette="Dark2")
114 |
115 |
116 | ```
117 |
118 |
119 |
120 |
121 |
122 |
123 |
--------------------------------------------------------------------------------
/tutorials/create_variables.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Creating Variables in RStudio"
3 | author: "Reed College, Instructional Technology Services"
4 | output: html_document
5 | ---
6 |
7 | ```{r}
8 | # load the mtcars data
9 | data(mtcars)
10 | ```
11 |
12 | ```{r}
13 | head(mtcars) # Look at the first 6 rows of your data
14 | ```
15 |
16 |
17 | **Create a constant**
18 | ```{r}
19 | # we can create a constant that is always '1'
20 | mtcars$constant <- 1
21 | head(mtcars)
22 | ```
23 |
24 |
25 | **Create a variable from existing variables**
26 | ```{r}
27 | # Ratio of horse power to cylinders
28 | mtcars$hp_c <- mtcars$hp/mtcars$cyl
29 | head(mtcars)
30 | ```
31 |
32 |
33 | **Create a variable based on the values of existing variables**
34 | ```{r}
35 | # dummy variable to indicate if a car gets more than 20 mpg
36 | # use the "ifelse" command
37 | # ifelse(condition, if meets condition give variable this value, otherwise give variable this value)
38 | mtcars$mpg20 <- ifelse(mtcars$mpg > 20, 1, 0)
39 | head(mtcars)
40 | ```
41 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/tutorials/creating html tables.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Creating HTML Tables"
3 | author: "Rich Majerus"
4 | date: "November 11, 2014"
5 | output: html_document
6 | ---
7 |
8 |
9 |
10 | #####HTML Tables with xtable Package
11 | ```{r, results='asis'}
12 | library(xtable)
13 | library(plyr)
14 |
15 | data(mtcars)
16 |
17 | # create a data frame that contains mean mpg values by the number of cylinders
18 | summary_data <- ddply(mtcars, .(cyl), summarise, mean_mpg = mean(mpg))
19 |
20 | # rename columns in data frame
21 | colnames(summary_data) <- c('Cylinders', 'Mean MPG')
22 |
23 | # Create and print xtable in html
24 | print(xtable(summary_data,
25 | caption="Mean MPG by Cylinders"),
26 | "html", include.rownames=FALSE, caption.placement='top',
27 | html.table.attributes='align="left"')
28 |
29 | ```
30 |
31 |
32 |
33 | #####HTML Tables with knitr Package
34 |
35 | ```{r, results='asis'}
36 |
37 | library(knitr)
38 |
39 | kable(head(mtcars), digits=2)
40 |
41 | ```
42 |
43 |
44 |
45 |
46 |
47 | #####HTML Tables with googleVis Package
48 | ```{r, results='asis', warning=FALSE}
49 | suppressMessages(library(googleVis))
50 |
51 | cars <- cbind(car = rownames(mtcars), mtcars)
52 |
53 | table <- gvisTable(cars,
54 | #formats=list(Population="#,###")
55 | options=list(page='enable'))
56 |
57 | print(table)
58 | ```
59 |
60 |
61 |
62 |
63 |
--------------------------------------------------------------------------------
/tutorials/dplyr.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "dplyr Introduction"
3 | author: "Reed College, Instructional Technology Services"
4 | output: html_document
5 | ---
6 |
7 |
8 |
9 | ##### Load Packages and Data
10 |
11 | ```{r, warning=FALSE}
12 | # load dplyr package
13 | suppressMessages(library(dplyr))
14 |
15 | # create example dataframe
16 | cars <- cbind(car = rownames(mtcars), mtcars)
17 | rownames(cars) <- NULL
18 |
19 | ```
20 |
21 |
22 |
23 | ##### dplyr verbs
24 |
25 | ```{r}
26 | # filter - subset rows of a data frame / filter(df, how to subset)
27 | filter(cars, mpg > 25)
28 | filter(cars, mpg > 25 & hp > 75)
29 |
30 | # slice - subset rows of a data frame by position / slice(df, rows to keep)
31 | slice(cars, 1:5)
32 | slice(cars, c(1:3, 11:13))
33 |
34 | # arrange - order rows of a data frame / arrange(df, column names to order by)
35 | head(arrange(cars, mpg))
36 | head(arrange(cars, desc(mpg)))
37 | head(arrange(cars, desc(cyl), desc(mpg)))
38 |
39 | # select - subset columns of a data frame / select(df, names of columns to keep)
40 | head(select(cars, car, mpg))
41 | head(select(cars, car:hp))
42 | head(select(cars, -(car:hp)))
43 |
44 | # select and other dplyr verbs work with starts_with(), ends_with(), matches() and contains()
45 | head(select(cars, starts_with('c')))
46 |
47 | # select is often used with distinct - returns table of all unique values
48 | distinct(select(cars, vs, cyl))
49 |
50 | # rename - rename columns of a data frame / rename(df, new name = old name)
51 | head(rename(cars, automobile = car))
52 |
53 | # mutate - create new columns / mutate(df, new column name = formula for new column)
54 | head(mutate(cars, hp_to_wt = hp/wt))
55 |
56 | ```
57 |
58 |
59 |
60 |
61 | ##### Chaining Syntax
62 |
63 | ```{r}
64 | # count number of cars with each number of cylinders and put in descending order
65 | # n() - counts number of rows in a group
66 | cars %>%
67 | group_by(cyl) %>%
68 | summarise(cyl_count=n()) %>%
69 | arrange(desc(cyl_count))
70 |
71 | # calculate mean mpg by number of cylinders
72 | cars %>%
73 | group_by(cyl) %>%
74 | summarise(mean_mpg = mean(mpg, na.rm = TRUE))
75 |
76 | # calculate mean mpg and wt by number of cylinders
77 | # summarise_each - applies the same function to multiple columns
78 | cars %>%
79 | group_by(cyl) %>%
80 | summarise_each(funs(mean(., na.rm = TRUE)), mpg, wt)
81 |
82 | # calculate mean, min, max and sd of mpg and wt rates by number of cyl
83 | cars %>%
84 | group_by(cyl) %>%
85 | summarise_each(funs(mean(., na.rm = TRUE),
86 | min(., na.rm = TRUE),
87 | max(., na.rm = TRUE),
88 | sd(., na.rm = TRUE)),
89 | mpg, wt)
90 |
91 | ```
92 |
93 |
94 |
95 | ##### Sampling
96 |
97 | ```{r}
98 |
99 | # sample 5 rows
100 | cars %>% sample_n(5)
101 |
102 | # sample 10% of rows
103 | cars %>%
104 | sample_frac(.1, replace = FALSE)
105 |
106 | ```
107 |
--------------------------------------------------------------------------------
/tutorials/evals.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/tutorials/evals.RData
--------------------------------------------------------------------------------
/tutorials/excel.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Load data from multiple Excel worksheets"
3 | author: "Reed College, Instructional Technology Services"
4 | output: html_document
5 | ---
6 |
7 |
8 |
9 | #### Introduction
10 | The following code allows you to read in data from each page of an Excel workbook into a list of data frames in R. Then the code will run a function to clean the data in each of those data frames. Lastly, the data frames are joined together into one data frame for analysis.
11 |
12 |
13 |
14 | #### Install and Load XLConnect Package
15 | ```{r, eval=FALSE}
16 |
17 | # install and load packages -----------------------------------------------
18 | pkg <- c("XLConnect")
19 |
20 | new.pkg <- pkg[!(pkg %in% installed.packages())]
21 |
22 | if (length(new.pkg)) {
23 | install.packages(new.pkg)
24 | }
25 |
26 | library(XLConnect)
27 |
28 | ```
29 |
30 |
31 |
32 | #### Read in Data from Excel Worksheets
33 | ```{r, eval=FALSE}
34 |
35 | # load excel workbook
36 | excel <- loadWorkbook("filepath/ExcelData.xlsx") # change to match your path
37 |
38 | # get sheet names
39 | sheet_names <- getSheets(excel)
40 | names(sheet_names) <- sheet_names
41 |
42 | # put sheets into a list of data frames
43 | sheet_list <- lapply(sheet_names, function(.sheet){readWorksheet(object=excel, .sheet)})
44 |
45 | # limit sheet_list to sheets with at least 1 dimension
46 | sheet_list2 <- sheet_list[sapply(sheet_list, function(x) dim(x)[1]) > 0]
47 | ```
48 |
49 |
50 |
51 | #### Define and Run Function to Clean Data
52 | ```{r, eval=FALSE}
53 |
54 | # code to read in each excel worksheet as individual dataframes
55 | # for (i in 2:length(sheet_list2)){assign(paste0("df", i), as.data.frame(sheet_list2[i]))}
56 |
57 | # define function to clean data in each data frame (updated based on your data)
58 | cleaner <- function(df){
59 |   # drop rows containing any NA, then strip commas from every column but the first and make those numeric
60 |   complete_rows <- rowSums(is.na(df)) == 0
61 |   df <- df[complete_rows, ]
62 |   no_commas <- gsub(",", "", as.matrix(df[,-1]))
63 |   df[,-1] <- as.numeric(no_commas)
64 |   # numeric Year taken from the first four characters of the `year` column
65 |   df$Year <- as.numeric(substr(df$year, 1, 4))
66 |   return(df)
67 | }
68 |
69 | # clean sheets and create one data frame
70 | # data <- do.call(rbind,lapply(seq_along(sheet_list2), function(x) cleaner(sheet_list2[[x]])))
71 | data <- do.call(rbind,lapply(names(sheet_list2), function(x) cleaner(sheet_list2[[x]])))
72 |
73 |
74 | ```
75 |
76 |
77 |
--------------------------------------------------------------------------------
/tutorials/facets.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Graphing and Facets"
3 | author: "Reed College, Instructional Technology Services"
4 | output: html_document
5 | ---
6 |
7 |
8 |
9 | #####Load Data
10 | ```{r}
11 |
12 | download.file("http://www.openintro.org/stat/data/ames.csv", destfile = "ames.csv")
13 | data <- read.csv("ames.csv") # reads the copy just downloaded into the working directory
14 |
15 | ```
16 |
17 |
18 |
19 | #####Load Packages
20 |
21 | ```{r}
22 |
23 | library(plyr)
24 | library(ggplot2)
25 | library(ggthemes)
26 | library(scales)
27 | library(reshape2)
28 | ```
29 |
30 |
31 |
32 | #####Calculate Mean Sale Price by Year
33 |
34 | ```{r}
35 |
36 | mean <- ddply(data, .(Yr.Sold), summarize,
37 | mean_price = mean(SalePrice))
38 |
39 | ```
40 |
41 |
42 |
43 | #####Calculate Mean Sale Price by Year and Sale Condition
44 |
45 | ```{r}
46 |
47 | # show count of sale conditions by year of sale
48 | table(data$Yr.Sold, data$Sale.Condition)
49 |
50 | # create data frame with the mean sale price for each combination of year and condition
51 | mean.facet<- ddply(data, .(Yr.Sold, Sale.Condition), summarize,
52 | mean_price = mean(SalePrice))
53 |
54 | # show first 6 rows of new data frame
55 | head(mean.facet)
56 |
57 | # show table of mean sale price by year and condition
58 | dcast(mean.facet, Yr.Sold ~ Sale.Condition)
59 |
60 | ```
61 |
62 |
63 |
64 | #####Plot Mean Sale Price by Year
65 |
66 | ```{r}
67 |
68 | ggplot(mean, aes(Yr.Sold, mean_price)) +
69 | geom_line(color="dark blue", size=2) +
70 | scale_y_continuous("Mean Sale Price", labels = dollar) +
71 | scale_x_continuous("Year") +
72 | ggtitle("Mean Home Sale Price in Ames, IA") +
73 | theme_tufte() +
74 | theme(plot.title = element_text(size = 16, face="bold"))
75 |
76 | ```
77 |
78 |
79 |
80 | ##### Plot Mean Sale Price by Year and Sale Condition
81 |
82 | ```{r}
83 |
84 | ggplot(mean.facet, aes(Yr.Sold, mean_price, group = Sale.Condition, colour = Sale.Condition)) +
85 | geom_line(size=2) +
86 | scale_y_continuous("Mean Sale Price", labels = dollar) +
87 | scale_x_continuous("Year") +
88 | ggtitle("Mean Home Sale Price in Ames, IA") +
89 | theme_tufte() +
90 | theme(plot.title = element_text(size = 16, face="bold"))
91 |
92 | ```
93 |
94 |
95 |
96 |
97 | ##### Plot Mean Sale Price by Year and Sale Condition (Facets)
98 |
99 | ```{r}
100 |
101 | ggplot(mean.facet, aes(Yr.Sold, mean_price, group = Sale.Condition, colour = Sale.Condition)) +
102 | geom_line(size=2) +
103 | facet_wrap( ~ Sale.Condition, ncol=1) +
104 | scale_y_continuous("Mean Sale Price", labels = dollar) +
105 | scale_x_continuous("Year") +
106 | ggtitle("Mean Home Sale Price in Ames, IA") +
107 | theme_tufte() +
108 | theme(plot.title = element_text(size = 16, face="bold"))
109 |
110 | ```
111 |
112 |
113 |
114 |
115 | ```{r}
116 |
117 | ggplot(mean.facet, aes(Yr.Sold, mean_price, group = Sale.Condition, colour = Sale.Condition)) +
118 | geom_line(size=2) +
119 | facet_wrap( ~ Sale.Condition, ncol=6) +
120 | scale_y_continuous("Mean Sale Price", labels = dollar) +
121 | scale_x_continuous("Year") +
122 | ggtitle("Mean Home Sale Price in Ames, IA") +
123 | theme_tufte() +
124 | theme(plot.title = element_text(size = 16, face="bold"),
125 | axis.text.x = element_text(angle = 45, hjust = 1))
126 |
127 | ```
128 |
129 |
130 |
131 |
132 |
133 | ```{r}
134 |
135 | ggplot(mean.facet, aes(Yr.Sold, mean_price, group = Sale.Condition, colour = Sale.Condition)) +
136 | geom_line(size=2) +
137 | facet_wrap( ~ Sale.Condition, ncol=2) +
138 | scale_y_continuous("Mean Sale Price", labels = dollar) +
139 | scale_x_continuous("Year") +
140 | ggtitle("Mean Home Sale Price in Ames, IA") +
141 | theme_tufte() +
142 | theme(plot.title = element_text(size = 16, face="bold"))
143 |
144 | ```
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
--------------------------------------------------------------------------------
/tutorials/geocoder.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Geocoder & Coordinate Conversion"
3 | author: "Rich Majerus & Kristin Bott"
4 | date: "March 4, 2015"
5 | output: html_document
6 | ---
7 |
8 |
9 | ### Intro
10 | Some introductory text could go here....
11 |
12 |
13 |
14 |
15 | A word about packages here...
16 |
17 | ```{r, warning=FALSE}
18 | library(stringr)
19 | library(httr)
20 | library(rjson)
21 | suppressMessages(library(dplyr))
22 | #library(devtools)
23 | #if (!require("leaflet")) devtools::install_github("rstudio/leaflet")
24 | library(leaflet)
25 | ```
26 |
27 |
28 |
29 | ### Geocoding function...
30 | Some text about geocoding function here
31 |
32 |
33 |
34 | ```{r, echo=FALSE}
35 |
36 | # create geocode function with tryCatch for errors
37 | geo.dsk <- function(addr){
38 |   # Geocode one address string through the datasciencetoolkit.org geocode endpoint.
39 |   # Returns a named character vector c(address, long, lat). long and lat are NA whenever the
40 |   # lookup errors, warns or finds nothing, so rbind() over many addresses keeps one row per address.
41 |   not_found <- c(address = addr, long = NA, lat = NA)
42 |
43 |   out <- tryCatch({
44 |     url <- "http://www.datasciencetoolkit.org/maps/api/geocode/json"
45 |     response <- httr::GET(url, query = list(sensor = "FALSE", address = addr))
46 |     json <- rjson::fromJSON(httr::content(response, as = "text", encoding = "UTF-8"))
47 |     loc <- json['results'][[1]][[1]]$geometry$location
48 |     # a result without coordinates would otherwise shrink the vector to the address alone
49 |     if (is.null(loc$lng) || is.null(loc$lat)) not_found else c(address = addr, long = loc$lng, lat = loc$lat)
50 |   },
51 |   error = function(cond) {
52 |     message(paste("Address not geocoded:", addr))
53 |     message("Here's the original error message:")
54 |     message(conditionMessage(cond))
55 |     not_found
56 |   },
57 |   warning = function(cond) {
58 |     message(paste("Address caused a warning:", addr))
59 |     message("Here's the original warning message:")
60 |     message(conditionMessage(cond))
61 |     # same placeholder as for errors: a NULL here would be dropped by rbind() and misalign rows
62 |     not_found
63 |   },
64 |   finally = {
65 |     message(paste("Processed Address:", addr))
66 |     message("One down...")
67 |   }
68 |   )
69 |
70 |   # coordinates or the NA placeholder, whichever tryCatch() produced
71 |   return(out)
72 | }
73 |
74 |
75 | ```
76 |
77 |
78 |
79 | Here is an example...
80 |
81 |
82 |
83 | ```{r}
84 |
85 | geo.dsk("Reed College, Portland, OR")
86 |
87 | ```
88 |
89 |
90 |
91 |
92 | This works on data frames too!!
93 |
94 |
95 |
96 | ```{r}
97 |
98 | name <- c('Carleton College', 'Pomona College', 'Reed College')
99 | street <- c('300 North College St', '333 N College Way', '3203 SE Woodstock Blvd')
100 | city <- c("Northfield", "Claremont", "Portland")
101 | state <- c('MN', 'CA', 'OR')
102 | zip <- c('55057', '91711', '97202')
103 |
104 | data <- data.frame(name, street, city, state, zip)
105 |
106 |
107 | # create location variable
108 |
109 | data$location <- paste(str_trim(as.character(data$street)),
110 | str_trim(as.character(data$city)),
111 | str_trim(as.character(data$state)),
112 | str_trim(as.character(data$zip)), sep=' ')
113 |
114 |
115 | # geocode data and bind coordinates onto data
116 |
117 | result <- cbind(name= data$name,
118 | as.data.frame(do.call(rbind,
119 | lapply(as.character(data$location), geo.dsk))))
120 |
121 | print(result)
122 |
123 | ```
124 |
125 |
126 |
127 |
128 | ### Coordinate Conversion function...
129 |
130 | If we want these data frames in another coordinate system we can simply convert them...
131 |
132 |
133 |
134 | ```{r}
135 |
136 | # coordinate conversion function
# Project longitude/latitude (decimal degrees) to x/y values and format each
# pair as the text "POINT(x y)". Works element-wise on vectors.
# NOTE(review): the formula looks like spherical (web) Mercator -- confirm.
#   lon  longitude(s) in degrees
#   lat  latitude(s) in degrees
degrees2meters <- function(lon, lat) {
  scale <- 20037508.34
  easting <- lon * scale / 180
  northing <- log(tan((90 + lat) * pi / 360)) / (pi / 180)
  northing <- northing * scale / 180
  paste0("POINT(", easting, " ", northing, ")")
}
144 |
145 |
146 |
147 | ```
148 |
149 |
150 | Here is the first example converted....
151 |
152 |
153 | ```{r}
154 |
155 | # coordinate conversion function
# Same conversion function as defined earlier in this document, repeated so
# the example call right below can be read on its own: degrees in,
# "POINT(x y)" text out.
degrees2meters <- function(lon, lat) {
  x_m <- lon * 20037508.34 / 180
  y_deg <- log(tan((90 + lat) * pi / 360)) / (pi / 180)
  y_m <- y_deg * 20037508.34 / 180
  point_text <- paste0("POINT(", x_m, " ", y_m, ")")
  return(point_text)
}


# convert the coordinates used in the first example
degrees2meters(-122.629179, 45.479171)
166 |
167 |
168 | ```
169 |
170 |
171 | This will work on data frames too!
172 |
173 |
174 | ```{r}
175 |
# Convert every geocoded coordinate pair and keep it next to the college name.
# The coordinate columns hold text (factors when strings are converted to
# factors), so go through as.character() first: as.numeric() on a factor
# returns its integer level codes, not the coordinates.
# degrees2meters() is vectorised, so one call converts all rows.
result.converted <- data.frame(
  name = result$name,
  coords = degrees2meters(as.numeric(as.character(result$long)),
                          as.numeric(as.character(result$lat)))
)


print(result.converted)
183 |
184 |
185 | ```
186 |
187 |
188 | ### Last Step...make a map
189 |
190 |
191 | ```{r}
192 |
193 | # create map
194 |
# leaflet needs numeric coordinates, but geo.dsk() returns them as text, so
# convert first (via as.character() in case the columns are factors)
map_data <- data.frame(
  name = result$name,
  long = as.numeric(as.character(result$long)),
  lat = as.numeric(as.character(result$lat))
)

leaflet(map_data) %>%
  addTiles() %>%
  setView(-93.65, 42.0285, zoom = 3) %>%
  addCircles(lng = ~long, lat = ~lat) %>%
  addPopups(lng = ~long, lat = ~lat, popup = ~paste0(name, "!"))
200 |
201 |
202 | ```
203 |
204 |
205 |
206 |
207 |
208 |
--------------------------------------------------------------------------------
/tutorials/geocoding.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Geocoding"
3 | author: "Instructional Technology Services, Reed College"
4 | output: html_document
5 | ---
6 |
7 |
8 |
9 | ```{r, warning=FALSE}
10 |
11 | # load and install packages
12 |
pkg <- c("httr", "rjson", "dplyr", "stringr", "devtools", "leaflet")
# compare against the installed package *names* only; matching against the
# whole installed.packages() matrix also matches its other fields (for
# example a package whose only suggested package is "leaflet")
new.pkg <- pkg[!(pkg %in% rownames(installed.packages()))]
if (length(new.pkg) > 0) {
  install.packages(new.pkg)
}

suppressMessages(library(httr))
suppressMessages(library(rjson))
suppressMessages(library(dplyr))
suppressMessages(library(stringr))
suppressMessages(library(devtools))

# fall back to the development version if leaflet still cannot be loaded
suppressMessages(if (!require("leaflet")) devtools::install_github("rstudio/leaflet"))
suppressMessages(library(leaflet))
27 | ```
28 |
29 |
30 |
31 | ```{r}
32 |
33 | # create sample data frame of addresses to geocode
34 |
35 | name <- c('Carleton College', 'Pomona College', 'Reed College')
36 | street <- c('300 North College St', '333 N College Way', '3203 SE Woodstock Blvd')
37 | city <- c("Northfield", "Claremont", "Portland")
38 | state <- c('MN', 'CA', 'OR')
39 | zip <- c('55057', '91711', '97202')
40 | data <- data.frame(name, street, city, state, zip)
41 |
42 |
43 | # create location variable
44 |
45 | data$location <- paste(str_trim(as.character(data$street)),
46 | str_trim(as.character(data$city)),
47 | str_trim(as.character(data$state)),
48 | str_trim(as.character(data$zip)), sep=' ')
49 |
50 |
51 | ```
52 |
53 |
54 |
55 | ```{r}
56 |
57 | # create geocode function with tryCatch
58 | # geocoding api is from http://www.datasciencetoolkit.org/
# Geocode one address.
#
# Args:
#   addr: the address to look up, as a single character string.
#
# Returns:
#   A named character vector c(address, long, lat). If the lookup fails or
#   raises a warning, long and lat are NA, so every call returns the same
#   shape: results from lapply() can be stacked with rbind() and bound back
#   onto the input rows without dropping or misaligning any of them.
geo.dsk <- function(addr) {
  url <- "http://www.datasciencetoolkit.org/maps/api/geocode/json"

  # placeholder with the same names as a successful result
  not.geocoded <- c(address = addr, long = NA, lat = NA)

  out <- tryCatch(
    {
      response <- httr::GET(url, query = list(sensor = "FALSE", address = addr))
      # as = "text" asks httr for the unparsed body so rjson can parse it
      json <- rjson::fromJSON(
        httr::content(response, as = "text", encoding = "UTF-8")
      )
      if (length(json[["results"]]) == 0) {
        stop("no results returned for this address")
      }
      loc <- json[["results"]][[1]]$geometry$location
      if (is.null(loc$lng) || is.null(loc$lat)) {
        stop("result has no coordinates")
      }
      # c() coerces the numeric coordinates to character next to the address
      c(address = addr, long = loc$lng, lat = loc$lat)
    },

    error = function(cond) {
      message(paste("Address not geocoded:", addr))
      message("Here's the original error message:")
      # print the text only, rather than re-signalling the condition object
      message(conditionMessage(cond))
      not.geocoded
    },

    warning = function(cond) {
      message(paste("Address caused a warning:", addr))
      message("Here's the original warning message:")
      message(conditionMessage(cond))
      not.geocoded
    },

    finally = {
      message(paste("Processed Address:", addr))
      message("One down...")
    }
  )
  out
}
95 |
96 | ```
97 |
98 |
99 |
100 | ```{r}
101 |
102 | # geocode data and bind coordinates onto data
103 |
104 | result <-
105 | cbind(data,
106 | as.data.frame(
107 | do.call(rbind,
108 | lapply(as.character(data$location), geo.dsk))))
109 |
110 | ```
111 |
112 |
113 |
114 | ```{r}
115 |
116 | # create map
117 |
# leaflet needs numeric coordinates, but geo.dsk() returns them as text, so
# convert first (via as.character() in case the columns are factors)
map_data <- data.frame(
  name = result$name,
  long = as.numeric(as.character(result$long)),
  lat = as.numeric(as.character(result$lat))
)

leaflet(map_data) %>%
  addTiles() %>%
  setView(-93.65, 42.0285, zoom = 3) %>%
  addCircles(lng = ~long, lat = ~lat) %>%
  addPopups(lng = ~long, lat = ~lat, popup = ~paste0(name, "!"))
123 |
124 |
125 | ```
126 |
127 |
--------------------------------------------------------------------------------
/tutorials/histograms.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Creating Histograms in RStudio"
3 | date: Reed College, Instructional Technology Services
4 | output: html_document
5 | ---
6 |
7 | ```{r}
8 | # load the mtcars data
9 | data(mtcars)
10 | ```
11 |
12 | Create a histogram of the mpg variable
13 | ```{r}
14 | hist(mtcars$mpg)
15 | ```
16 |
--------------------------------------------------------------------------------
/tutorials/histograms_I.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Histograms"
3 | author: "Reed College, Instructional Technology Services"
4 | output: html_document
5 | ---
6 |
7 |
8 |
9 |
10 | ####Create Data Frame of Majors and FTE by Department at Reed College
11 | ```{r}
12 |
13 | Departments = c('Art' , 'Music', 'Theatre', 'Anthropology', 'Economics',
14 | 'History', 'Political Science', 'Sociology', 'Chinese', 'Classics',
15 | 'English', 'French', 'German', 'Russian', 'Spanish',
16 | 'Biology', 'Chemistry', 'Mathematics', 'Physics', 'Linguistics',
17 | 'Philosophy', 'Psychology', 'Religion')
18 |
19 | Majors = c(58, 21, 16, 52, 56,
20 | 57, 68, 28, 6, 20,
21 | 150, 5, 2, 7, 3,
22 | 153.5, 74, 72.5, 125, 45,
23 | 75, 98, 25)
24 |
25 | FTE = c(7.8, 4, 6.25, 5, 5.6,
26 | 8.7, 5.5, 3, 3, 4,
27 | 12, 5, 3, 3, 5,
28 | 9, 6.8, 8, 6, 4,
29 | 5.7, 7.7, 4)
30 |
31 | data <- data.frame(Departments, Majors, FTE)
32 |
33 | # Data does not include 94 interdisciplinary majors and 40 undecided majors.
34 | # Majors like bio/chem are split between the two departments
35 | # General Lit majors are included with English
36 | # Dance majors and faculty are included with Theatre
37 | # Major Data: http://www.reed.edu/ir/ir_internal_web/intendedmajors.html and FTE Data: http://www.reed.edu/ir/facfte.html
38 |
39 | ```
40 |
41 |
42 |
43 |
44 | ####Create Histogram using Base R Commands
45 | ```{r}
46 | hist(data$Majors)
47 | ```
48 |
49 |
50 |
51 |
52 | ####Add Additional Elements to Base Histogram
53 | ```{r}
54 | hist(data$Majors,
55 | xlab = "Number of Majors", ylab = "Frequency", main = "Histogram of Majors", pch = 16, # Add labels
56 | breaks=12, # set number of bins
57 | col = "dark blue", lwd = 2) # change color and width of line
58 |
59 |
60 | ```
61 |
62 |
63 |
64 |
65 | ####Create Kernel Density using Base R Commands
66 | ```{r}
67 | plot(density(data$Majors), xlim = c(0, 200))
68 |
69 | ```
70 |
71 |
72 |
73 |
74 | ####Add Additional Elements to Base Density Plot
75 | ```{r}
# kernel density of majors per department, titled as a density plot
plot(density(data$Majors),
     xlim = c(0, 200),
     xlab = "Number of Majors", ylab = "Density", main = "Density Plot of Majors", pch = 16, # Add labels
     col = "dark blue", lwd = 4) # change color and width of line
80 |
81 | ```
82 |
83 |
84 |
85 |
86 |
87 | ####Using ggplot2 to Make a Histogram
88 | ```{r, message=FALSE}
89 | # This demo requires the 'ggplot' package
90 | if( !is.element("ggplot2", installed.packages()[,1]) )
91 | install.packages("ggplot2")
92 |
93 | suppressPackageStartupMessages(library(ggplot2))
94 |
95 | ## Base histogram plot in ggplot
96 | ggplot(data, aes(x=Majors)) + geom_histogram()
97 | ```
98 |
99 |
100 |
101 |
102 |
103 | ####Apply Theme to Histogram Plot
104 | ```{r, message=FALSE}
105 | ggplot(data, aes(x=Majors)) +
106 | geom_histogram() +
107 | theme_classic()
108 | ```
109 |
110 |
111 |
112 |
113 | ####Add Additional Elements to Histogram
114 | ```{r, message=FALSE}
115 |
116 | ggplot(data, aes(x=Majors)) +
117 | geom_histogram(color="dark blue", size=1, fill="light blue", binwidth=15) + # change color and adjust bindwidth
118 | ggtitle("Histogram of Reed College Majors") + # add a title to the plot
119 | theme_classic()
120 |
121 | ```
122 |
123 |
124 |
125 |
126 |
127 | ####Using ggplot to Make a Density Plot
128 | ```{r}
129 |
130 | ggplot(data, aes(x=Majors)) +
131 | geom_density(color="dark blue", size=1, fill="light blue") + # change to geom_density for density plot
132 | ggtitle("Kernal Density of Reed College Majors") +
133 | theme_classic()
134 |
135 | ```
136 |
137 |
138 |
139 |
140 |
141 |
142 | ####Make Your Histogram Interactive with googleVis
143 |
144 | ```{r, warning=FALSE}
145 | ## This demo requires the 'googleVis' package
146 | if( !is.element("googleVis", installed.packages()[,1]) )
147 | install.packages("googleVis")
148 |
149 | suppressPackageStartupMessages(library(googleVis))
150 |
151 |
152 | # make a new data frame with only columns to plot
153 | keep <- c('Departments', 'Majors')
154 | data2 <- data[keep]
155 |
156 | # create interactive histogram plot using googleVis
157 | Hist <- gvisHistogram(data2, options=list(
158 | legend="{ position: 'right', maxLines: 2 }",
159 | colors="['#1A8763']",
160 | width=750, height=500))
161 |
162 | ```
163 |
164 | ```{r, results = 'asis'}
165 | # plot interactive histogram (use 'plot(Hist)' to view in RStudio)
166 | print(Hist, 'chart')
167 |
168 | ```
169 |
170 |
171 |
--------------------------------------------------------------------------------
/tutorials/histograms_pdf.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Histograms in R"
3 | author: "Reed College, Instructional Technology Services"
4 | output: pdf_document
5 | ---
6 |
7 | **Create Histogram using Base R Commands**
8 | ```{r}
9 | cars <- mtcars
10 | hist(cars$mpg)
11 | ```
12 | \newpage
13 |
14 | **Add Additional Elements to Base Histogram**
15 | ```{r}
16 | hist(cars$mpg,
17 | xlab = "MPG", ylab = "Frequency", main = "Histogram of MPG", # Add labels
18 | breaks=12, # set number of bins
19 | col = "dark blue") # change color
20 |
21 |
22 | ```
23 | \newpage
24 |
25 | **Create Kernel Density using Base R Commands**
26 | ```{r}
27 | plot(density(cars$mpg))
28 |
29 | ```
30 | \newpage
31 |
32 | **Add Additional Elements to Base Density Plot**
33 | ```{r}
34 | plot(density(cars$mpg),
35 | xlab = "MPG", ylab = "Density", main = "Density Plot of MPG", # Add labels
36 | col = "dark blue", lwd = 4) # change color and width of line
37 |
38 | ```
39 | \newpage
40 |
41 | **Using ggplot2 to Make a Histogram**
42 | ```{r, message=FALSE}
43 | # This demo requires the 'ggplot' package
44 | if( !is.element("ggplot2", installed.packages()[,1]) )
45 | install.packages("ggplot2")
46 |
47 | suppressPackageStartupMessages(library(ggplot2))
48 |
49 | ## Base histogram plot in ggplot
50 | ggplot(cars, aes(x=mpg)) + geom_histogram()
51 | ```
52 | \newpage
53 |
54 |
55 | **Apply Theme to Histogram Plot**
56 | ```{r, message=FALSE}
57 | ggplot(cars, aes(x=mpg)) +
58 | geom_histogram() +
59 | theme_classic()
60 | ```
61 | \newpage
62 |
63 | **Add Additional Elements to Histogram**
64 | ```{r, message=FALSE}
65 |
# mpg only spans roughly 10 to 34, so a bin width of 15 would leave two or
# three bars; a width of 2 shows the shape of the distribution
ggplot(cars, aes(x=mpg)) +
  geom_histogram(color="dark blue", size=1, fill="light blue", binwidth=2) + # change color and adjust binwidth
  ggtitle("Histogram of MPG") + # add a title to the plot
  theme_classic()
70 |
71 | ```
72 | \newpage
73 |
74 | **Using ggplot to Make a Density Plot**
75 | ```{r}
76 |
77 | ggplot(cars, aes(x=mpg)) +
78 | geom_density(color="dark blue", size=1, fill="light blue") + # change to geom_density for density plot
79 | ggtitle("Kernal Density of MPG") +
80 | theme_classic()
81 |
82 | ```
83 | \newpage
84 |
85 |
86 |
--------------------------------------------------------------------------------
/tutorials/histograms_pdf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/tutorials/histograms_pdf.pdf
--------------------------------------------------------------------------------
/tutorials/line graphs advanced.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Line Graphs"
3 | author: "Reed College, Instructional Technology Services"
4 | output: html_document
5 | ---
6 |
7 |
8 |
9 |
10 | ####Download and Load Data
11 | ```{r}
# Download the Ames housing data into the working directory and read that
# same file back in, so the chunk does not depend on a path that only exists
# on one computer.
ames_file <- "ames.csv"
download.file("http://www.openintro.org/stat/data/ames.csv", destfile = ames_file)
data <- read.csv(ames_file)
14 | ```
15 |
16 |
17 |
18 |
19 | ####Calculate the Mean of each Continuous Variable by Year
20 | ```{r}
21 | # This calculation requires the 'plyr' package
22 | if( !is.element("plyr", installed.packages()[,1]) )
23 | install.packages("plyr")
24 |
25 | library(plyr)
26 |
# list class of each variable
sapply(data, class)

# keep only continuous variables
# (is.numeric() is already TRUE for integer columns, so one test is enough)
data_continuous <- data[, vapply(data, is.numeric, logical(1))]

# calculate mean for every column in the data frame by year
# (TRUE is spelled out because T is an ordinary variable that can be reassigned)
means <- ddply(data_continuous, .(Yr.Sold), numcolwise(mean), na.rm = TRUE)
35 |
36 |
37 | ```
38 |
39 |
40 |
41 |
42 | ####Create Line Graph Plot Function
43 | ```{r, message=FALSE}
44 |
45 | # This demo requires the 'ggplot' package
46 | if( !is.element("ggplot2", installed.packages()[,1]) )
47 | install.packages("ggplot2")
48 |
49 | suppressPackageStartupMessages(library(ggplot2))
50 |
51 | # this function will create a line graph of every variable in your data frame
52 | # the function takes two arguments 'x' (the name of your data frame) and 'time' (the name variable to be plotted on the x-axis)
53 |
# Print one line graph for every column of a data frame.
#   x     data frame; each column in turn becomes the y variable
#   time  string naming the x-axis variable, handed to aes_string()
#   na.rm, ...  accepted but not used by the body
line <- function(x, time, na.rm = TRUE, ...) {
  for (column in names(x)) {
    chart <- ggplot(x, aes_string(x = time, y = column)) +
      geom_line(size = 2, color = "darkblue") +
      theme_classic()
    print(chart)
  }
}
59 |
60 | ```
61 |
62 |
63 |
64 |
65 | ####Run Line Graph Function to Create Plots for an Entire Data Frame
66 | ```{r}
67 | line(x=means, time="means$Yr.Sold")
68 | ```
69 |
--------------------------------------------------------------------------------
/tutorials/line graphs.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Line Graphs"
3 | author: "Reed College, Instructional Technology Services"
4 | output: html_document
5 | ---
6 |
7 |
8 |
9 |
10 | ####Download and Load Data
11 | ```{r}
# Download the Ames housing data into the working directory and read that
# same file back in, so the chunk does not depend on a path that only exists
# on one computer.
ames_file <- "ames.csv"
download.file("http://www.openintro.org/stat/data/ames.csv", destfile = ames_file)
data <- read.csv(ames_file)
14 | ```
15 |
16 |
17 |
18 |
19 | ####Calculate Mean Sale Price by Year
20 | ```{r}
21 | # This calculation requires the 'plyr' package
22 | if( !is.element("plyr", installed.packages()[,1]) )
23 | install.packages("plyr")
24 |
25 | library(plyr)
26 |
27 | mean <- ddply(data, .(Yr.Sold), summarize,
28 | mean_price = mean(SalePrice))
29 | ```
30 |
31 |
32 |
33 |
34 |
35 |
36 | ####Create Line Graph using Base R Commands
37 | ```{r}
38 | plot(mean$Yr.Sold, mean$mean_price, type = "o")
39 | ```
40 |
41 |
42 |
43 |
44 | ####Add Additional Elements to Base Line Graph
45 | ```{r}
46 |
47 | plot(mean$Yr.Sold, mean$mean_price, type = "o",
48 | xlab = "Year", ylab = "Mean Sale Price", main = "Line Graph of Mean Home Sale Price in Ames, IA", pch = 16,
49 | col = "dark blue", lwd = 3, cex = 2)
50 |
51 |
52 | ```
53 |
54 |
55 |
56 |
57 |
58 | ####Using ggplot2 to Make a Line Graph
59 | ```{r, message=FALSE}
60 | # This demo requires the 'ggplot' package
61 | if( !is.element("ggplot2", installed.packages()[,1]) )
62 | install.packages("ggplot2")
63 |
64 | suppressPackageStartupMessages(library(ggplot2))
65 |
66 | ggplot(mean, aes(Yr.Sold, mean_price)) +
67 | geom_line()
68 | ```
69 |
70 |
71 |
72 |
73 |
74 | ####Apply Theme to Line Graph
75 | ```{r, message=FALSE}
76 |
77 | if( !is.element("ggthemes", installed.packages()[,1]) )
78 | install.packages("ggthemes")
79 |
80 | if( !is.element("scales", installed.packages()[,1]) )
81 | install.packages("scales")
82 |
83 | suppressPackageStartupMessages(library(ggthemes))
84 | suppressPackageStartupMessages(library(scales))
85 |
86 | ggplot(mean, aes(Yr.Sold, mean_price)) +
87 | geom_line() +
88 | theme_tufte()
89 | ```
90 |
91 |
92 |
93 |
94 | ####Add Additional Elements to Line Graph
95 | ```{r, message=FALSE}
96 |
97 | ggplot(mean, aes(Yr.Sold, mean_price)) +
98 | geom_line(color="dark blue", size=2) +
99 | scale_y_continuous("Mean Sale Price", labels = dollar) +
100 | scale_x_continuous("Year") +
101 | ggtitle("Mean Home Sale Price in Ames, IA") +
102 | theme_tufte() +
103 | theme(plot.title = element_text(size = 16, face="bold"))
104 |
105 | ```
106 |
107 |
108 | ####Make Your Line Graph Interactive with googleVis
109 | ```{r, warning=FALSE}
110 | ## This demo requires the 'googleVis' package
111 | if( !is.element("googleVis", installed.packages()[,1]) )
112 | install.packages("googleVis")
113 |
114 | suppressPackageStartupMessages(library(googleVis))
115 | suppressPackageStartupMessages(library(scales))
116 |
117 | # add names to new data frame as factor
118 | mean$pop.html.tooltip=dollar_format()(mean$mean_price)
119 |
120 | # create interactive scatter plot using googleVis
121 | line <- gvisScatterChart(mean,
122 | options=list(tooltip="{isHtml:'True'}",
123 | legend="none", lineWidth=5, pointSize=3,
124 | vAxis="{title:'Mean Sale Price'}",
125 | hAxis="{title:'Year'}",
126 | width=750, height=500))
127 |
128 | ```
129 |
130 |
131 |
132 |
133 |
134 | ```{r, results = 'asis'}
135 | # plot interactive line graph (use 'plot(line)' to view in RStudio)
136 | print(line, 'chart')
137 |
138 | ```
139 |
140 |
141 |
142 |
143 |
144 | ####Add an Edit Button to Your Line Graph
145 | ```{r, warning=FALSE}
146 | ## This demo requires the 'googleVis' package
147 | if( !is.element("googleVis", installed.packages()[,1]) )
148 | install.packages("googleVis")
149 |
150 | suppressPackageStartupMessages(library(googleVis))
151 |
152 | # create interactive histogram plot using googleVis
153 | line2 <- gvisScatterChart(mean,
154 | options=list(tooltip="{isHtml:'True'}",
155 | legend="none", lineWidth=5, pointSize=3,
156 | vAxis="{title:'Mean Sale Price'}",
157 | hAxis="{title:'Year'}",
158 | width=750, height=500,
159 | gvis.editor="Edit Graph"))
160 |
161 | ```
162 |
163 |
164 |
165 |
166 | ```{r, results = 'asis'}
167 | # plot interactive line graph (use 'plot(line2)' to view in RStudio)
168 | print(line2, 'chart')
169 |
170 | ```
171 |
172 |
173 |
--------------------------------------------------------------------------------
/tutorials/load_data.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Loading Data"
3 | author: "Reed College, Instructional Technology Services"
4 | output: html_document
5 | ---
6 |
7 | R comes with a number of example data sets. You can view these data sets in RStudio by typing 'data()'.
8 | ```{r}
9 | summary(mtcars) # R has automatically loaded the mtcars data frame for us
10 | ```
11 |
12 |
13 |
14 | If you want to analyze other data in R there are several options for reading your data into R-Studio. Among the most common are...
15 |
16 |
17 |
18 | **From a .csv file:**
19 | ```{r, eval=FALSE}
20 | # use the read.csv command
21 | cars <- read.csv('filepath/filename.csv', row.names=1)
22 | # row.names = 1 tells R that the first column of our data contains the row names (variable names are read from the first row by default)
23 |
24 | ```
25 |
26 |
27 | **From a Stata data file:**
28 | ```{r, eval=FALSE}
29 | # use the read.dta command that is part of the foreign package
30 | # if you have not already installed the foreign package you can type install.packages("foreign") to install it
31 |
32 | library(foreign)
33 | mydata <- read.dta("filepath/filename.dta")
34 |
35 | ```
36 |
37 |
38 | **From an Excel data file:**
39 | ```{r, eval=FALSE}
40 | # use the read.xlsx command that is part of the xlsx package
41 | # if you have not already installed the xlsx package you can type install.packages("xlsx") to install it
42 | # alternatively you can save your file in .csv format in Excel
43 |
44 | library(xlsx)
45 | mydata <- read.xlsx("filepath/filename.xlsx", 2) # the 2 tells R to read in the second page in the Excel workbook
46 |
47 | ```
48 |
49 |
--------------------------------------------------------------------------------
/tutorials/loops_with_ggplot2.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Using Loops with ggplot2"
3 | author: "Rich Majerus"
4 | output: html_document
5 | ---
6 |
7 |
8 |
9 |
10 |
11 | ```{r, warning=FALSE}
12 | library(stringr)
13 | library(reshape2)
14 | library(ggplot2)
15 | library(ggthemes)
16 | library(pander)
17 |
18 | # update this file path to point toward appropriate folders on your computer
19 |
20 | # folder where you want the graphs to be saved:
21 | results <- "/Users/majerus/Desktop/NJAIS/results/"
22 |
23 | # folder where the data is saved:
24 | labor <- "/Users/majerus/Desktop/NJAIS/data/dept_labor/age_lvl/"
25 |
# create list of all .csv files in folder
# (pattern is a regular expression, not a shell glob, so match a literal
# ".csv" at the end of the file name)
file_list <- list.files(path = labor, pattern = "\\.csv$")

# read in each .csv file in file_list and rbind them into a data frame called data.labor
# the first four characters of each file name are used as the year
data.labor <-
  do.call("rbind",
          lapply(file_list,
                 function(x)
                   cbind(year = as.numeric(str_sub(x, 1, 4)),
                         read.csv(paste0(labor, x),
                                  stringsAsFactors = FALSE))))
37 |
38 | # remove commas from numeric variables
39 | data.labor[,c(3:12)] <- lapply(
40 | data.labor[,c(3:12)],
41 | function(x) {as.numeric(
42 | gsub(",", "", x))})
43 |
44 | # drop 2010 from data then data and projections will occur in 5 year intervals
45 | data.labor <- subset(data.labor, data.labor$year!=2010)
46 |
47 | # rename cols
48 | colnames(data.labor) <- c("Year", "County", "Total",
49 | "Under 5", '5 to 9 years', '10 to 14 years', '15 to 19 years',
50 | "X20.24", "X25.29", "X30.34", "X35.39", "X40.44")
51 |
52 | # select columns of interest
53 | keep <- c("Year", "County", "Total", 'Under 5',
54 | '5 to 9 years', '10 to 14 years', '15 to 19 years')
55 |
56 | data.labor <- data.labor[keep]
57 |
58 | # melt data to long format
59 | data.labor.long <- melt(data.labor, id.vars=c("County", "Year"), variable.name="category")
60 |
61 | # remove total projections and state level projections from data
62 | data.labor.long <- subset(data.labor.long, data.labor.long$category!='Total')
63 | data.labor.long <- subset(data.labor.long, data.labor.long$County!='New Jersey')
64 |
65 | # create graphing function
# Print one faceted population line graph per county.
#
# Args:
#   df:    long-format data frame with the columns used below:
#          County, Year, category and value.
#   na.rm: ignore missing values when finding each county's y-axis maximum.
#   ...:   accepted but not used.
#
# The plots are printed as a side effect; the function returns NULL invisibly.
county.graph <- function(df, na.rm = TRUE, ...) {

  # create list of counties in data to loop over
  county_list <- unique(df$County)

  # create for loop to produce ggplot2 graphs
  for (i in seq_along(county_list)) {

    county_data <- subset(df, df$County == county_list[i])

    # top of the y-axis in thousands; honours na.rm so a single missing value
    # does not turn the axis limits into NA
    y_max <- max(county_data$value, na.rm = na.rm) / 1000

    # create plot for each county in df
    county_plot <-
      ggplot(county_data,
             aes(Year, value/1000, group = County, colour = category)) +

      geom_line(size = 2) +
      facet_wrap(~ category, ncol = 2) +

      theme_pander() +
      theme(legend.position = "none") +

      scale_y_continuous("County Population within Age Categories (thousands)",
                         limits = c(0, y_max)) +
      scale_x_continuous("Year") +

      ggtitle(paste(county_list[i], ' County, New Jersey \n',
                    "County Population Projection within Age Categories (thousands) \n",
                    sep=''))

    # save plots as .png
    # ggsave(county_plot, file=paste(results,
    #                                'projection_graphs/county_graphs/',
    #                                county_list[i], ".png", sep=''), scale=2)

    # save plots as .pdf
    # ggsave(county_plot, file=paste(results,
    #                                'projection_graphs/county_graphs/',
    #                                county_list[i], ".pdf", sep=''), scale=2)

    # print plots to screen
    print(county_plot)
  }
}
107 |
108 | # run graphing function on long df
109 | county.graph(data.labor.long)
110 | ```
111 |
112 |
--------------------------------------------------------------------------------
/tutorials/markdown_formatting.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Markdown Formatting"
3 | date: Reed College, Instructional Technology Services
4 | output: html_document
5 | ---
6 |
7 |
8 | #### **Bold Text**
9 | ```{r}
10 | # **text** or __text__ makes your text between the ** or __ bold
11 | ```
12 | For example, __this is bold text that I made using two underscores__
13 |
14 |
15 | #### *Italic Text*
16 | ```{r}
17 | # *text* or _text_ makes your text between the * or _ italic
18 | ```
19 | For example, _this is italic text that I made using one underscore_
20 |
21 |
22 | #### Line breaks
23 | ```{r}
24 | # You can include a line break by ending a line with two (or more) spaces and a return.
25 | # Alternatively, you can use html
26 | # If we put two (or more) spaces and a return after the comma in the preceding example we get the following:
27 | ```
28 | For example,
29 | _this is italic text that I made using one underscore_
30 |
31 |
32 |
33 | #### Inline R Code
34 | ```{r}
35 | # You can insert R code and calculations directly into your text
36 | # For example, the following line outside of a code chunk is displayed below.
37 | # The average mpg for cars in the data set is `r round(mean(mtcars$mpg), 2)` miles per gallon.
38 | ```
39 |
40 | The average mpg for cars in the data set is `r round(mean(mtcars$mpg), 2)` miles per gallon.
41 |
42 |
43 |
44 | #### Page Breaks
45 | In html documents you will not need page breaks.
46 | ```{r}
47 | # When you are producing pdf documents, you can use the latex command "\pagebreak" to insert a page break
48 | ```
49 |
50 |
51 |
52 | #### Headings
53 | ```{r}
54 | # You can create headings with "#"
55 | # The following text starts with #Heading and ends with ######Heading
56 |
57 | ```
58 |
59 | # Heading
60 | ## Heading
61 | ### Heading
62 | #### Heading
63 | ##### Heading
64 | ###### Heading
65 |
66 |
67 |
--------------------------------------------------------------------------------
/tutorials/matriculants.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Reed College Matriculants"
3 | author: "Rich Majerus"
4 | date: "March 11, 2015"
5 | output: html_document
6 | ---
7 | ```{r, echo=FALSE, warning=FALSE}
8 | # install packages
pkg <- c("rvest", "dplyr", "reshape2", "googleVis", "magrittr", "ggplot2", "ggthemes", "RColorBrewer")

# compare against the installed package *names* only; matching against the
# whole installed.packages() matrix also matches its other fields
new.pkg <- pkg[!(pkg %in% rownames(installed.packages()))]

if (length(new.pkg) > 0) {
  install.packages(new.pkg)
}
16 |
17 | # load packages
18 | suppressMessages(library(rvest))
19 | suppressMessages(library(dplyr))
20 | suppressMessages(library(reshape2))
21 | suppressMessages(library(googleVis))
22 | suppressMessages(library(ggplot2))
23 | suppressMessages(library(ggthemes))
24 | suppressMessages(library(RColorBrewer))
25 |
# download html file
# (read_html() replaces html(), which rvest deprecated and later removed)
webpage <- read_html("http://www.reed.edu/ir/geographic_states.html")
28 |
29 | # the data we want is in the first table on this page
30 | # the html_table() command coerces the data into a data frame
31 | webpage %>%
32 | html_nodes("table") %>%
33 | .[[1]] %>%
34 | html_table()
35 |
36 | # repeat above code but store results in a data frame
37 | data <-
38 | webpage %>%
39 | html_nodes("table") %>%
40 | .[[1]] %>%
41 | html_table()
42 |
43 | # we can now work with this data from the web as a data frame in R
44 | # remove total row from data
45 | data <-
46 | data %>%
47 | filter(State!='Total')
48 |
49 | # reshape data for plotting
50 | data_long <- melt(data, id='State')
51 |
52 | # rename columns in long data frame
53 | colnames(data_long) <- c('State', 'Year', 'Matriculants')
54 |
55 | # select states that we want to graph
56 | keep <- c('Iowa', 'Minnesota', 'South Dakota', 'Wisconsin')
57 | ```
58 |
59 | ```{r}
60 | # plot selected states
61 | ggplot(
62 | subset(data_long, data_long$State %in% keep),
63 | aes(Year, Matriculants, group = State, colour = State)) +
64 | geom_line(size=2)
65 | ```
66 |
67 | ```{r}
68 | # plot selected states (facets)
69 | ggplot(
70 | subset(data_long, data_long$State %in% keep),
71 | aes(Year, Matriculants, group = State, colour = State)) +
72 | geom_line(size=2) +
73 | facet_wrap( ~ State,
74 | ncol=2)
75 | ```
76 |
77 | ```{r}
# plot selected states (facets) with title, theme and different colors
selected_states <- subset(data_long, data_long$State %in% keep)

ggplot(
  selected_states,
  aes(Year, Matriculants, group = State, colour = State)) +
  geom_line(size=2) +
  # one column per state actually present, instead of inferring the count
  # from the row total divided by a hard-coded 16 rows per state
  facet_wrap( ~ State,
              ncol=length(unique(selected_states$State))) +
  ggtitle("Reed College Matriculants by State") +
  theme_tufte() +
  theme(plot.title = element_text(size = 16, face="bold")) +
  scale_colour_brewer("Colors in Set1", palette="Set1")
89 | ```
90 |
91 |
--------------------------------------------------------------------------------
/tutorials/missing_data.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Missing Data"
3 | author: "Reed College, Instructional Technology Services"
4 | output: html_document
5 | ---
6 |
7 |
8 |
9 |
10 | ####Create Missing Values in mtcars data
11 | ```{r}
12 | # For this example we will pretend we are missing mpg data for Merc 280, Dodge Challenger and Ferrari Dino in mtcars
13 |
14 | # overwrite mpg with the -99 missing-value code for the three cars named below
15 | mtcars$mpg <- ifelse(
16 |   rownames(mtcars) %in% c('Merc 280', 'Dodge Challenger', 'Ferrari Dino'),
17 |   -99, mtcars$mpg)
18 | ```
19 |
20 |
21 |
22 |
23 | ####Change Missing Value Code to NA
24 | ```{r}
25 | mtcars[mtcars==-99] <- NA
26 | ```
27 |
28 |
29 |
30 |
31 | ####Identify Missing Values in Data Frame
32 | ```{r}
33 | # list total number of missing values by variable
34 | colSums(is.na(mtcars))
35 | ```
36 |
37 | ```{r}
38 | # list names of cars with missing mpg
39 | rownames(mtcars)[is.na(mtcars$mpg)]
40 | ```
41 |
42 |
43 |
44 |
45 | ####Calculate Mean MPG
46 | ```{r}
47 | mean(mtcars$mpg) # missing values mess up even simple calculations
48 | ```
49 |
50 | ```{r}
51 | mean(mtcars$mpg, na.rm = TRUE) # we can get around this by telling R to ignore missing values
52 | ```
53 |
54 |
55 |
56 |
57 | ####Mean Imputation
58 | ```{r}
59 | # Mean Imputation
60 | mtcars.imputed <- mtcars
61 | mtcars.imputed$mpg <- ifelse(is.na(mtcars$mpg), mean(mtcars$mpg, na.rm = TRUE), mtcars$mpg)
62 |
63 | colSums(is.na(mtcars.imputed)) # no more missing data
64 | summary(mtcars.imputed$mpg)
65 | ```
66 |
67 |
68 |
69 |
70 | ####Mean Imputation over every Column
71 | ```{r}
72 | # Mean imputation for every numeric column of df; non-numeric columns are returned unchanged.
73 | mean.imputation <- function(df,...) {
74 |   # assigning into df[] keeps the data frame (and its row names); apply() would coerce it to a matrix
75 |   df[] <- lapply(df, function(x) if (is.numeric(x)) replace(x, is.na(x), mean(x, na.rm = TRUE)) else x)
76 |   df }
77 | mtcars.imputed <- mean.imputation(mtcars)
78 |
79 | colSums(is.na(mtcars.imputed)) # no more missing data
80 | ```
81 |
82 |
83 |
84 |
85 | ####Percentile Imputation
86 | ```{r}
87 | # Imputation
88 | mtcars.imputed <- mtcars
89 | mtcars.imputed$mpg[is.na(mtcars.imputed$mpg)] <- quantile(mtcars.imputed$mpg, .95, na.rm = TRUE) # impute missing with 95th percentile
90 |
91 | colSums(is.na(mtcars.imputed)) # no more missing data
92 | summary(mtcars.imputed$mpg)
93 |
94 | ```
95 |
96 |
97 |
98 |
99 |
100 |
--------------------------------------------------------------------------------
/tutorials/read_and_summarize_multiple_txt.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Read Multiple .txt files and Write Summary Stats"
3 | output: html_document
4 | ---
5 |
6 |
7 |
8 | #### Install, Update and Load Packages
9 | ```{r, warning=FALSE}
10 | pkg <- c("stringr", "reshape2", "dplyr", "ggplot2", "magrittr")
11 | # compare against installed package names only (the row names of the installed.packages() matrix)
12 | new.pkg <- pkg[!(pkg %in% rownames(installed.packages()))]
13 |
14 | if (length(new.pkg) > 0) {
15 |   install.packages(new.pkg)
16 | }
17 |
18 | library(stringr)
19 | library(reshape2)
20 | suppressPackageStartupMessages(library(dplyr))
21 | library(ggplot2)
22 |
23 |
24 | ```
25 |
26 |
27 |
28 | #### Read in Data
29 | ```{r}
30 | # update this file path to point toward appropriate folder on your computer
31 | folder <- "/Users/majerus/Desktop/thesis_projects/linguistics/Yevgeniy/exp1/"
32 | # pattern is a regular expression, not a glob: "\\.txt$" keeps only names ending in .txt
33 | file_list <- list.files(path = folder, pattern = "\\.txt$")
34 | # read in each .txt file in file_list and rbind them into a data frame called data
35 | data <-
36 |   do.call("rbind",
37 |           lapply(file_list,
38 |                  function(x)
39 |                    read.table(paste0(folder, x),
40 |                               header = TRUE,
41 |                               stringsAsFactors = FALSE)))
42 |
43 |
44 | ```
45 |
46 |
47 | #### Clean Data
48 | ```{r}
49 | clean.data <- function(df){  # returns df with s1-s3, answer and correct added and reactionTime made numeric
50 |   stim_parts <- colsplit(df$stimulus, ',', names = c('s1','s2', 's3'))  # stimulus split on commas
51 |   out <- cbind(df, stim_parts)
52 |   out$answer <- ifelse(str_count(out$stimulus, 'A') == 2, 'A', 'B')  # 'A' when stimulus holds exactly two 'A's
53 |   out$correct <- ifelse(out$response == out$answer, 1, 0)  # 1 when response equals answer, otherwise 0
54 |   out$reactionTime <- as.numeric(out$reactionTime)
55 |   out
56 | }
57 | data <- clean.data(data)
58 | ```
59 |
60 |
61 | #### Save Cleaned and Combined Data
62 | ```{r}
63 | write.csv(data, paste(folder,'cleaned_data.csv', sep = ''), row.names = FALSE)
64 | ```
65 |
66 |
67 | #### Create Data Frame of Summary Statistics
68 | ```{r}
69 | summary_stats <-
70 | data %>%
71 | group_by(subject, correct, answer) %>%
72 | summarise(count = n(),
73 | mean_reactionTime = mean(reactionTime, na.rm = TRUE),
74 | sd_reactionTime = sd(reactionTime, na.rm = TRUE),
75 | min_reactionTime= min(reactionTime, na.rm = TRUE),
76 | max_reactionTime = max(reactionTime, na.rm = TRUE))
77 | ```
78 |
79 |
80 | #### Save Summary Statistics
81 | ```{r}
82 | write.csv(summary_stats, paste(folder,'summary_stats.csv', sep = ''), row.names = FALSE)
83 | ```
84 |
85 |
--------------------------------------------------------------------------------
/tutorials/reading_and_writing.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Reading and Writing .csv Files in RStudio"
3 | date: Reed College, Instructional Technology Services
4 | output: pdf_document
5 | ---
6 |
7 |
8 | **Save a data frame to .csv file using the write.csv command**
9 | ```{r}
10 | data(mtcars)
11 |
12 | # use the write.csv command followed by the file path (i.e. where you would like to save the file)
13 | # row.names = TRUE writes the row names (the car names) to the file; spell out TRUE because T can be reassigned
14 | write.csv(mtcars, '/Users/majerus/Desktop/R/intro/data/cars.csv', row.names = TRUE)
15 | # remove the cars data from the workspace
16 | rm(mtcars)
17 | ```
18 |
19 |
20 |
21 | **Load data from a .csv file using the read.csv command**
22 | ```{r}
23 | # use the read.csv command followed by the file path
24 | # row.names=1 tells R that the data in the first column are the names of the rows
25 | cars <- read.csv('/Users/majerus/Desktop/R/intro/data/cars.csv', row.names=1)
26 |
27 | ```
28 |
29 |
30 | **Loading multiple .csv files as separate data frames**
31 | ```{r}
32 |
33 | folder <- "/Users/majerus/Desktop/R/intro/data/" # path to folder that holds multiple .csv files
34 | file_list <- list.files(path = folder, pattern = "\\.csv$") # regex: only file names ending in .csv
35 |
36 | # read in each .csv file in file_list and create a data frame with the same name as the .csv file
37 | # seq_along() makes the loop a no-op when no files are found (1:length() would iterate over c(1, 0))
38 | for (i in seq_along(file_list)) {
39 |   assign(file_list[i], read.csv(paste0(folder, file_list[i])))
40 | }
41 |
42 | ```
43 |
44 |
45 | **Loading multiple .csv files into the same data frame**
46 | ```{r}
47 |
48 | folder <- "/Users/majerus/Desktop/R/intro/data/" # path to folder that holds multiple .csv files
49 | file_list <- list.files(path = folder, pattern = "\\.csv$") # regex: only file names ending in .csv
50 |
51 | # read in each .csv file in file_list and rbind them into a data frame called data
52 | data <-
53 |   do.call("rbind",
54 |           lapply(file_list,
55 |                  function(x)
56 |                    read.csv(paste0(folder, x),
57 |                             stringsAsFactors = FALSE)))
58 |
59 |
60 | ```
61 |
62 |
63 | **Load data from a Stata data file**
64 | ```{r, eval=FALSE}
65 | # use the read.dta command that is part of the foreign package
66 | # type install.packages("foreign") to install the foreign package
67 |
68 | library(foreign)
69 | mydata <- read.dta("filepath/filename.dta")
70 |
71 | ```
72 |
73 |
74 |
75 | **Load data from an Excel data file**
76 | ```{r, eval=FALSE}
77 | # use the read.xlsx command that is part of the xlsx package
78 | # type install.packages("xlsx") to install the xlsx package
79 |
80 | library(xlsx)
81 | mydata <- read.xlsx("filepath/filename.xlsx", 2)
82 | # the 2 tells R to read in the second page in the Excel workbook
83 |
84 | ```
85 |
86 |
87 |
88 |
89 |
90 |
--------------------------------------------------------------------------------
/tutorials/reading_and_writing.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/tutorials/reading_and_writing.pdf
--------------------------------------------------------------------------------
/tutorials/regex.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "regex"
3 | author: "Reed College, Instructional Technology Services"
4 | output: html_document
5 | ---
6 |
7 |
8 |
9 |
10 |
11 | #### Regex Classes
12 |
13 | [:alnum:]
14 | Alphanumeric characters: [:alpha:] and [:digit:].
15 |
16 | [:alpha:]
17 | Alphabetic characters: [:lower:] and [:upper:].
18 |
19 | [:blank:]
20 | Blank characters: space and tab, and possibly other locale-dependent characters such as non-breaking space.
21 |
22 | [:cntrl:]
23 | Control characters. In ASCII, these characters have octal codes 000 through 037, and 177 (DEL). In another character set, these are the equivalent characters, if any.
24 |
25 | [:digit:]
26 | Digits: 0 1 2 3 4 5 6 7 8 9.
27 |
28 | [:graph:]
29 | Graphical characters: [:alnum:] and [:punct:].
30 |
31 | [:lower:]
32 | Lower-case letters in the current locale.
33 |
34 | [:print:]
35 | Printable characters: [:alnum:], [:punct:] and space.
36 |
37 | [:punct:]
38 | Punctuation characters:
39 | ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~.
40 |
41 | [:space:]
42 | Space characters: tab, newline, vertical tab, form feed, carriage return, space and possibly other locale-dependent characters.
43 |
44 | [:upper:]
45 | Upper-case letters in the current locale.
46 |
47 | [:xdigit:]
48 | Hexadecimal digits:
49 | 0 1 2 3 4 5 6 7 8 9 A B C D E F a b c d e f.
50 |
51 | See http://stat.ethz.ch/R-manual/R-devel/library/base/html/regex.html for more information.
52 |
53 |
54 |
55 | ##### Using Regular Expressions in R
56 |
57 | ```{r}
58 |
59 | ```
60 |
61 |
62 |
63 | ##### Sampling
64 |
65 | ```{r}
66 |
67 | ```
68 |
--------------------------------------------------------------------------------
/tutorials/reordering_geom_bar.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Reordering Bars by Frequency in ggplot2"
3 | output: html_document
4 | ---
5 |
6 |
7 |
8 | ```{r, warning=FALSE}
9 |
10 | suppressMessages(library(ggthemes))
11 | suppressMessages(library(ggplot2))
12 | suppressMessages(library(dplyr))
13 |
14 | cars <- mtcars
15 |
16 | data <-
17 | cars %>%
18 | group_by(carb) %>%
19 | summarise(mpg=mean(mpg))
20 |
21 | ggplot(data=data, aes(x=carb, y=mpg)) +
22 | geom_bar(stat="identity")
23 |
24 | data$carb <- factor(data$carb, levels = data$carb[order(-data$mpg)])
25 |
26 | ggplot(data=data, aes(x=carb, y=mpg)) +
27 | geom_bar(stat="identity")
28 |
29 | ggplot(data = data, aes(x = carb, y = mpg)) +
30 |   geom_bar(stat = "identity", fill = "dark blue") +  # a constant colour is set outside aes(); inside aes() it is mapped as data and shown in a legend
31 |   theme_tufte()
32 |
33 |
34 | ```
35 |
36 |
--------------------------------------------------------------------------------
/tutorials/reproducible_research.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Reproducible Research"
3 | author: "Rich Majerus"
4 | output:
5 | pdf_document:
6 | fig_caption: true
7 | ---
8 |
9 | ```{r, echo=FALSE}
10 | # define paths and files (this is all you need to change to run this code on your machine)
11 | folder <- '/Users/majerus/Desktop/2014 projects/blog/post1_logs/'
12 | file <- 'state_enrollment_reed.csv'
13 |
14 | # load libraries
15 | library(ggplot2)
16 | library(ggthemes)
17 | library(RColorBrewer)
18 | library(xtable)
19 |
20 | # load data
21 | state <- read.csv(paste(folder, file, sep=''))
22 |
23 | # remove Washington, D.C. from data for mapping
24 | state <- subset(state, state$State!='Washington, DC')
25 |
26 | # create variable that is log of 2013 data
27 | state$l2013.t <- log(ifelse(state$X2013!=0, state$X2013, 1))
28 |
29 | # rename columns for mapping
30 | colnames(state) <- c('state', 'yr_2007', 'yr_2008', 'yr_2009', 'yr_2010', 'yr_2011', 'yr_2012', 'yr_2013', 'log_2013')
31 | state$state <- tolower(state$state)
32 |
33 | # read in mapping data
34 | state_df <- map_data("state")
35 |
36 | # join reed data and mapping data at the state level
37 | choropleth <- merge(state_df, state, by.x = "region", by.y = "state")
38 |
39 | # order data to properly display on map
40 | choropleth <- choropleth[order(choropleth$order), ]
41 |
42 | # create map of 2013 enrollment
43 | map13 <- ggplot(choropleth, aes(long, lat, group = group)) +
44 | geom_polygon(aes(fill = yr_2013)) +
45 | coord_fixed() +
46 | theme_tufte() +
47 | scale_fill_gradientn(colours=brewer.pal(9,"Greens"), name = " ") +
48 | scale_x_continuous("Longitude") +
49 | scale_y_continuous("Latitude")
50 |
51 | # create map of log of 2013 enrollment
52 | map13l <- ggplot(choropleth, aes(long, lat, group = group)) +
53 | geom_polygon(aes(fill = log_2013))+
54 | coord_fixed() +
55 | theme_tufte() +
56 | scale_fill_gradientn(colours=brewer.pal(9,"Greens"), name = " ") +
57 | scale_x_continuous("Longitude") +
58 | scale_y_continuous("Latitude")
59 |
60 | ```
61 |
62 | You can write your entire paper (text, code, analysis, graphics, etc.) all in R Markdown. As an example, the following maps show the geographic distribution of Reed College's enrolling students. You can reproduce this example in RStudio with this [code](https://files.reed.edu/?path=%2Fafs%2Freed.edu%2Fuser%2Fm%2Fa%2Fmajerus%2FPublic%2Ftutorials%2Fcode%2Freproducible_research.Rmd) and this [data](https://files.reed.edu/?path=%2Fafs%2Freed.edu%2Fuser%2Fm%2Fa%2Fmajerus%2FPublic%2Ftutorials%2Fdata%2Fstate_enrollment_reed.csv). Additionally, you can access a dynamic version of this analysis [online](http://blogs.reed.edu/ed-tech/2014/08/choropleth-maps/). Figure 1 shows the raw matriculant data from 2013 mapped by state. The darker a state's shading, the more matriculants from that state.
63 |
64 | ```{r, echo=FALSE, fig.cap="Domestic Geographic Distribution of 2013 Entering Class"}
65 | map13
66 | ```
67 |
68 | However, we may be interested in learning more about the variation in matriculants across all states rather than identifying the states that account for the greatest number of matriculants. One way to approach this task is to map the log of matriculants. Log transforming a variable that contains exceptionally large values (i.e., a right skewed variable) pulls those large values closer to the mean and yields a more symmetrically distributed variable. As for the map, log transforming the number of matriculants increases the variation in the color gradient across states and enables us to better visualize the distribution of Reed's matriculants across the entire country.
69 |
70 | ```{r, echo=FALSE, fig.cap="Domestic Geographic Distribution of 2013 Entering Class (Log Transformed)"}
71 | map13l
72 | ```
73 |
74 |
--------------------------------------------------------------------------------
/tutorials/reproducible_research.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/tutorials/reproducible_research.pdf
--------------------------------------------------------------------------------
/tutorials/reproducible_research.tex:
--------------------------------------------------------------------------------
1 | \documentclass[]{article}
2 | \usepackage[T1]{fontenc}
3 | \usepackage{lmodern}
4 | \usepackage{amssymb,amsmath}
5 | \usepackage{ifxetex,ifluatex}
6 | \usepackage{fixltx2e} % provides \textsubscript
7 | % use upquote if available, for straight quotes in verbatim environments
8 | \IfFileExists{upquote.sty}{\usepackage{upquote}}{}
9 | \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
10 | \usepackage[utf8]{inputenc}
11 | \else % if luatex or xelatex
12 | \ifxetex
13 | \usepackage{mathspec}
14 | \usepackage{xltxtra,xunicode}
15 | \else
16 | \usepackage{fontspec}
17 | \fi
18 | \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase}
19 | \newcommand{\euro}{€}
20 | \fi
21 | % use microtype if available
22 | \IfFileExists{microtype.sty}{\usepackage{microtype}}{}
23 | \usepackage[margin=1in]{geometry}
24 | \usepackage{graphicx}
25 | % Redefine \includegraphics so that, unless explicit options are
26 | % given, the image width will not exceed the width of the page.
27 | % Images get their normal width if they fit onto the page, but
28 | % are scaled down if they would overflow the margins.
29 | \makeatletter
30 | \def\ScaleIfNeeded{%
31 | \ifdim\Gin@nat@width>\linewidth
32 | \linewidth
33 | \else
34 | \Gin@nat@width
35 | \fi
36 | }
37 | \makeatother
38 | \let\Oldincludegraphics\includegraphics
39 | {%
40 | \catcode`\@=11\relax%
41 | \gdef\includegraphics{\@ifnextchar[{\Oldincludegraphics}{\Oldincludegraphics[width=\ScaleIfNeeded]}}%
42 | }%
43 | \ifxetex
44 | \usepackage[setpagesize=false, % page size defined by xetex
45 | unicode=false, % unicode breaks when used with xetex
46 | xetex]{hyperref}
47 | \else
48 | \usepackage[unicode=true]{hyperref}
49 | \fi
50 | \hypersetup{breaklinks=true,
51 | bookmarks=true,
52 | pdfauthor={Rich Majerus},
53 | pdftitle={Reproducible Research},
54 | colorlinks=true,
55 | citecolor=blue,
56 | urlcolor=blue,
57 | linkcolor=magenta,
58 | pdfborder={0 0 0}}
59 | \urlstyle{same} % don't use monospace font for urls
60 | \setlength{\parindent}{0pt}
61 | \setlength{\parskip}{6pt plus 2pt minus 1pt}
62 | \setlength{\emergencystretch}{3em} % prevent overfull lines
63 | \setcounter{secnumdepth}{0}
64 |
65 | \title{Reproducible Research}
66 | \author{Rich Majerus}
67 | \date{August 27, 2014}
68 |
69 | \begin{document}
70 |
71 | \begin{center}
72 | \huge Reproducible Research \\[0.2cm]
73 | \large \emph{Rich Majerus}\\[0.1cm]
74 | \large \emph{August 27, 2014} \\
75 | \normalsize
76 | \end{center}
77 |
78 |
79 | \begin{verbatim}
80 | ## % latex table generated in R 3.1.0 by xtable 1.7-3 package
81 | ## % Thu Aug 28 09:13:08 2014
82 | ## \begin{table}[ht]
83 | ## \centering
84 | ## \begin{tabular}{rlrrrrrrr}
85 | ## \hline
86 | ## & State & 2007 & 2008 & 2009 & 2010 & 2011 & 2012 & 2013 \\
87 | ## \hline
88 | ## 1 & alabama & 0 & 5 & 1 & 0 & 0 & 0 & 1 \\
89 | ## 2 & alaska & 2 & 0 & 3 & 1 & 3 & 3 & 2 \\
90 | ## 3 & arizona & 12 & 7 & 3 & 3 & 5 & 8 & 5 \\
91 | ## 4 & arkansas & 0 & 0 & 0 & 2 & 0 & 0 & 0 \\
92 | ## 5 & california & 71 & 65 & 94 & 96 & 97 & 85 & 87 \\
93 | ## 6 & colorado & 8 & 14 & 5 & 11 & 13 & 7 & 7 \\
94 | ## 7 & connecticut & 5 & 7 & 5 & 9 & 13 & 7 & 3 \\
95 | ## 8 & delaware & 0 & 1 & 1 & 0 & 0 & 0 & 0 \\
96 | ## 10 & florida & 8 & 9 & 5 & 10 & 4 & 4 & 9 \\
97 | ## 11 & georgia & 1 & 7 & 4 & 3 & 7 & 2 & 0 \\
98 | ## 12 & hawaii & 5 & 1 & 3 & 1 & 2 & 2 & 3 \\
99 | ## 13 & idaho & 1 & 2 & 2 & 3 & 3 & 2 & 4 \\
100 | ## 14 & illinois & 12 & 5 & 5 & 12 & 4 & 3 & 11 \\
101 | ## 15 & indiana & 1 & 2 & 2 & 3 & 2 & 1 & 0 \\
102 | ## 16 & iowa & 1 & 1 & 2 & 2 & 1 & 2 & 2 \\
103 | ## 17 & kansas & 0 & 0 & 1 & 1 & 1 & 0 & 0 \\
104 | ## 18 & kentucky & 0 & 0 & 1 & 0 & 0 & 1 & 0 \\
105 | ## 19 & louisiana & 0 & 4 & 1 & 2 & 0 & 2 & 3 \\
106 | ## 20 & maine & 2 & 1 & 2 & 3 & 4 & 2 & 2 \\
107 | ## 21 & maryland & 5 & 3 & 7 & 7 & 2 & 5 & 5 \\
108 | ## 22 & massachusetts & 15 & 20 & 23 & 18 & 19 & 15 & 17 \\
109 | ## 23 & michigan & 3 & 2 & 4 & 2 & 3 & 3 & 2 \\
110 | ## 24 & minnesota & 9 & 11 & 9 & 6 & 6 & 2 & 7 \\
111 | ## 25 & mississippi & 0 & 0 & 0 & 0 & 0 & 1 & 0 \\
112 | ## 26 & missouri & 5 & 2 & 7 & 4 & 3 & 5 & 0 \\
113 | ## 27 & montana & 2 & 1 & 1 & 2 & 1 & 1 & 1 \\
114 | ## 28 & nebraska & 1 & 1 & 0 & 1 & 1 & 0 & 0 \\
115 | ## 29 & nevada & 3 & 0 & 2 & 0 & 2 & 3 & 1 \\
116 | ## 30 & new hampshire & 4 & 1 & 5 & 3 & 2 & 6 & 8 \\
117 | ## 31 & new jersey & 13 & 8 & 7 & 8 & 3 & 9 & 7 \\
118 | ## 32 & new mexico & 2 & 4 & 2 & 6 & 5 & 6 & 7 \\
119 | ## 33 & new york & 26 & 27 & 25 & 23 & 27 & 21 & 24 \\
120 | ## 34 & north carolina & 3 & 4 & 3 & 3 & 3 & 1 & 2 \\
121 | ## 35 & north dakota & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
122 | ## 36 & ohio & 7 & 3 & 1 & 2 & 3 & 1 & 2 \\
123 | ## 37 & oklahoma & 1 & 0 & 1 & 1 & 0 & 1 & 5 \\
124 | ## 38 & oregon & 20 & 28 & 30 & 24 & 26 & 28 & 28 \\
125 | ## 39 & pennsylvania & 8 & 5 & 4 & 6 & 4 & 6 & 8 \\
126 | ## 40 & rhode island & 4 & 2 & 3 & 1 & 2 & 0 & 1 \\
127 | ## 41 & south carolina & 3 & 0 & 0 & 1 & 1 & 0 & 1 \\
128 | ## 42 & south dakota & 0 & 0 & 0 & 1 & 1 & 0 & 0 \\
129 | ## 43 & tennessee & 2 & 2 & 2 & 1 & 3 & 1 & 4 \\
130 | ## 44 & texas & 16 & 14 & 16 & 19 & 12 & 14 & 14 \\
131 | ## 45 & utah & 1 & 1 & 0 & 2 & 1 & 4 & 4 \\
132 | ## 46 & vermont & 1 & 2 & 5 & 1 & 3 & 1 & 5 \\
133 | ## 47 & virginia & 2 & 1 & 5 & 4 & 12 & 1 & 5 \\
134 | ## 48 & washington & 28 & 22 & 32 & 30 & 32 & 19 & 8 \\
135 | ## 49 & west virginia & 0 & 0 & 0 & 1 & 1 & 1 & 0 \\
136 | ## 50 & wisconsin & 2 & 4 & 0 & 2 & 2 & 5 & 5 \\
137 | ## 51 & wyoming & 0 & 1 & 1 & 1 & 0 & 0 & 0 \\
138 | ## \hline
139 | ## \end{tabular}
140 | ## \end{table}
141 | \end{verbatim}
142 |
143 | You can write your entire paper (text, code, analysis, graphics, etc.)
144 | all in R Markdown. As an example, here is a short analysis of the
145 | geographic distribution of Reed College's enrolling students. The
146 | \href{http://www.reed.edu/ir/geographic_states.html}{Institutional
147 | Research Office webpage} has information about the geographic
148 | distribution of Reed's entering classes from 2007-2013.
149 |
150 | Figure 1 shows the raw matriculant data from 2013 mapped by state. The
151 | darker a state's shading, the more matriculants from that state. Mousing
152 | over a state will reveal the exact number of students who matriculated
153 | from a certain state.
154 |
155 | \begin{figure}[htbp]
156 | \centering
157 | \includegraphics{./reproducible_research_files/figure-latex/unnamed-chunk-3.pdf}
158 | \caption{Domestic Geographic Distribution of 2013 Entering Class}
159 | \end{figure}
160 |
161 | However, we may be interested in learning more about the variation in
162 | matriculants across all states rather than identifying the states that
163 | account for the greatest number of matriculants. One way to approach
164 | this task is to map the log of matriculants or to take the log
165 | transformation of the variable of interest. Log transforming a variable
166 | that contains exceptionally large values (i.e., a right skewed variable)
167 | pulls those large values closer to the mean and yields a more
168 | symmetrically distributed variable. As for the map, log transforming the
169 | number of matriculants increases the variation in the color gradient
170 | across states and enables us to better visualize the distribution of
171 | Reed's matriculants across the entire country (as you can see in Figure
172 | 2 below).
173 |
174 | \begin{figure}[htbp]
175 | \centering
176 | \includegraphics{./reproducible_research_files/figure-latex/unnamed-chunk-4.pdf}
177 | \caption{Domestic Geographic Distribution of 2013 Entering Class (Log
178 | Transformed)}
179 | \end{figure}
180 |
181 | \end{document}
182 |
--------------------------------------------------------------------------------
/tutorials/rvest.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "rvest Introduction"
3 | author: "Reed College, Instructional Technology Services"
4 | output: html_document
5 | ---
6 |
7 | We can use the rvest package to scrape information from the internet into R.
8 | For example, this [page](http://www.reed.edu/ir/geographic_states.html) on Reed College's Institutional Research website contains a large table with data that we may want to analyze. Instead of trying to copy this data into Excel or having to manually recreate it, we can use rvest to pull the information directly into R.
9 |
10 |
11 | ```{r, warning=FALSE}
12 |
13 | # install packages
14 | pkg <- c("rvest", "dplyr", "reshape2", "googleVis", "magrittr")
15 |
16 | # compare against installed package names only (the row names of the installed.packages() matrix)
17 | new.pkg <- pkg[!(pkg %in% rownames(installed.packages()))]
18 | if (length(new.pkg) > 0) {
19 |   install.packages(new.pkg)
20 | }
21 |
22 |
23 | # load packages
24 | suppressMessages(library(rvest))
25 | suppressMessages(library(dplyr))
26 | suppressMessages(library(reshape2))
27 | suppressMessages(library(googleVis))
28 |
29 | # helpful resources for using rvest
30 | # vignette("selectorgadget")
31 | # http://blog.rstudio.org/2014/11/24/rvest-easy-web-scraping-with-r/
32 | ```
33 |
34 | **Read in data**
35 | ```{r}
36 |
37 | # download and parse the html file (read_html() replaces the deprecated html())
38 | webpage <- read_html("http://www.reed.edu/ir/geographic_states.html")
39 |
40 | # the data we want is in the first table on this page
41 | # the html_table() command coerces the data into a data frame
42 | webpage %>%
43 |   html_nodes("table") %>%
44 |   .[[1]] %>%
45 |   html_table()
46 | ```
47 |
48 | ```{r, warning=FALSE}
49 | # repeat above code but store results in a data frame
50 | data <-
51 | webpage %>%
52 | html_nodes("table") %>%
53 | .[[1]] %>%
54 | html_table()
55 | ```
56 |
57 |
58 | ```{r, warning=FALSE}
59 | # we can now work with this data from the web as a data frame in R
60 | # remove total row from data
61 | data <-
62 | data %>%
63 | filter(State!='Total')
64 |
65 | # reshape data for plotting
66 | data_long <- melt(data, id='State')
67 |
68 | # rename columns in long data frame
69 | colnames(data_long) <- c('State', 'Year', 'Matriculants')
70 |
71 | # create and manipulate variables for plotting
72 | data_long$Year <- as.numeric(as.character(data_long$Year))
73 | data_long$year <- data_long$Year
74 | data_long$state <- data_long$State
75 |
76 |
77 |
78 |
79 | ```
80 |
81 | ```{r, results='asis'}
82 | # plot data
83 | gvisMotionChart(data_long, "state", "year",
84 | yvar="Matriculants", xvar="Year",
85 | colorvar="State")
86 |
87 |
88 |
89 | ```
90 |
91 |
92 |
--------------------------------------------------------------------------------
/tutorials/scatter plots advanced.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Scatter Plots II"
3 | author: "Reed College, Instructional Technology Services"
4 | output: html_document
5 | ---
6 |
7 |
8 |
9 |
10 | ####Install and load ggplot
11 | ```{r, warning=FALSE, message=FALSE}
12 | # install libraries
13 | # This demo requires the 'ggplot2' package
14 | if( !is.element("ggplot2", installed.packages()[,1]) )
15 | install.packages("ggplot2")
16 |
17 | # load libraries
18 | library(ggplot2)
19 | ```
20 |
21 |
22 |
23 |
24 | ####Download and Load Data
25 | ```{r}
26 | download.file("http://www.openintro.org/stat/data/evals.RData", destfile = "evals.RData")
27 | load("evals.RData")
28 | ```
29 |
30 |
31 |
32 |
33 | ####Create Scatter Plot Function
34 | ```{r, message=FALSE}
35 |
36 | # this function will create a plot of every variable in your data frame against your dependent variable
37 | # the function takes two arguments: x = the name of your data frame and dv = the name of your dependent variable
38 | # if you want to save the scatter plots as .png files define the file path for graphs_folder below
39 | # you will also need to remove the "#" from the two lines in the function that are commented out and place a "#" before print
40 |
41 | # save graphs in this folder
42 | graphs_folder <- 'filepath/graphs/'
43 |
44 | scatter <- function(x, dv, na.rm = TRUE, ...){ # x: data frame; dv: column name given as a string; na.rm and ... are accepted but not used in the body
45 | nm <- names(x) # every column of x gets a plot, including dv itself
46 | for (i in seq_along(nm)) {
47 | print(ggplot(x, aes_string(x = nm[i], y = dv)) + geom_point(color="dark blue") + theme_classic() + stat_smooth(method = "lm", se = FALSE, size = 2, color="dark red"))} # the trailing brace closes the for loop
48 | # plots <- ggplot(x, aes_string(x = nm[i], y = dv)) + geom_point(color="blue") + theme_classic() + stat_smooth(method = "lm", se = FALSE, size = 2)
49 | # ggsave(plots,filename=paste(graphs_folder, "scatter_",nm[i],".png",sep=""))}
50 | } # closes scatter(); if the two lines above are enabled instead of print, the brace ending the ggsave line closes the loop
51 |
52 | ```
53 |
54 |
55 |
56 |
57 | ####Run Scatter Plot Function to Create Scatter Plots for an Entire Data Frame
58 | ```{r, message=FALSE, warning=FALSE}
59 | scatter(x = evals, dv = "score")
60 |
61 | ```
62 |
--------------------------------------------------------------------------------
/tutorials/scatter plots.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Scatter Plots"
3 | author: "Reed College, Instructional Technology Services"
4 | output: html_document
5 | ---
6 |
7 |
8 |
9 |
10 | ####Create Data Frame of Majors and FTE by Department at Reed College
11 | ```{r}
12 |
13 | Departments = c('Art' , 'Music', 'Theatre', 'Anthropology', 'Economics',
14 | 'History', 'Political Science', 'Sociology', 'Chinese', 'Classics',
15 | 'English', 'French', 'German', 'Russian', 'Spanish',
16 | 'Biology', 'Chemistry', 'Mathematics', 'Physics', 'Linguistics',
17 | 'Philosophy', 'Psychology', 'Religion')
18 |
19 | Majors = c(58, 21, 16, 52, 56,
20 | 57, 68, 28, 6, 20,
21 | 150, 5, 2, 7, 3,
22 | 153.5, 74, 72.5, 125, 45,
23 | 75, 98, 25)
24 |
25 | FTE = c(7.8, 4, 6.25, 5, 5.6,
26 | 8.7, 5.5, 3, 3, 4,
27 | 12, 5, 3, 3, 5,
28 | 9, 6.8, 8, 6, 4,
29 | 5.7, 7.7, 4)
30 |
31 | data <- data.frame(Departments, Majors, FTE)
32 |
33 | # Data does not include 94 interdisciplinary majors and 40 undecided majors.
34 | # Majors like bio/chem are split between the two departments
35 | # General Lit majors are included with English
36 | # Dance majors and faculty are included with Theatre
37 | # Major Data: http://www.reed.edu/ir/ir_internal_web/intendedmajors.html and FTE Data: http://www.reed.edu/ir/facfte.html
38 |
39 | ```
40 |
41 |
42 |
43 |
44 | ####Create Scatter Plot using Base R Commands
45 | ```{r}
46 | plot(data$Majors, data$FTE)
47 | ```
48 |
49 |
50 |
51 |
52 | ####Add Additional Elements to Base Scatter Plot
53 | ```{r}
54 | plot(data$Majors, data$FTE,
55 |      xlab = "Majors", ylab = "FTE", main = "Reed College Majors and FTE by Department", pch = 16, # add axis labels and title; pch sets the point symbol
56 |      xlim = c(0, 160), ylim = c(0, 15), # set limits on x-axis and y-axis
57 |      col = "dark blue", cex = 2) # change color and size of points
58 |
59 |
60 | ```
61 |
62 |
63 |
64 |
65 |
66 | ####Add A Fitted Line
67 | ```{r}
68 | plot(data$Majors, data$FTE,
69 |      xlab = "Majors", ylab = "FTE", main = "Reed College Majors and FTE by Department", pch = 16, # add axis labels and title; pch sets the point symbol
70 |      xlim = c(0, 160), ylim = c(0, 15), # set limits on x-axis and y-axis
71 |      col = "dark blue", cex = 2) # change color and size of points
72 |
73 | abline(lm(data$FTE~data$Majors), col="dark red", lwd = 2) # add fitted regression line (y~x)
74 |
75 | ```
76 |
77 |
78 |
79 |
80 |
81 |
82 | ####Using ggplot2 to Make a Scatter Plot
83 | ```{r}
84 | # This demo requires the 'ggplot2' package
85 | if( !is.element("ggplot2", installed.packages()[,1]) )
86 | install.packages("ggplot2")
87 |
88 | suppressPackageStartupMessages(library(ggplot2))
89 |
90 | ## Base scatter plot in ggplot
91 | ggplot(data, aes(x=Majors, y=FTE)) +
92 | geom_point(shape=1)
93 |
94 | ```
95 |
96 |
97 |
98 |
99 |
100 | ####Apply Theme to Scatter Plot
101 | ```{r}
102 | ggplot(data, aes(x=Majors, y=FTE)) +
103 | geom_point(shape=1)+
104 | theme_bw()
105 |
106 |
107 |
108 | ```
109 |
110 |
111 |
112 |
113 | ####Add Additional Elements to Base Scatter Plot
114 | ```{r}
115 | ggplot(data, aes(x=Majors, y=FTE)) +
116 |   geom_point(color="dark blue", size=3) + # change the color and size of points
117 |   geom_smooth(method=lm, se=FALSE, color="dark red") + # add a fitted line
118 |   scale_y_continuous(limits = c(0, 12)) + # apply limits to the y-axis
119 |   ggtitle("Reed College Majors and FTE by Department") + # add a title to the plot
120 |   theme_bw()
121 |
122 |
123 | ```
124 |
125 |
126 |
127 |
128 | ####Make Your Scatter Plot Interactive with googleVis
129 |
130 | ```{r, warning=FALSE}
131 | ## This demo requires the 'googleVis' package
132 | if( !is.element("googleVis", installed.packages()[,1]) )
133 | install.packages("googleVis")
134 |
135 | suppressPackageStartupMessages(library(googleVis))
136 |
137 | # make a new data frame with only two columns to scatter plot
138 | keep <- c('Majors', 'FTE')
139 | data2 <- data[keep]
140 |
141 | # add names to new data frame as factor
142 | data2$pop.html.tooltip=data$Departments
143 |
144 | # create interactive scatter plot using googleVis
145 | Scatter1 <- gvisScatterChart(data2,
146 | options=list(tooltip="{isHtml:'True'}", # Define tooltip
147 | legend="none", lineWidth=0, pointSize=5,
148 | vAxis="{title:'Faculty (Total FTE)'}", # y-axis label
149 | hAxis="{title:'Majors (declared and intended)'}", # x-axis label
150 | width=750, height=500)) # plot dimensions
151 | ```
152 |
153 | ```{r, results = 'asis'}
154 | # write the chart into the document (run 'plot(Scatter1)' to preview it in RStudio)
155 | print(Scatter1, tag = "chart")
156 |
157 | ```
158 |
159 |
160 |
161 |
162 |
163 |
164 | #### Add Additional Elements to Interactive Scatter Plot
165 | ```{r, warning=FALSE}
166 |
167 | # create an interactive scatter plot with drag-to-zoom, right-click reset and crosshairs
168 | Scatter2 <- gvisScatterChart(data2,
169 |   options = list(
170 |     explorer = "{actions: ['dragToZoom',
171 |                            'rightClickToReset'],
172 |                 maxZoomIn:0.05}",
173 |     #chartArea="{width:'85%',height:'80%'}",
174 |     tooltip = "{isHtml:'True'}", # define tooltip
175 |     crosshair = "{trigger:'both'}", # show crosshairs on focus and on selection
176 |     legend = "none", lineWidth = 0, pointSize = 5,
177 |     vAxis = "{title:'Faculty (Total FTE)'}", # y-axis label
178 |     hAxis = "{title:'Majors (declared and intended)'}", # x-axis label
179 |     width = 750, height = 500)) # plot dimensions
180 | ```
181 |
182 |
183 | ```{r, results = 'asis'}
184 | print(Scatter2, tag = "chart") # write the chart into the document
185 |
186 | ```
187 |
188 | Left-click and drag to select an area of the chart to zoom in on.
189 |
190 |
--------------------------------------------------------------------------------
/tutorials/summary_statistics.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Summarizing your Data in RStudio"
3 | date: Reed College, Instructional Technology Services
4 | output: pdf_document
5 | ---
6 |
7 | **Load the mtcars data**
8 | ```{r}
9 | data(mtcars) # load the built-in mtcars data set
10 | ```
11 |
12 | **List summary statistics of each variable**
13 | ```{r}
14 | summary(mtcars) # summarise every column of the data frame
15 | ```
16 |
17 | **List summary statistics for one variable**
18 | ```{r}
19 | summary(mtcars$mpg) # summarise the mpg column only
20 | ```
21 |
22 | **List summary statistics for several variables**
23 | ```{r}
24 | summary(mtcars[, c(1:2, 4:6)]) # columns 1, 2, 4, 5 and 6 (mpg, cyl, hp, drat, wt)
25 | ```
26 |
27 | **Using the describe command**
28 | ```{r}
29 | # install.packages("psych") # uncomment and run once if psych is not installed
30 | library(psych) # provides the describe() and describeBy() calls used below
31 | ```
32 |
33 | **Describe each variable**
34 | ```{r}
35 | describe(mtcars) # descriptive statistics for every column
36 | ```
37 |
38 | **Describe one variable**
39 | ```{r}
40 | describe(mtcars$mpg) # descriptive statistics for the mpg column only
41 | ```
42 |
43 | **Describe several variables**
44 | ```{r}
45 | describe(mtcars[, c(1L, 4:6)]) # columns 1, 4, 5 and 6 (mpg, hp, drat, wt)
46 | ```
47 |
48 | **Describe variables by a grouping variable**
49 | ```{r}
50 | describeBy(mtcars$mpg, group = mtcars$cyl) # mpg statistics within each value of cyl
51 | ```
52 |
--------------------------------------------------------------------------------
/tutorials/summary_statistics.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/tutorials/summary_statistics.pdf
--------------------------------------------------------------------------------
/tutorials/ttests_pdf.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "T-Tests in R"
3 | author: "Reed College, Instructional Technology Services"
4 | output: pdf_document
5 | ---
6 |
7 | **Load data**
8 | ```{r}
9 | data(mtcars) # load the built-in mtcars data set
10 | ```
11 |
12 | **One sample t-test**
13 | ```{r}
14 | t.test(mtcars$mpg, mu=50) # H0: mu = 50
15 | ```
16 | \newpage
17 |
18 | **Independent two sample t-test by groups**
19 | ```{r}
20 | t.test(mtcars$mpg ~ mtcars$am) # compare mean mpg between the two groups defined by am
21 | ```
22 | \newpage
23 |
24 | **Independent 2-group t-test**
25 | ```{r}
26 | mpg1 <- sample(mtcars$mpg, 10, replace = FALSE) # random draw of 10 mpg values, without replacement
27 | mpg2 <- sample(mtcars$mpg, 10, replace = FALSE) # second independent draw; call set.seed() first to reproduce
28 |
29 | t.test(mpg1, mpg2)
30 |
31 | # possible options:
32 | # paired = TRUE
33 | # var.equal = TRUE (pooled variance estimate)
34 | # alternative="less" or alternative="greater" (one tail tests)
35 | ```
36 |
37 |
--------------------------------------------------------------------------------
/tutorials/ttests_pdf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/majerus/r_code_tips/d5f870767ddd4eed09c5a5f194b99eec37e37130/tutorials/ttests_pdf.pdf
--------------------------------------------------------------------------------