├── .gitattributes
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── PUBLICITY.md
├── README.md
├── data
├── cpds_excel_new.xlsx
├── cpds_stata.dta
├── dirty.csv
├── feedback.Rda
├── large.csv
├── merge_practice_1.csv
├── merge_practice_2.csv
├── merge_practice_3.csv
├── mydata.Rda
├── mydata.csv
└── pew.sav
├── examples
├── save_console_output.R
└── save_console_output.txt
├── instructor
├── day_four.R
├── day_four.Rmd
├── day_four.html
├── day_four.pdf
├── day_one.R
├── day_one.Rmd
├── day_one.html
├── day_one.pdf
├── day_three.R
├── day_three.Rmd
├── day_three.html
├── day_three.pdf
├── day_two.R
├── day_two.Rmd
├── day_two.html
├── day_two.pdf
├── overflow.R
├── overflow.Rmd
├── overflow.html
└── overflow.pdf
└── scripts
├── feedback_cleaner.R
└── regenerate_files.R
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.Rmd linguist-language=R
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # History files
2 | .Rhistory
3 | .Rapp.history
4 |
5 | # Example code in package build process
6 | *-Ex.R
7 |
8 | # RStudio files
9 | .Rproj.user/
10 | *.Rproj
11 | .RData
12 |
13 | # produced vignettes
14 | vignettes/*.html
15 | vignettes/*.pdf
16 | .Rproj.user
17 |
18 | # other
19 | .DS_Store
20 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | ## How to contribute
2 |
3 | ### Report an issue
4 |
5 | Reporting a bug with our code is one way that you can help improve it. If you choose to report an issue, please follow these steps:
6 |
7 | 1. Give your issue a descriptive title (e.g. **not** "code doesn't work")
8 | 2. Describe the exact steps that reproduce the bug, with no extraneous steps
9 | 3. If a specific piece of code you wrote causes a bug, submit the code
10 | 4. If you are getting an error message, submit the error message
11 | 5. Include version numbers for your OS, R, and IDE
12 |
13 | ### Submit a patch
14 |
15 | Submitting new materials, data, examples, and/or code is a great way to help improve teaching materials. If you choose to submit materials to this repository, please follow these steps:
16 |
17 | 1. Fork this repository
18 | 2. Make focused, directed, and clean changes to your fork
19 | 3. Run `R --quiet -f scripts/regenerate_files.R` from the base directory
20 | 4. Commit your changes to your fork
21 | - Make sure your commit is thematically focused
22 | - Small commits are better than large commits
23 | - Use informative commit messages
24 | - If your commit fixes an issue, include `closes #<issue number>` in your commit message
25 | 5. Push your commit to your fork
26 | 6. Create a pull request
27 | - Give your pull request a descriptive title (e.g. **not** "changes")
28 | - Explain the motivation for your changes
29 | - Explain what you have changed
30 |
31 | ##### And thanks!
32 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Creative Commons Attribution-NonCommercial 4.0 International Public License
3 |
4 | By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-NonCommercial 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions.
5 |
6 | Section 1 – Definitions.
7 |
8 | Adapted Material means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image.
9 | Adapter's License means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License.
10 | Copyright and Similar Rights means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights.
11 | Effective Technological Measures means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements.
12 | Exceptions and Limitations means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material.
13 | Licensed Material means the artistic or literary work, database, or other material to which the Licensor applied this Public License.
14 | Licensed Rights means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license.
15 | Licensor means the individual(s) or entity(ies) granting rights under this Public License.
16 | NonCommercial means not primarily intended for or directed towards commercial advantage or monetary compensation. For purposes of this Public License, the exchange of the Licensed Material for other material subject to Copyright and Similar Rights by digital file-sharing or similar means is NonCommercial provided there is no payment of monetary compensation in connection with the exchange.
17 | Share means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them.
18 | Sui Generis Database Rights means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world.
19 | You means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning.
20 | Section 2 – Scope.
21 |
22 | License grant.
23 | Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to:
24 | reproduce and Share the Licensed Material, in whole or in part, for NonCommercial purposes only; and
25 | produce, reproduce, and Share Adapted Material for NonCommercial purposes only.
26 | Exceptions and Limitations. For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions.
27 | Term. The term of this Public License is specified in Section 6(a).
28 | Media and formats; technical modifications allowed. The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material.
29 | Downstream recipients.
30 | Offer from the Licensor – Licensed Material. Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License.
31 | No downstream restrictions. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material.
32 | No endorsement. Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i).
33 | Other rights.
34 |
35 | Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise.
36 | Patent and trademark rights are not licensed under this Public License.
37 | To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties, including when the Licensed Material is used other than for NonCommercial purposes.
38 | Section 3 – License Conditions.
39 |
40 | Your exercise of the Licensed Rights is expressly made subject to the following conditions.
41 |
42 | Attribution.
43 |
44 | If You Share the Licensed Material (including in modified form), You must:
45 |
46 | retain the following if it is supplied by the Licensor with the Licensed Material:
47 | identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated);
48 | a copyright notice;
49 | a notice that refers to this Public License;
50 | a notice that refers to the disclaimer of warranties;
51 | a URI or hyperlink to the Licensed Material to the extent reasonably practicable;
52 | indicate if You modified the Licensed Material and retain an indication of any previous modifications; and
53 | indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License.
54 | You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information.
55 | If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable.
56 | If You Share Adapted Material You produce, the Adapter's License You apply must not prevent recipients of the Adapted Material from complying with this Public License.
57 | Section 4 – Sui Generis Database Rights.
58 |
59 | Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material:
60 |
61 | for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database for NonCommercial purposes only;
62 | if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material; and
63 | You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database.
64 | For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights.
65 | Section 5 – Disclaimer of Warranties and Limitation of Liability.
66 |
67 | Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You.
68 | To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You.
69 | The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability.
70 | Section 6 – Term and Termination.
71 |
72 | This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically.
73 | Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates:
74 |
75 | automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or
76 | upon express reinstatement by the Licensor.
77 | For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License.
78 | For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License.
79 | Sections 1, 5, 6, 7, and 8 survive termination of this Public License.
80 | Section 7 – Other Terms and Conditions.
81 |
82 | The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed.
83 | Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License.
84 | Section 8 – Interpretation.
85 |
86 | For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License.
87 | To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions.
88 | No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor.
89 | Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority.
90 |
--------------------------------------------------------------------------------
/PUBLICITY.md:
--------------------------------------------------------------------------------
1 | # Facetweet announcement
2 |
3 | Learn how to analyze your datasets in R! [insert link here](https://youtu.be/dQw4w9WgXcQ)
4 |
5 | # Information for calendar
6 |
7 | Each class in the workshop lasts 3 hours.
8 |
9 | # Descriptions for website
10 |
11 | ## Header
12 |
13 | **title** : R for Data Science
14 |
15 | **description** : The R for Data Science workshop series is a four-part course, designed to take novices in the R language for statistical computing and produce programmers who are competent in finding, displaying, analyzing, and publishing data in R.
16 |
17 | ## Part 1
18 |
19 | **subtitle** : Basics of R
20 |
21 | **description** : Students will understand the motivation behind object orientation, and how that relates to computation. Students will be able to perform basic functions in R necessary to use the software on their computers and conduct basic arithmetic. Students will understand data types and data structures, and why and how they are different from each other.
22 |
23 | **knowledge requirements** : [Programming Fun!damentals](https://github.com/dlab-berkeley/programming-fundamentals), or equivalent prior knowledge
24 |
25 | **tech requirements** : Laptop required; please install R version 3.2 or greater in advance (University laptops will need to have R installed by an administrator); the RStudio IDE is recommended but not required
26 |
27 | ## Part 2
28 |
29 | **subtitle** : Clean and tidy data
30 |
31 | **description** : Students will be introduced to DRY principles and best practices for sanitizing and tidying data. Students will learn what missingness is, and how best to accommodate missing data in their research designs. Students will be able to read in files from disk or a database, clean the data found within them, select specific data from them, and merge them with other datasets.
32 |
33 | **knowledge requirements** : R-for-Data-Science Part 1 or equivalent prior knowledge
34 |
35 | **tech requirements** : Laptop required; please install R version 3.2 or greater in advance (University laptops will need to have R installed by an administrator); the RStudio IDE is recommended but not required
36 |
37 | ## Part 3
38 |
39 | **subtitle** : Analyzing data
40 |
41 | **description** : Students will be introduced to the principles behind the grammar of graphics and the general linear model. Students will understand the implementation of plotting in R. Students will be able to explore, summarize, and analyze data using R's implementation of exploratory and inferential data analysis.
42 |
43 | **knowledge requirements** : R-for-Data-Science Part 2 or equivalent prior knowledge
44 |
45 | **tech requirements** : Laptop required; please install R version 3.2 or greater in advance (University laptops will need to have R installed by an administrator); the RStudio IDE is recommended but not required
46 |
47 | ## Part 4
48 |
49 | **subtitle** : Functions and packages
50 |
51 | **description** : Students will be introduced to the principles behind functional programming. Students will learn how to write and import functions, add looped and vectorized computation to their functions, and control the flow of data through a function. Students will understand the basics of namespaces, and how they relate to assigning values within functions. Students will see how to successfully package a function for CRAN.
52 |
53 | **knowledge requirements** : R-for-Data-Science Part 2 or equivalent prior knowledge
54 |
55 | **tech requirements** : Laptop required; please install R version 3.2 or greater in advance (University laptops will need to have R installed by an administrator); the RStudio IDE is recommended but not required
56 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Materials for D-Lab's R for Data Science
3 | author: Dillon Niederhut
4 | ---
5 |
6 | This repository contains the instructor materials for the D-Lab's R intensive.
7 |
8 | ## If you are a student:
9 |
10 | You can download the contents of this repository with:
11 |
12 | ```
13 | git clone https://github.com/dlab-berkeley/r-for-data-science.git
14 | ```
15 |
16 | or, by clicking the "Download Zip" button and then extracting the `.zip` file.
17 |
18 | The instructor of this workshop series will lead you through the activities for each day.
19 |
20 | ## If you are a D-Lab instructor
21 |
22 | You'll see accumulated teaching notes and examples for each day's topics in the instructor folder. For your convenience, these are available as .Rmd, commented .R files, PDF documents, and HTML slides. The meta-document for this workshop series, which explains the logic behind the structure and topics, can be viewed [at the D-Lab guides repository](https://github.com/dlab-berkeley/guides/blob/master/r.pdf).
23 |
24 | For information on contributing to this repository, see `CONTRIBUTING.md`
25 |
26 | ## If you are a D-Lab facilitator
27 |
28 | The standard Drupal workshop descriptions and facetweet postings for this workshop series are in `PUBLICITY.md`
29 |
30 | ## Description
31 |
32 | * `data/` : data necessary for interactive coding examples
33 | * `examples/`
34 | * `save_console_output.R` : R code for saving console output to a text file (see `save_console_output.txt`)
35 | * `instructor/` : teaching notes
36 | * `scripts/`
37 | * `feedback_cleaner.R` : used to clean data for use in Day 3
38 | * `regenerate_files.R` : for regenerating `.R` and `.pdf` files from `.Rmd`
39 |
40 | ## Topics:
41 |
42 | This workshop series covers:
43 |
44 | 1. Interacting with R
45 | 2. Datatypes
46 | 3. Data structures
47 | 4. Reading data
48 | 5. Sanitizing data
49 | 6. Missing data
50 | 7. Reshaping data
51 | 8. Summary statistics
52 | 9. Plotting
53 | 10. Linear models
54 | 11. Non-parametric models
55 | 12. Functions
56 | 13. Loops
57 | 14. Parallelization
58 | 15. Packages
59 |
60 | ## Libraries
61 |
62 | This workshop uses the following packages:
63 |
64 | * Amelia
65 | * devtools
66 | * dplyr
67 | * foreign
68 | * ggplot2
69 | * parallelMap
70 | * RCurl
71 | * roxygen2
72 | * stringr
73 | * tidyr
74 | * XML
75 |
76 | ---
77 | _D-Lab == Data Intensive Social Science, For All!_
78 |
--------------------------------------------------------------------------------
/data/cpds_excel_new.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/R-for-Data-Science/798d5b220d66afea5644a245c7327e4bb740039a/data/cpds_excel_new.xlsx
--------------------------------------------------------------------------------
/data/cpds_stata.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/R-for-Data-Science/798d5b220d66afea5644a245c7327e4bb740039a/data/cpds_stata.dta
--------------------------------------------------------------------------------
/data/dirty.csv:
--------------------------------------------------------------------------------
1 | Timestamp,How tall are you?,What department are you in?,Are you currently enrolled?,What is your birth order?
2 | 7/25/2015 10:08:41,very,Geology ,Yes,1
3 | 7/25/2015 10:10:56,70,999,Yes,1
4 | 7/25/2015 10:11:20,5'9, geology,999,2
5 | 7/25/2015 10:11:25,2.1,goelogy,No,"9,000"
6 | 7/25/2015 10:11:29,156,anthro,999,2
7 |
--------------------------------------------------------------------------------
/data/feedback.Rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/R-for-Data-Science/798d5b220d66afea5644a245c7327e4bb740039a/data/feedback.Rda
--------------------------------------------------------------------------------
/data/large.csv:
--------------------------------------------------------------------------------
1 | "a","b","c"
2 | 1.3581505173944,501.358150517394,-249998.641849483
3 | 11.3577147595264,510.357714759526,-248989.64228524
4 | -8.95555545612576,489.044444543874,-248012.955555456
5 | 1.88114421744398,498.881144217444,-247007.118855783
6 | 0.543663168950277,496.54366316895,-246015.456336831
7 | 3.85425376063794,498.854253760638,-245021.145746239
8 | -11.6013517930778,482.398648206922,-244047.601351793
9 | 20.2670228311676,513.267022831168,-243028.732977169
10 | -7.18780058689817,484.812199413102,-242071.187800587
11 | 6.42867292162213,497.428672921622,-241074.571327078
12 | -4.26342063090416,485.736579369096,-240104.263420631
13 | -4.62510870047102,484.374891299529,-239125.6251087
14 | 21.8557020064228,509.855702006423,-238122.144297994
15 | -5.15416757671307,481.845832423287,-237174.154167577
16 | 2.78173677952288,488.781736779523,-236193.21826322
17 | -7.39782232223834,477.602177677762,-235232.397822322
18 | 1.37565518535506,485.375655185355,-234254.624344815
19 | -1.90923527346237,481.090764726538,-233290.909235273
20 | -8.94943767253883,473.050562327461,-232332.949437673
21 | -15.9737723743721,465.026227625628,-231376.973772374
22 | -7.51565031035233,472.484349689648,-230407.51565031
23 | 6.94587168391045,485.94587168391,-229434.054128316
24 | -1.0496465784606,476.950353421539,-228485.049646578
25 | 17.0179224970545,494.017922497054,-227511.982077503
26 | 0.987848544058621,476.987848544059,-226575.012151456
27 | -1.8855214434642,473.114478556536,NA
28 | -2.89112302138148,471.108876978619,-224678.891123021
29 | -8.88373788763536,464.116262112365,NA
30 | 17.8041175915493,489.804117591549,-222766.195882408
31 | -5.12198149627124,465.878018503729,-221846.121981496
32 | 2.0005109528654,472.000510952865,-220897.999489047
33 | 5.53018496290098,474.530184962901,-219955.469815037
34 | 3.56393288323932,471.563932883239,-219020.436067117
35 | 12.2533993964188,479.253399396419,-218076.746600604
36 | 4.9746350696378,470.974635069638,-217151.02536493
37 | 9.48818384641285,NA,-216215.511816154
38 | 16.1634219553809,480.163421955381,-215279.836578045
39 | 3.24632573187004,466.24632573187,-214365.753674268
40 | -3.86165289433562,458.138347105664,-213447.861652894
41 | -3.83930087880723,457.160699121193,-212524.839300879
42 | -1.46552787866293,458.534472121337,-211601.465527879
43 | 10.06325345765,469.06325345765,-210670.936746542
44 | 8.7969736722717,466.796973672272,-209755.203026328
45 | 3.04064737136917,460.040647371369,-208845.959352629
46 | -1.50043739837891,454.499562601621,-207937.500437398
47 | 13.5331566888627,468.533156688863,-207011.466843311
48 | -12.0613500909166,441.938649909083,-206128.061350091
49 | -11.5439925079878,441.456007492012,-205220.543992508
50 | -0.376125115071009,451.623874884929,-204304.376125115
51 | -6.06441822918584,444.935581770814,-203407.064418229
52 | -5.88246018867732,444.117539811323,-202505.882460189
53 | -24.5275887312936,424.472411268706,NA
54 | 20.4240181837923,468.424018183792,-200683.575981816
55 | 7.25011433950764,454.250114339508,-199801.74988566
56 | -3.73667212877388,442.263327871226,-198919.736672129
57 | 3.93420188883161,448.934201888832,-198021.065798111
58 | 8.12932300409276,452.129323004093,-197127.870676996
59 | 13.0478718418723,456.047871841872,-196235.952128158
60 | 9.15726161508085,451.157261615081,-195354.842738385
61 | -2.47948906202933,438.520510937971,-194483.479489062
62 | 12.6907600620263,452.690760062026,-193587.309239938
63 | 15.7780182194041,454.778018219404,-192705.221981781
64 | -19.6285670734271,418.371432926573,-191863.628567073
65 | -5.55020458503775,431.449795414962,-190974.550204585
66 | -0.311818735355713,435.688181264644,-190096.311818735
67 | 8.56427052865354,443.564270528654,-189216.435729471
68 | 7.63675361892641,441.636753618926,-188348.363246381
69 | 6.94918984307691,439.949189843077,-187482.050810157
70 | 14.2088859180484,446.208885918048,-186609.791114082
71 | -24.9520663587553,406.047933641245,-185785.952066359
72 | 2.58929947743396,432.589299477434,NA
73 | -11.4241154982323,417.575884501768,NA
74 | 3.55993302857193,431.559933028572,-183180.440066971
75 | 10.9564846163327,437.956484616333,-182318.043515384
76 | 0.114643121028011,426.114643121028,-181475.885356879
77 | -10.162935627928,414.837064372072,-180635.162935628
78 | 8.07539286491372,432.075392864914,-179767.924607135
79 | -16.2439915645735,406.756008435427,-178945.243991565
80 | -9.49277709185509,412.507222908145,-178093.492777092
81 | 0.421844602169055,421.421844602169,-177240.578155398
82 | -4.2515981402115,415.748401859788,-176404.25159814
83 | -1.05328824784313,417.946711752157,-175562.053288248
84 | 1.16593338025192,419.165933380252,-174722.83406662
85 | 22.4791745416699,439.47917454167,-173866.520825458
86 | -6.034485578695,409.965514421305,-173062.034485579
87 | -5.82601358372921,409.173986416271,-172230.826013584
88 | -5.96213258127571,408.037867418724,-171401.962132581
89 | 3.94071976661435,416.940719766614,-170565.059280233
90 | -11.0222372863317,400.977762713668,-169755.022237286
91 | 1.64679675504341,412.646796755043,-168919.353203245
92 | 14.9567754592232,424.956775459223,-168085.043224541
93 | 13.8226435741203,422.82264357412,-167267.177356426
94 | 5.28340538463221,413.283405384632,-166458.716594615
95 | -8.78828990075244,398.211710099248,-165657.788289901
96 | 3.97265442085011,409.97265442085,-164832.027345579
97 | 11.9293646479669,416.929364647967,-164013.070635352
98 | -10.3330482666406,393.666951733359,-163226.333048267
99 | 12.8464360814843,415.846436081484,-162396.153563919
100 | 5.89441807362869,407.894418073629,-161598.105581926
101 | 3.65497768135184,404.654977681352,-160797.345022319
102 | 8.95499363626723,408.954993636267,-159991.045006364
103 | -21.7021552310344,377.297844768966,-159222.702155231
104 | -0.0465940891250309,397.953405910875,-158404.046594089
105 | -6.5291574157617,390.470842584238,-157615.529157416
106 | 10.5230033415697,406.52300334157,-156805.476996658
107 | -2.80025676469946,392.199743235301,-156027.800256765
108 | -8.48969709317775,385.510302906822,-155244.489697093
109 | 2.3950835264703,395.39508352647,-154446.604916474
110 | 1.84016651556131,393.840166515561,NA
111 | 5.94263722106124,396.942637221061,-152875.057362779
112 | 10.4433938638096,400.44339386381,-152089.556606136
113 | 0.744670766355569,389.744670766356,-151320.255329234
114 | 14.3721987516088,402.372198751609,-150529.627801248
115 | 7.10235444387405,394.102354443874,-149761.897645556
116 | 19.6421087313306,405.642108731331,NA
117 | 8.06146243489221,393.061462434892,-148216.938537565
118 | -11.3270947858309,372.672905214169,-147467.327094786
119 | 2.34007394825573,385.340073948256,-146686.659926052
120 | -13.1993586548931,368.800641345107,-145937.199358655
121 | 3.68431685168775,384.684316851688,-145157.315683148
122 | -17.6178564660341,362.382143533966,-144417.617856466
123 | 6.98194349085335,385.981943490853,-143634.018056509
124 | -6.97575154559194,371.024248454408,-142890.975751546
125 | 12.2225257479955,NA,-142116.777474252
126 | 10.2589828233097,386.25898282331,-141365.741017177
127 | -13.875818168771,361.124181831229,-140638.875818169
128 | 13.7507811760448,387.750781176045,-139862.249218824
129 | 0.624879444184843,373.624879444185,-139128.375120556
130 | -14.4649047957753,357.535095204225,-138398.464904796
131 | 12.9804428894361,383.980442889436,-137628.019557111
132 | -10.8382172957735,359.161782704227,-136910.838217296
133 | NA,378.041613293329,-136151.958386707
134 | -3.16270014690147,364.837299853099,-135427.162700147
135 | 1.0349482916108,368.034948291611,-134687.965051708
136 | 16.9455952677516,382.945595267752,-133939.054404732
137 | -18.7186518874281,346.281348112572,-133243.718651887
138 | -1.79158948488302,362.208410515117,-132497.791589485
139 | -12.377784917909,350.622215082091,-131781.377784918
140 | -12.5391519130401,349.46084808696,-131056.539151913
141 | 0.274063431279612,361.27406343128,-130320.725936569
142 | -19.2248201730858,340.775179826914,-129619.224820173
143 | 3.44442622462494,362.444426224625,-128877.555573775
144 | 12.2101150683734,370.210115068373,-128151.789884932
145 | -10.0779610901819,346.922038909818,-127459.07796109
146 | -0.985421629741494,355.014578370259,-126736.98542163
147 | 3.25122710133514,358.251227101335,-126021.748772899
148 | -25.637721335228,328.362278664772,-125341.637721335
149 | -10.818588628341,342.181411371659,-124619.818588628
150 | -1.48470852792634,350.515291472074,-123905.484708528
151 | -16.5626687750116,334.437331224988,-123217.562668775
152 | 3.19189092669998,353.1918909267,-122496.808109073
153 | 0.478583427057276,349.478583427057,-121800.521416573
154 | -3.80458463516452,NA,-121107.804584635
155 | -16.4373475020683,330.562652497932,-120425.437347502
156 | 9.05836957054321,355.058369570543,-119706.941630429
157 | 1.73534072733478,346.735340727335,-119023.264659273
158 | -15.9561062275161,328.043893772484,-118351.956106228
159 | -14.524254590909,328.475745409091,-117663.524254591
160 | -15.037525782204,326.962474217796,-116979.037525782
161 | 1.26624271455263,342.266242714553,-116279.733757285
162 | -11.6363827081937,328.363617291806,-115611.636382708
163 | -3.38212909727806,335.617870902722,-114924.382129097
164 | -23.1645260631891,314.835473936811,-114267.164526063
165 | -4.37974698789867,332.620253012101,-113573.379746988
166 | 7.24039490212025,343.24039490212,-112888.759605098
167 | 1.63626940133176,336.636269401332,-112223.363730599
168 | -0.432433557677967,333.567566442322,-111556.432433558
169 | 7.90485939830822,NA,-110881.095140602
170 | -0.734809945897447,331.265190054103,-110224.734809946
171 | -7.73139589131911,323.268604108681,-109568.731395891
172 | 5.61362209303005,335.61362209303,-108894.386377907
173 | 1.33569775999879,330.335697759999,-108239.66430224
174 | -1.64918391715711,326.350816082843,-107585.649183917
175 | 6.45429657809836,333.454296578098,-106922.545703422
176 | 19.4910265703386,345.491026570339,-106256.50897343
177 | 1.09316608428602,326.093166084286,-105623.906833916
178 | -3.21363459186323,NA,-104979.213634592
179 | -0.63513754055205,322.364862459448,-104329.635137541
180 | 16.0851609846775,338.085160984677,-103667.914839015
181 | -0.903742272269506,320.096257727731,-103041.903742272
182 | 1.78209411530552,321.782094115306,-102398.217905885
183 | NA,314.134678184772,-101765.865321815
184 | NA,328.386139150149,NA
185 | 4.29172055786855,321.291720557869,-100484.708279442
186 | 0.133336083210772,316.133336083211,-99855.8666639168
187 | 0.0873211432145336,315.087321143215,-99224.9126788568
188 | 4.34703393065037,318.34703393065,-98591.6529660693
189 | -6.54732353217941,306.452676467821,-97975.5473235322
190 | 5.44633583461175,317.446335834612,-97338.5536641654
191 | -3.66377027820612,307.336229721794,-96724.6637702782
192 | -8.61048934646851,301.389510653531,-96108.6104893465
193 | -3.31471289752472,305.685287102475,-95484.3147128975
194 | 6.21570957381675,314.215709573817,-94857.7842904262
195 | 5.18041185382813,312.180411853828,-94243.8195881462
196 | -9.49900695685376,296.500993043146,-93645.4990069569
197 | 15.279601807336,320.279601807336,-93009.7203981927
198 | 8.70443551911112,312.704435519111,-92407.2955644809
199 | 10.3574561487484,313.357456148748,-91798.6425438513
200 | 2.6052659503092,304.605265950309,-91201.3947340497
201 | -5.1110467554415,295.888953244558,-90606.1110467554
202 | 6.5558265114815,306.555826511481,-89993.4441734885
203 | -3.43073719720164,295.569262802798,-89404.4307371972
204 | -8.93689975442135,289.063100245579,-88812.9368997544
205 | -4.24060225239137,292.759397747609,-88213.2406022524
206 | -3.11811108607603,292.881888913924,-87619.1181110861
207 | -8.29064935639284,286.709350643607,-87033.2906493564
208 | -23.758635412573,270.241364587427,-86459.7586354126
209 | -12.313523713606,280.686476286394,-85861.3135237136
210 | 3.38702450698695,295.387024506987,-85260.612975493
211 | 13.9360535868532,304.936053586853,-84667.0639464132
212 | 17.2538049595115,307.253804959512,-84082.7461950405
213 | 0.995896257700937,289.995896257701,-83520.0041037423
214 | 5.13345807824215,293.133458078242,-82938.8665419218
215 | 8.2487040867109,295.248704086711,NA
216 | 4.70727953512302,290.707279535123,-81791.2927204649
217 | -1.44598401603106,283.554015983969,-81226.445984016
218 | -22.7231102955257,261.276889704474,-80678.7231102955
219 | 0.859360245326562,283.859360245327,-80088.1406397547
220 | 0.802281751053773,282.802281751054,-79523.1977182489
221 | 15.5819596590036,296.581959659004,-78945.418040341
222 | -5.8000887662359,274.199911233764,-78405.8000887662
223 | -13.1392425826392,265.860757417361,-77854.1392425826
224 | 1.76570103090634,279.765701030906,-77282.2342989691
225 | -20.0934880832453,256.906511916755,NA
226 | 4.58734284097442,280.587342840974,-76171.412657159
227 | 2.84741016364773,277.847410163648,-75622.1525898363
228 | -18.8704820563166,255.129517943683,-75094.8704820563
229 | 3.66064975731091,276.660649757311,-74525.3393502427
230 | -5.34710873154519,266.652891268455,-73989.3471087315
231 | -12.6754149821461,258.324585017854,-73453.6754149821
232 | 4.55624068166475,274.556240681665,-72895.4437593183
233 | -4.65331263851466,264.346687361485,-72365.6533126385
234 | -5.27088777964937,262.729112220351,-71829.2708877797
235 | 0.528830506687434,267.528830506687,-71288.4711694933
236 | 7.03591033801163,273.035910338012,NA
237 | 12.8776181063941,277.877618106394,-70212.1223818936
238 | 11.3783555537835,275.378355553783,-69684.6216444462
239 | 3.79947253499103,266.799472534991,-69165.200527465
240 | 0.224453283618614,262.224453283619,-68643.7755467164
241 | -3.19817766025074,257.801822339749,-68124.1981776602
242 | 0.296041237584306,260.296041237584,-67599.7039587624
243 | -9.42430707567786,249.575692924322,-67090.4243070757
244 | 10.0490949471492,268.049094947149,-66553.9509050529
245 | 9.23150099378385,266.231500993784,NA
246 | -1.58419908270131,254.415800917299,-65537.5841990827
247 | -3.04195775027664,251.958042249723,-65028.0419577503
248 | -1.95449641955894,252.045503580441,-64517.9544964196
249 | -22.893747890277,230.106252109723,-64031.8937478903
250 | 5.43519268017744,257.435192680177,-63498.5648073198
251 | -5.00142003779859,245.998579962201,-63006.0014200378
252 | -11.9791437207358,238.020856279264,-62511.9791437207
253 | 23.3182556005606,272.318255600561,-61977.6817443994
254 | 7.03066620593532,255.030666205935,-61496.9693337941
255 | 6.47235569352978,253.47235569353,-61002.5276443065
256 | 5.66791131677032,251.66791131677,-60510.3320886832
257 | -4.62140329595731,240.378596704043,-60029.621403296
258 | 6.38505543821194,250.385055438212,-59529.6149445618
259 | -7.17921392946991,235.82078607053,-59056.1792139295
260 | 17.5012593363778,259.501259336378,-58546.4987406636
261 | 3.36646517152614,244.366465171526,-58077.6335348285
262 | 2.37209035036224,242.372090350362,-57597.6279096496
263 | -3.63559175149936,235.364408248501,-57124.6355917515
264 | -18.7297112949892,219.270288705011,-56662.729711295
265 | -1.40343970042959,235.59656029957,-56170.4034397004
266 | 3.67179586111382,239.671795861114,NA
267 | -15.1853327173846,219.814667282615,-55240.1853327174
268 | -1.60334987916399,232.396650120836,-54757.6033498792
269 | 0.029548906663531,233.029548906664,-54288.9704510933
270 | -1.11620163225631,230.883798367744,-53825.1162016323
271 | -0.326958730783566,230.673041269216,-53361.3269587308
272 | -1.06581810028232,228.934181899718,-52901.0658181003
273 | 14.3320700291467,243.332070029147,-52426.6679299709
274 | -8.44982787552532,219.550172124475,-51992.4498278755
275 | 9.59560490746185,236.595604907462,-51519.4043950925
276 | 20.4538062052741,246.453806205274,-51055.5461937947
277 | 10.2030650954982,235.203065095498,-50614.7969349045
278 | 1.47126327267434,225.471263272674,-50174.5287367273
279 | -17.0331031304331,205.966896869567,-49746.0331031304
280 | -15.5183030772447,NA,-49299.5183030772
281 | 31.1318937478317,252.131893747832,-48809.8681062522
282 | -3.65548932014268,216.344510679857,NA
283 | 2.40252172903538,221.402521729035,-47958.597478271
284 | 2.39534280827645,220.395342808276,-47521.6046571917
285 | 11.0029749563063,228.002974956306,-47077.9970250437
286 | 6.46609680545292,222.466096805453,-46649.5339031945
287 | -13.4011616297204,201.59883837028,-46238.4011616297
288 | 8.86814829360056,NA,-45787.1318517064
289 | NA,213.930286454016,-45368.069713546
290 | 20.4191875354118,232.419187535412,-44923.5808124646
291 | 2.75061098332672,NA,-44518.2493890167
292 | 8.56842484412152,NA,-44091.4315751559
293 | -20.9527390650258,188.047260934974,-43701.952739065
294 | 4.73427599901052,212.734275999011,-43259.265724001
295 | 4.02332142066545,211.023321420665,-42844.9766785793
296 | -9.22133296383933,196.778667036161,-42445.2213329638
297 | -14.9576383118771,190.042361688123,-42039.9576383119
298 | 8.43692906973503,212.436929069735,-41607.5630709303
299 | NA,214.03360904411,-41197.9663909559
300 | 16.8659874632013,218.865987463201,-40787.1340125368
301 | 17.7954314540345,NA,-40383.204568546
302 | 9.24622601053251,209.246226010533,NA
303 | -8.19781490118796,190.802185098812,-39609.1978149012
304 | -5.98068619156291,192.019313808437,-39209.9806861916
305 | -12.4864088577363,184.513591142264,-38821.4864088577
306 | 8.89014654703184,204.890146547032,-38407.109853453
307 | 5.27474725391051,200.274747253911,-38019.7252527461
308 | 10.1806699832168,204.180669983217,-37625.8193300168
309 | 1.48411543672474,194.484115436725,-37247.5158845633
310 | -13.6219503956211,178.378049604379,-36877.6219503956
311 | 8.88100039962013,199.88100039962,-36472.1189996004
312 | 4.6247137590008,194.624713759001,-36095.375286241
313 | NA,185.673559357793,-35724.3264406422
314 | -11.4420004002092,NA,-35355.4420004002
315 | 0.641291339366978,187.641291339367,-34968.3587086606
316 | -18.0576638578454,167.942336142155,-34614.0576638578
317 | NA,193.218456689476,-34216.7815433105
318 | 12.4543075843774,196.454307584377,-33843.5456924156
319 | 13.0025523046514,196.002552304651,-33475.9974476954
320 | 8.04287570242736,190.042875702427,-33115.9571242976
321 | -6.73429938840597,174.265700611594,-32767.7342993884
322 | -5.17704201004565,174.822957989954,-32405.17704201
323 | -3.19479066115539,175.805209338845,-32044.1947906612
324 | -8.18100502406904,169.818994975931,-31692.1810050241
325 | -25.6335505740385,151.366449425961,-31354.633550574
326 | -12.2500774036384,163.749922596362,-30988.2500774036
327 | 0.23338722026117,175.233387220261,-30624.7666127797
328 | 7.08553323356053,181.085533233561,-30268.9144667664
329 | 1.77768610026373,174.777686100264,-29927.2223138997
330 | -5.33249583799775,166.667504162002,NA
331 | -3.52194231506352,167.478057684936,-29244.5219423151
332 | -9.676389714038,160.323610285962,-28909.676389714
333 | 22.2106747102422,191.210674710242,-28538.7893252898
334 | 0.332343005686162,168.332343005686,-28223.6676569943
335 | -0.0560260543282312,166.943973945672,-27889.0560260543
336 | -18.0410281792819,147.958971820718,-27574.0410281793
337 | -17.601027250371,147.398972749629,-27242.6010272504
338 | 6.43005197028487,170.430051970285,-26889.5699480297
339 | 14.1323386317504,177.13233863175,-26554.8676613683
340 | 21.2873949643941,183.287394964394,-26222.7126050356
341 | -6.83663368790321,154.163366312097,-25927.8366336879
342 | -1.1782097718109,158.821790228189,-25601.1782097718
343 | -0.967528004898314,158.032471995102,-25281.9675280049
344 | 10.7486764506202,168.74867645062,-24953.2513235494
345 | 1.45576595822401,158.455765958224,-24647.5442340418
346 | NA,159.13176899058,-24332.8682310094
347 | 6.35816569334053,161.358165693341,-24018.6418343067
348 | -13.6574513827807,140.342548617219,-23729.6574513828
349 | 4.14899890701915,157.148998907019,-23404.851001093
350 | 9.0120315351118,NA,-23094.9879684649
351 | 9.27812603090718,160.278126030907,-22791.7218739691
352 | 14.3990910293475,164.399091029347,-22485.6009089707
353 | -15.9784984708197,133.02150152918,-22216.9784984708
354 | -16.7792699850092,131.220730014991,-21920.779269985
355 | 6.98176781659272,153.981767816593,-21602.0182321834
356 | -8.30215566870221,137.697844331298,-21324.3021556687
357 | -4.02164670149323,140.978353298507,-21029.0216467015
358 | -6.50691833620289,137.493081663797,-20742.5069183362
359 | -8.02690938506061,134.973090614939,-20457.0269093851
360 | 4.64106689991784,146.641066899918,-20159.3589331001
361 | -0.718788040803492,140.281211959196,-19881.7187880408
362 | -8.86211612391727,131.137883876083,-19608.8621161239
363 | -18.5922988250833,120.407701174917,-19339.5922988251
364 | -1.63482796664884,136.365172033351,-19045.6348279666
365 | 5.1240184462262,142.124018446226,-18763.8759815538
366 | 11.168665627226,147.168665627226,-18484.8313343728
367 | 8.23963321753628,143.239633217536,-18216.7603667825
368 | 5.93899982091755,139.938999820918,-17950.0610001791
369 | -4.25037313302133,128.749626866979,-17693.250373133
370 | 1.81364633207338,133.813646332073,-17422.1863536679
371 | -2.75119927479544,128.248800725205,-17163.7511992748
372 | 0.36468447141091,130.364684471411,-16899.6353155286
373 | 0.451347581596385,129.451347581596,-16640.5486524184
374 | -5.50722288782811,122.492777112172,-16389.5072228878
375 | 3.00868935392603,130.008689353926,-16125.9913106461
376 | 6.03597924680919,132.035979246809,-15869.9640207532
377 | 4.24359404217288,129.243594042173,-15620.7564059578
378 | NA,119.28087656221,-15380.7191234378
379 | 14.5361346215774,137.536134621577,-15114.4638653784
380 | 10.3467344217592,132.346734421759,-14873.6532655782
381 | 13.830327666598,134.830327666598,-14627.1696723334
382 | 7.2333729202237,127.233372920224,NA
383 | -5.93735656647783,113.062643433522,-14166.9373565665
384 | -9.87436084007066,108.125639159929,-13933.8743608401
385 | 8.8886476603226,125.888647660323,-13680.1113523397
386 | 4.60188231629073,120.601882316291,-13451.3981176837
387 | 7.72784218487801,122.727842184878,-13217.2721578151
388 | 13.5189328485284,127.518932848528,-12982.4810671515
389 | 1.38942330585812,114.389423305858,-12767.6105766941
390 | -3.97443899659605,108.025561003404,-12547.9744389966
391 | 20.6214498475297,131.62144984753,-12300.3785501525
392 | -17.5891438162142,92.4108561837858,-12117.5891438162
393 | 19.5065977801116,128.506597780112,-11861.4934022199
394 | -4.84964593084772,103.150354069152,-11668.8496459308
395 | -1.38744055675105,105.612559443249,-11450.3874405568
396 | -16.2906387836688,89.7093612163312,-11252.2906387837
397 | NA,NA,-11012.3907875452
398 | -14.1077439191535,89.8922560808465,-10830.1077439192
399 | -19.515255667366,83.484744332634,-10628.5152556674
400 | 5.22463500903376,107.224635009034,-10398.775364991
401 | 1.99549278023119,102.995492780231,-10199.0045072198
402 | 5.77574660328523,105.775746603285,-9994.22425339671
403 | NA,83.7973763071257,-9816.20262369287
404 | -6.16303165707752,91.8369683429225,-9610.16303165708
405 | 4.01609142850684,101.016091428507,-9404.98390857149
406 | -6.35258227266276,NA,-9222.35258227266
407 | -18.4550665848532,76.5449334151468,-9043.45506658485
408 | -2.02469673086054,91.9753032691395,-8838.02469673086
409 | -0.50630363624899,NA,-8649.50630363625
410 | 12.5306150732288,104.530615073229,-8451.46938492677
411 | -22.0974053309113,68.9025946690887,-8303.09740533091
412 | -19.820336271728,70.179663728272,-8119.82033627173
413 | 1.78896999200279,90.7889699920028,-7919.211030008
414 | -3.94451017471007,84.0554898252899,-7747.94451017471
415 | 4.89645642127055,91.8964564212706,-7564.10354357873
416 | 1.57748485101347,87.5774848510135,-7394.42251514899
417 | 1.84230346206637,86.8423034620664,-7223.15769653793
418 | 3.32933460843148,87.3293346084315,-7052.67066539157
419 | -24.1637096846804,58.8362903153196,-6913.16370968468
420 | -13.0541307351287,68.9458692648713,-6737.05413073513
421 | -3.35865946700822,77.6413405329918,-6564.35865946701
422 | 9.81587028777839,89.8158702877784,-6390.18412971222
423 | -7.61846407626734,71.3815359237327,-6248.61846407627
424 | 2.78964019452356,80.7896401945236,-6081.21035980548
425 | -5.27193577646954,71.7280642235305,-5934.27193577647
426 | 1.50240709409589,77.5024070940959,-5774.4975929059
427 | -16.0884527821159,58.9115472178841,-5641.08845278212
428 | 15.4292962206608,89.4292962206608,-5460.57070377934
429 | 3.25879534621418,76.2587953462142,-5325.74120465379
430 | 8.98841790279139,80.9884179027914,-5175.01158209721
431 | NA,56.8535045569069,-5055.14649544309
432 | 2.26963288868339,72.2696328886834,-4897.73036711132
433 | -0.789822202864098,68.2101777971359,-4761.78982220286
434 | -6.57361163315829,61.4263883668417,-4630.57361163316
435 | 3.72709697854662,70.7270969785466,-4485.27290302145
436 | 2.09503038825283,68.0950303882528,-4353.90496961175
437 | -8.98606592306685,56.0139340769332,-4233.98606592307
438 | -8.07371441570947,55.9262855842905,-4104.07371441571
439 | 0.154683117395073,63.1546831173951,-3968.8453168826
440 | -1.24350523171531,60.7564947682847,-3845.24350523172
441 | 1.19185092761792,62.1918509276179,-3719.80814907238
442 | 12.6537474269021,72.6537474269021,-3587.3462525731
443 | 16.9815130472735,75.9815130472735,-3464.01848695273
444 | 7.22378362786968,65.2237836278697,-3356.77621637213
445 | -2.48117470865023,54.5188252913498,-3251.48117470865
446 | 2.9185185132886,58.9185185132886,-3133.08148148671
447 | 8.19481917158294,63.1948191715829,-3016.80518082842
448 | 9.79612364010528,63.7961236401053,NA
449 | 0.820578391637766,53.8205783916378,-2808.17942160836
450 | 6.1426646400082,58.1426646400082,-2697.85733535999
451 | 1.81055069510415,52.8105506951042,NA
452 | 5.81213366932509,55.8121336693251,-2494.18786633067
453 | -6.02443900462176,42.9755609953782,-2407.02443900462
454 | 8.19837900644015,56.1983790064401,-2295.80162099356
455 | 1.00736125979011,48.0073612597901,-2207.99263874021
456 | 8.04073460227165,54.0407346022716,-2107.95926539773
457 | -6.48337707879908,38.5166229212009,-2031.4833770788
458 | -18.3872929869521,25.6127070130479,-1954.38729298695
459 | -13.4201710950248,29.5798289049752,-1862.42017109502
460 | 10.3181608414707,52.3181608414707,-1753.68183915853
461 | NA,20.3856184922044,-1701.6143815078
462 | 4.74079566536518,44.7407956653652,-1595.25920433463
463 | -0.243945480665689,38.7560545193343,NA
464 | -12.0641747521876,25.9358252478124,-1456.06417475219
465 | 2.18232551113565,39.1823255111356,-1366.81767448886
466 | -1.86569911095333,34.1343008890467,-1297.86569911095
467 | -8.34763477742517,26.6523652225748,-1233.34763477743
468 | -6.70377022638064,27.2962297736194,-1162.70377022638
469 | -3.76473723047327,29.2352627695267,-1092.76473723047
470 | 10.286014795104,42.286014795104,NA
471 | 7.64640558209949,38.6464055820995,-953.3535944179
472 | -12.4223242142771,17.5776757857229,NA
473 | -15.3580600522366,13.6419399477634,-856.358060052237
474 | -2.7546286970986,25.2453713029014,-786.754628697099
475 | -0.816413970090352,26.1835860299096,-729.81641397009
476 | -14.4558884298015,11.5441115701985,-690.455888429801
477 | 0.416813868853537,25.4168138688535,-624.583186131146
478 | -6.3038422883684,17.6961577116316,-582.303842288368
479 | 1.00385799829052,24.0038579982905,-527.996142001709
480 | -2.06206074115041,19.9379392588496,-486.06206074115
481 | 0.0775012076121474,21.0775012076121,-440.922498792388
482 | 9.17891125499711,29.1789112549971,-390.821088745003
483 | 9.91912355676178,28.9191235567618,-351.080876443238
484 | -14.5520523083471,3.44794769165293,NA
485 | 4.34636312334071,21.3463631233407,-284.653636876659
486 | -17.6050358691066,-1.60503586910659,-273.605035869107
487 | 16.4137338443373,31.4137338443373,-208.586266155663
488 | 3.2899079708406,17.2899079708406,-192.710092029159
489 | 8.67333785782265,21.6733378578227,-160.326662142177
490 | 4.38116307213271,16.3811630721327,NA
491 | -10.3668484444118,0.633151555588217,-131.366848444412
492 | 10.9882503272617,20.9882503272617,-89.0117496727383
493 | 3.80939939301041,12.8093993930104,-77.1906006069896
494 | -5.15484733031509,2.84515266968491,-69.1548473303151
495 | -1.52668212028725,5.47331787971275,-50.5266821202872
496 | 8.6644687139296,14.6644687139296,-27.3355312860704
497 | 8.2601550777275,13.2601550777275,-16.7398449222725
498 | 10.068234812964,14.068234812964,-5.93176518703597
499 | NA,1.86614273154914,-10.1338572684509
500 | 15.7706878161089,17.7706878161089,11.7706878161089
501 | -14.4009277915753,-13.4009277915753,-15.4009277915753
502 | -7.53976446840454,-7.53976446840454,-7.53976446840454
503 | -2.39549688382698,NA,-3.39549688382698
504 | 0.150538601456505,2.15053860145651,-3.84946139854349
505 | 13.8692553099038,16.8692553099038,4.86925530990378
506 | -10.6520900379986,-6.65209003799861,-26.6520900379986
507 | 2.64812009840244,7.64812009840244,-22.3518799015976
508 | -13.8078140169599,-7.80781401695988,NA
509 | 9.3274745751037,16.3274745751037,-39.6725254248963
510 | -10.2296710622364,-2.22967106223638,-74.2296710622364
511 | -6.74308147817495,2.25691852182505,-87.7430814781749
512 | -2.04816189964136,7.95183810035864,NA
513 | -4.04680527578107,6.95319472421893,-125.046805275781
514 | 6.88881952133604,18.888819521336,-137.111180478664
515 | NA,NA,-183.433059184529
516 | 2.25026164627264,16.2502616462726,-193.749738353727
517 | NA,15.0498977956706,-224.950102204329
518 | -17.7593042298037,-1.75930422980374,-273.759304229804
519 | -1.63460883196287,15.3653911680371,NA
520 | 27.7029727051096,45.7029727051096,-296.29702729489
521 | -10.7361182999644,8.26388170003561,-371.736118299964
522 | -5.21237791208784,14.7876220879122,-405.212377912088
523 | -6.44016542000442,14.5598345799956,-447.440165420004
524 | -8.79890556466638,13.2010944353336,-492.798905564666
525 | 15.0400170435458,38.0400170435458,-513.959982956454
526 | NA,43.3169597526548,-556.683040247345
527 | -1.30655624485332,23.6934437551467,-626.306556244853
528 | -17.7196362047579,8.28036379524212,NA
529 | -10.9606477924032,16.0393522075968,-739.960647792403
530 | NA,33.0862657830691,-778.913734216931
531 | -18.7977833173593,10.2022166826407,-859.797783317359
532 | 4.86281904133198,34.862819041332,-895.137180958668
533 | 11.1834814772347,42.1834814772347,-949.816518522765
534 | -13.6209563995055,18.3790436004945,-1037.62095639951
535 | -12.5468235629716,20.4531764370284,-1101.54682356297
536 | -3.26555483450762,NA,-1159.26555483451
537 | 4.88658046959534,39.8865804695953,-1220.1134195304
538 | -12.3929938030509,23.6070061969491,-1308.39299380305
539 | 8.03070370648856,NA,-1360.96929629351
540 | -6.74802966148667,NA,-1450.74802966149
541 | -1.47647267368283,37.5235273263172,-1522.47647267368
542 | 15.3877486640325,55.3877486640325,-1584.61225133597
543 | 10.9676724398033,51.9676724398033,-1670.0323275602
544 | 9.3147793545691,51.3147793545691,-1754.68522064543
545 | -7.94012618418086,35.0598738158191,-1856.94012618418
546 | -1.37319371997205,42.6268062800279,-1937.37319371997
547 | 12.2006659737446,57.2006659737446,-2012.79933402626
548 | -4.81172464883714,41.1882753511629,-2120.81172464884
549 | 6.10401463892476,53.1040146389248,-2202.89598536108
550 | 14.4818894802253,62.4818894802253,-2289.51811051977
551 | 10.7788185272438,59.7788185272438,-2390.22118147276
552 | 17.3001548312883,67.3001548312883,-2482.69984516871
553 | NA,53.768232141344,-2598.23176785866
554 | -9.94007146083015,42.0599285391699,-2713.94007146083
555 | -5.22612630251312,47.7738736974869,-2814.22612630251
556 | 18.1189657624604,72.1189657624604,-2897.88103423754
557 | 15.0978703227409,70.0978703227409,-3009.90212967726
558 | 7.38668774718786,63.3866877471879,-3128.61331225281
559 | -18.581447020701,NA,-3267.5814470207
560 | -1.12375074724748,56.8762492527525,-3365.12375074725
561 | 5.85665444925622,64.8566544492562,-3475.14334555074
562 | -14.3652590542801,45.6347409457199,-3614.36525905428
563 | 24.5213430151443,85.5213430151443,-3696.47865698486
564 | NA,58.2150264953006,-3847.7849735047
565 | -10.9204059018046,52.0795940981954,-3979.9204059018
566 | -9.50010826824079,54.4998917317592,-4105.50010826824
567 | 3.12253733613454,68.1225373361345,-4221.87746266387
568 | -10.5594292110351,55.4405707889649,-4366.55942921104
569 | 4.48381692363331,NA,-4484.51618307637
570 | -4.65187836757334,63.3481216324267,-4628.65187836757
571 | -8.86793981753588,60.1320601824641,-4769.86793981754
572 | -5.05244292497743,64.9475570750226,-4905.05244292498
573 | 8.64081748211685,79.6408174821168,-5032.35918251788
574 | -1.15970869995478,NA,-5185.15970869995
575 | -1.99233864920552,71.0076613507945,-5330.99233864921
576 | 8.85784942400344,82.8578494240034,-5467.142150576
577 | 10.4202387975456,85.4202387975456,-5614.57976120245
578 | -1.66548024942545,74.3345197505746,-5777.66548024943
579 | -6.83100965702602,70.168990342974,-5935.83100965703
580 | -0.997843412322988,NA,-6084.99784341232
581 | -18.7083786583352,60.2916213416648,-6259.70837865834
582 | 7.92464418249362,87.9246441824936,-6392.07535581751
583 | 9.34324345730678,90.3432434573068,-6551.65675654269
584 | 6.9439371015606,88.9439371015606,-6717.05606289844
585 | -0.684582273782629,82.3154177262174,-6889.68458227378
586 | -6.24008798954491,77.7599120104551,-7062.24008798954
587 | -2.96118115091889,82.0388188490811,-7227.96118115092
588 | -6.28125223452861,79.7187477654714,-7402.28125223453
589 | 7.61599025464523,NA,-7561.38400974535
590 | -11.4317964749455,76.5682035250545,-7755.43179647495
591 | -6.2244155769132,NA,-7927.22441557691
592 | 1.60975600364268,91.6097560036427,-8098.39024399636
593 | 5.70554210159641,96.7055421015964,-8275.2944578984
594 | 0.107760249353106,92.1077602493531,-8463.89223975065
595 | -9.92835630779587,83.0716436922041,-8658.9283563078
596 | 17.7062224004433,111.706222400443,-8818.29377759956
597 | 14.3444524995555,NA,-9010.65554750044
598 | 11.8992238727779,107.899223872778,-9204.10077612722
599 | 8.89310151924384,105.893101519244,-9400.10689848076
600 | NA,113.005247431322,-9588.99475256868
601 | 16.8429190819164,115.842919081916,-9784.15708091808
602 | 10.0863696600592,110.086369660059,-9989.91363033994
603 | 9.98779783587724,110.987797835877,-10191.0122021641
604 | 12.8563240453033,114.856324045303,-10391.1436759547
605 | 6.49892104696125,109.498921046961,-10602.501078953
606 | 5.24651452218954,109.24651452219,-10810.7534854778
607 | -9.21627499261352,95.7837250073865,-11034.2162749926
608 | 0.0738886246236505,106.073888624624,-11235.9261113754
609 | -0.281054611973225,106.718945388027,-11449.281054612
610 | 3.54880169170882,NA,-11660.4511983083
611 | -11.8801973858473,97.1198026141527,-11892.8801973858
612 | -7.57738103374405,102.422618966256,-12107.5773810337
613 | NA,96.4907539324426,-12335.5092460676
614 | 11.6026040384085,NA,-12532.3973959616
615 | -4.94527199778479,108.054728002215,-12773.9452719978
616 | -33.9842637672717,80.0157362327283,-13029.9842637673
617 | 24.1474946308353,139.147494630835,-13200.8525053692
618 | 9.58667847703962,125.58667847704,-13446.413321523
619 | 5.8833513123193,122.883351312319,-13683.1166486877
620 | 4.46205794388402,122.462057943884,-13919.5379420561
621 | -2.67729219298474,116.322707807015,-14163.677292193
622 | -4.77668775187588,115.223312248124,-14404.7766877519
623 | -0.610736587471765,120.389263412528,-14641.6107365875
624 | -8.99033222660606,113.009667773394,-14892.9903322266
625 | 21.7737167035625,144.773716703563,-15107.2262832964
626 | 0.122944768239039,124.122944768239,-15375.8770552318
627 | -2.99935744496538,122.000642555035,-15627.999357445
628 | 0.645739133235881,126.645739133236,-15875.3542608668
629 | NA,120.628743143074,-16135.3712568569
630 | NA,125.467171689036,-16386.532828311
631 | -8.86163035859746,120.138369641403,-16649.8616303586
632 | -3.36925028475569,126.630749715244,-16903.3692502848
633 | 6.10943352488785,137.109433524888,-17154.8905664751
634 | -2.80755747300529,129.192442526995,-17426.807557473
635 | 0.233352656946647,133.233352656947,-17688.7666473431
636 | -10.4216628181219,123.578337181878,NA
637 | -19.8531563245835,115.146843675417,-18244.8531563246
638 | -1.59241388441367,134.407586115586,-18497.5924138844
639 | -16.0801538720373,120.919846127963,-18785.080153872
640 | 19.8136204336778,157.813620433678,-19024.1863795663
641 | -2.76268902185469,136.237310978145,-19323.7626890219
642 | 6.32867928723853,146.328679287239,-19593.6713207128
643 | -0.513952132335796,140.486047867664,-19881.5139521323
644 | 12.142441537474,154.142441537474,-20151.8575584625
645 | -8.88634282106792,NA,-20457.8863428211
646 | 6.80800277300723,150.808002773007,-20729.191997227
647 | -10.6659654759033,134.334034524097,-21035.6659654759
648 | 13.2429787415029,159.242978741503,NA
649 | -20.8510162168392,126.148983783161,-21629.8510162168
650 | -9.34950533937452,138.650494660625,-21913.3495053394
651 | 3.30576427185301,152.305764271853,-22197.6942357281
652 | 10.0016607196553,160.001660719655,NA
653 | 2.53128589715722,153.531285897157,-22798.4687141028
654 | NA,169.895018953569,-23086.1049810464
655 | 3.47197899027792,156.471978990278,-23405.5280210097
656 | 1.18837147398423,155.188371473984,-23714.811628526
657 | 7.98438253399258,162.984382533993,-24017.015617466
658 | -16.374389860157,NA,-24352.3743898602
659 | 8.45530003363516,165.455300033635,-24640.5446999664
660 | -14.3862822932361,143.613717706764,NA
661 | 12.7267737849221,171.726773784922,-25268.2732262151
662 | NA,NA,-25600.0739069623
663 | -26.7297367203789,134.270263279621,-25947.7297367204
664 | NA,170.25019592393,-26235.7498040761
665 | -4.04417320720569,158.955826792794,-26573.0441732072
666 | -9.79667564403804,154.203324355962,-26905.796675644
667 | 4.56296519995819,169.562965199958,-27220.4370348
668 | -1.38036169780591,164.619638302194,-27557.3803616978
669 | -6.29758881902369,160.702411180976,-27895.297588819
670 | -3.10087613861085,164.899123861389,-28227.1008761386
671 | 10.2721819854403,179.27218198544,-28550.7278180146
672 | 6.43835335837489,NA,-28893.5616466416
673 | -24.6551758784006,146.344824121599,-29265.6551758784
674 | -6.53113155404074,165.468868445959,NA
675 | 21.8051418505153,194.805141850515,-29907.1948581495
676 | -16.0556115612376,157.944388438762,-30292.0556115612
677 | -14.1004222808724,160.899577719128,-30639.1004222809
678 | -12.2571868867353,NA,-30988.2571868867
679 | 7.41647118434432,184.416471184344,-31321.5835288157
680 | 16.4538615791345,194.453861579135,-31667.5461384209
681 | -2.94910909522574,176.050890904774,-32043.9491090952
682 | -8.36667793006495,171.633322069935,-32408.3666779301
683 | -1.5228787840537,179.477121215946,-32762.5228787841
684 | 2.49169804148079,184.491698041481,-33121.5083019585
685 | -7.38269093762674,175.617309062373,-33496.3826909376
686 | 12.3039925943835,196.303992594383,-33843.6960074056
687 | 4.66195756763859,189.661957567639,-34220.3380424324
688 | -3.07966186394432,182.920338136056,-34599.0796618639
689 | 6.95193574546771,193.951935745468,-34962.0480642545
690 | 0.572548334057169,188.572548334057,-35343.4274516659
691 | -1.0328032107061,187.967196789294,-35722.0328032107
692 | -12.2695886929943,177.730411307006,-36112.269588693
693 | -0.211269544563074,190.788730455437,-36481.2112695446
694 | -4.52417054153588,187.475829458464,-36868.5241705415
695 | -6.68824131702804,186.311758682972,-37255.688241317
696 | -5.58879955171365,188.411200448286,-37641.5887995517
697 | -3.08228372182559,191.917716278174,-38028.0822837218
698 | 4.39915616252833,200.399156162528,-38411.6008438375
699 | 2.97113044134424,199.971130441344,NA
700 | 2.51689314044674,200.516893140447,-39201.4831068596
701 | 6.50793823464231,205.507938234642,-39594.4920617654
702 | NA,196.383918725058,-40003.6160812749
703 | 12.30910412972,213.30910412972,-40388.6908958703
704 | NA,202.494664256963,-40803.505335743
705 | -8.26626370175504,194.733736298245,-41217.2662637018
706 | 7.09598473635784,211.095984736358,-41608.9040152636
707 | 4.20736145471901,209.207361454719,-42020.7926385453
708 | 7.20928511408149,213.209285114081,-42428.7907148859
709 | -14.9215300701712,192.078469929829,-42863.9215300702
710 | -10.6006475801135,197.399352419886,-43274.6006475801
711 | NA,210.337914204509,-43679.6620857955
712 | 2.79694162767306,212.796941627673,-44097.2030583723
713 | 9.79969236001963,NA,-44511.20030764
714 | -7.98328805064708,204.016711949353,-44951.9832880506
715 | 17.4287672641476,230.428767264148,-45351.5712327358
716 | -20.9711644513341,193.028835548666,-45816.9711644513
717 | -10.1622078104068,204.837792189593,-46235.1622078104
718 | 11.1183622922041,227.118362292204,-46644.8816377078
719 | NA,227.823152607413,-47078.1768473926
720 | -18.3723157233631,199.627684276637,-47542.3723157234
721 | 4.39951024419529,223.399510244195,NA
722 | -8.85273755503474,211.147262444965,NA
723 | 6.14117620467376,227.141176204674,-48834.8588237953
724 | -4.59419644405593,217.405803555944,-49288.5941964441
725 | -8.07740017685674,214.922599823143,-49737.0774001769
726 | NA,203.008549435616,-50196.9914505644
727 | -3.55856104486838,221.441438955132,-50628.5585610449
728 | 0.962351091015784,226.962351091016,-51075.037648909
729 | 5.81955122618082,232.819551226181,-51523.1804487738
730 | -8.42979135854895,219.570208641451,-51992.4297913585
731 | -3.17572563345997,225.82427436654,NA
732 | 2.92695317388403,232.926953173884,-52897.0730468261
733 | 5.03750795944561,236.037507959446,-53355.9624920406
734 | -2.92296793442726,229.077032065573,-53826.9229679344
735 | -5.226490994492,227.773509005508,-54294.2264909945
736 | -0.339425686967397,233.660574313033,NA
737 | 11.9503413763604,246.95034137636,-55213.0496586236
738 | -2.99160073266251,233.008399267337,-55698.9916007327
739 | 2.92200591386977,239.92200591387,-56166.0779940861
740 | -10.1742630918038,227.825736908196,-56654.1742630918
741 | -21.9527512229075,217.047248777092,-57142.9527512229
742 | 18.997685301614,258.997685301614,-57581.0023146984
743 | 2.33524017701429,243.335240177014,-58078.664759823
744 | -2.43527518166968,239.56472481833,-58566.4352751817
745 | -6.43881515068881,236.561184849311,-59055.4388151507
746 | 7.82192853362503,251.821928533625,-59528.1780714664
747 | 11.8973084014001,256.8973084014,-60013.1026915986
748 | 14.476643305774,260.476643305774,-60501.5233566942
749 | 4.62220893460546,251.622208934605,-61004.3777910654
750 | 5.77963251004145,253.779632510041,-61498.22036749
751 | -2.71413796282027,246.28586203718,-62003.7141379628
752 | 19.7275221438101,269.72752214381,-62480.2724778562
753 | NA,237.366404782388,-63014.6335952176
754 | NA,229.214578246571,-63526.7854217534
755 | 7.21683167239703,260.216831672397,-64001.7831683276
756 | 10.7607037025968,NA,-64505.2392962974
757 | 4.96695941653927,259.966959416539,-65020.0330405835
758 | 1.59361033905506,257.593610339055,-65534.4063896609
759 | 16.5683974134533,273.568397413453,-66032.4316025865
760 | 13.6694365106732,271.669436510673,-66550.3305634893
761 | 2.10701340310128,261.107013403101,NA
762 | 1.98669710810489,261.986697108105,-67598.0133028919
763 | -5.99328805380289,255.006711946197,-68126.9932880538
764 | NA,261.678361002923,-68644.3216389971
765 | 12.5324533047917,275.532453304792,-69156.4675466952
766 | 10.2897497811132,274.289749781113,-69685.7102502189
767 | -5.10581207471904,259.894187925281,-70230.1058120747
768 | -19.1261177393958,246.873882260604,-70775.1261177394
769 | -7.22703146618972,259.77296853381,-71296.2270314662
770 | -7.49538908560765,260.504610914392,-71831.4953890856
771 | -13.9903345539331,255.009665446067,-72374.9903345539
772 | 12.4212102371258,282.421210237126,-72887.5787897629
773 | -6.91233050034148,264.087669499659,-73447.9123305003
774 | -10.0817992894704,261.91820071053,-73994.0817992895
775 | 2.67354870049896,NA,-74526.3264512995
776 | 0.244159559156272,274.244159559156,NA
777 | 6.10769536694611,281.107695366946,-75618.8923046331
778 | 12.7768376735555,288.776837673556,-76163.2231623264
779 | -0.213771668207007,276.786228331793,-76729.2137716682
780 | -10.3993141088909,267.600685891109,-77294.3993141089
781 | 7.48761143506866,286.487611435069,-77833.5123885649
782 | -11.9745614858962,268.025438514104,-78411.9745614859
783 | 6.82548217219321,287.825482172193,-78954.1745178278
784 | 0.323354565925003,282.323354565925,-79523.6766454341
785 | -23.1883552140509,259.811644785949,-80112.188355214
786 | -2.87575630104142,281.124243698959,-80658.875756301
787 | NA,278.547640198234,-81231.4523598018
788 | -3.87738747426764,NA,NA
789 | -12.096618063484,274.903381936516,-82381.0966180635
790 | NA,298.805416266303,-82933.1945837337
791 | 4.72970913814775,293.729709138148,-83516.2702908619
792 | 12.521333640377,302.521333640377,-84087.4786663596
793 | -18.2574762077492,272.742523792251,-84699.2574762077
794 | 6.02026036068589,298.020260360686,-85257.9797396393
795 | NA,296.541770884719,-85845.4582291153
796 | -3.81836419939036,290.18163580061,-86439.8183641994
797 | -14.3567190253763,280.643280974624,-87039.3567190254
798 | -6.5638675701017,289.436132429898,-87622.5638675701
799 | -1.47178669439696,295.528213305603,-88210.4717866944
800 | 1.89516988684296,299.895169886843,-88802.1048301132
801 | NA,306.651240628037,-89393.348759372
802 | -3.70837353825334,296.291626461747,-90003.7083735382
803 | -4.84101338158333,296.158986618417,-90605.8410133816
804 | -4.48577359709881,297.514226402901,-91208.4857735971
805 | 7.26253252251694,310.262532522517,-91801.7374674775
806 | 12.5067832115923,316.506783211592,-92403.4932167884
807 | NA,297.487601852177,-93032.5123981478
808 | -2.64761694098151,303.352383059019,-93638.647616941
809 | -0.345512000049881,306.65448799995,-94249.345512
810 | 2.81216358061449,310.812163580614,-94861.1878364194
811 | -4.901620176634,304.098379823366,-95485.9016201766
812 | -1.89020765849988,308.1097923415,-96101.8902076585
813 | 3.96551910887607,314.965519108876,-96717.0344808911
814 | -2.81510163439772,309.184898365602,-97346.8151016344
815 | 2.03826705153913,315.038267051539,-97966.9617329485
816 | -19.9049048903275,294.095095109672,-98615.9049048903
817 | 3.79577063743909,318.795770637439,-99221.2042293626
818 | -1.84481321294895,314.155186787051,-99857.844813213
819 | -14.4808343484659,302.519165651534,-100503.480834348
820 | -12.3654245780611,305.634575421939,-101136.365424578
821 | -13.5745017509701,305.42549824903,-101774.574501751
822 | -4.88058958837628,315.119410411624,-102404.880589588
823 | 7.67619522784334,328.676195227843,-103033.323804772
824 | -2.63665843383552,NA,-103686.636658434
825 | -7.30416751842107,315.695832481579,-104336.304167518
826 | 1.92562967235124,325.925629672351,-104974.074370328
827 | NA,339.717481367442,-105610.282518633
828 | 2.23258111232975,328.23258111233,-106273.767418888
829 | 11.5195113488801,338.51951134888,-106917.480488651
830 | 4.02662169584606,332.026621695846,-107579.973378304
831 | 7.92042568207374,336.920425682074,-108233.079574318
832 | 18.7356538111309,348.735653811131,-108881.264346189
833 | -5.92017965418968,325.07982034581,-109566.920179654
834 | -26.894880930526,305.105119069474,-110250.894880931
835 | 6.81938392108683,339.819383921087,-110882.180616079
836 | -3.60095735329387,330.399042646706,-111559.600957353
837 | 5.69045296542246,340.690452965422,-112219.309547035
838 | 20.6849954513645,356.684995451365,-112875.315004549
839 | 20.702735826414,357.702735826414,-113548.297264174
840 | -11.8567015297049,326.143298470295,-114255.85670153
841 | 12.1709085598216,351.170908559822,-114908.82909144
842 | 0.713620788406968,340.713620788407,-115599.286379212
843 | -8.99279807174457,332.007201928255,-116289.992798072
844 | -11.4838945989798,NA,-116975.483894599
845 | -12.7031813879043,330.296818612096,-117661.703181388
846 | 12.8365859168959,356.836585916896,-118323.163414083
847 | -12.1230145151778,332.876985484822,-119037.123014515
848 | 23.0290001467885,369.029000146788,-119692.970999853
849 | -4.56346702637965,342.43653297362,-120413.563467026
850 | -5.06442266506751,342.935577334932,-121109.064422665
851 | -19.8785822772327,329.121417722767,-121820.878582277
852 | -14.1817793915634,335.818220608437,-122514.181779392
853 | 10.5802105404704,361.58021054047,-123190.41978946
854 | 15.45681907697,367.45681907697,-123888.543180923
855 | 3.61588996894255,356.615889968943,-124605.384110031
856 | -0.379008241556369,353.620991758444,-125316.379008242
857 | 35.3330623386149,390.333062338615,-125989.666937661
858 | -9.64008745155252,346.359912548447,-126745.640087452
859 | 8.5060502052833,365.506050205283,-127440.493949795
860 | -1.21751958512581,356.782480414874,-128165.217519585
861 | 1.90862598249357,360.908625982494,NA
862 | -7.64274867433343,352.357251325667,-129607.642748674
863 | -18.6955059500918,342.304494049908,-130339.69550595
864 | 2.19827955027047,364.19827955027,-131041.80172045
865 | 1.46736092872906,364.467360928729,-131767.532639071
866 | -10.8376871707957,353.162312829204,-132506.837687171
867 | 18.7147542200113,383.714754220011,-133206.28524578
868 | 5.2694373547109,371.269437354711,-133950.730562645
869 | -0.56214583900979,366.43785416099,-134689.562145839
870 | 12.4686452939199,380.46864529392,-135411.531354706
871 | -7.70520528425845,361.294794715742,-136168.705205284
872 | 8.53248013035957,378.53248013036,-136891.46751987
873 | -13.1469811176334,357.853018882367,-137654.146981118
874 | 4.28836597260642,376.288365972606,-138379.711634027
875 | -3.49319938905116,369.506800610949,-139132.493199389
876 | -2.39407900311311,371.605920996887,-139878.394079003
877 | -20.5511964573748,354.448803542625,-140645.551196457
878 | 8.30524439119723,384.305244391197,-141367.694755609
879 | 0.730924438670374,377.73092443867,NA
880 | 11.1746631470609,389.174663147061,-142872.825336853
881 | -8.28357820457808,370.716421795422,-143649.283578205
882 | 3.49653801389179,383.496538013892,NA
883 | NA,NA,-145164.41895134
884 | 4.030543463448,386.030543463448,-145919.969456537
885 | -11.3353842697382,371.664615730262,-146700.33538427
886 | -0.189415533963909,383.810584466036,-147456.189415534
887 | -5.45520158428216,379.544798415718,-148230.455201584
888 | -9.39570968775395,376.604290312246,-149005.395709688
889 | -9.80991742243168,377.190082577568,-149778.809917422
890 | 18.0737643063123,406.073764306312,-150525.926235694
891 | -14.597886555614,NA,-151335.597886556
892 | -8.2474392833107,381.752560716689,-152108.247439283
893 | -13.9142250964059,377.085774903594,-152894.914225096
894 | 3.64114152480662,395.641141524807,-153660.358858475
895 | -12.2538944084125,380.746105591587,-154461.253894408
896 | 8.61262111265418,402.612621112654,-155227.387378887
897 | 1.6646039274845,396.664603927485,-156023.335396073
898 | 3.62686247649461,399.626862476495,-156812.373137524
899 | -22.0944225280066,374.905577471993,-157631.094422528
900 | 26.4133175238047,424.413317523805,-158377.586682476
901 | -0.999906773485587,398.000093226514,-159201.999906773
902 | -7.29171790803853,392.708282091961,-160007.291717908
903 | 8.92827905043416,409.928279050434,-160792.07172095
904 | -7.05450685566596,394.945493144334,-161611.054506856
905 | 8.58031162020582,411.580311620206,-162400.41968838
906 | 9.90817145852465,413.908171458525,-163206.091828541
907 | -2.25642127758782,402.743578722412,-164027.256421278
908 | 8.02402671991871,414.024026719919,-164827.97597328
909 | 3.34787820264946,410.347878202649,-165645.652121797
910 | 7.12442678704475,415.124426787045,-166456.875573213
911 | -16.0060291759219,392.993970824078,-167297.006029176
912 | 8.20242172928525,418.202421729285,-168091.797578271
913 | -6.17349506757299,404.826504932427,-168927.173495068
914 | -17.9347410946175,394.065258905383,-169761.934741095
915 | 6.74612335606387,419.746123356064,-170562.253876644
916 | -3.72305064843455,410.276949351565,-171399.723050648
917 | -18.3642681942493,396.635731805751,-172243.364268194
918 | 3.98659785658303,419.986597856583,-173052.013402143
919 | 2.46697214771214,419.466972147712,-173886.533027852
920 | 8.25314523898725,426.253145238987,-174715.746854761
921 | -16.3254604613521,402.674539538648,-175577.325460461
922 | 10.2466931139349,430.246693113935,-176389.753306886
923 | 11.2579773409734,432.257977340973,-177229.742022659
924 | 14.8868349806631,436.886834980663,-178069.113165019
925 | -9.91027079136712,413.089729208633,-178938.910270791
926 | 10.0846152523678,434.084615252368,-179765.915384748
927 | 10.033961132722,435.033961132722,-180614.966038867
928 | 19.4872792864784,445.487279286478,-181456.512720714
929 | -2.36498338307293,424.635016616927,-182331.364983383
930 | -18.0742703814286,409.925729618571,-183202.074270381
931 | 4.2854387665671,433.285438766567,-184036.714561233
932 | 12.3281289588236,442.328128958824,-184887.671871041
933 | -13.2408915898606,417.759108410139,-185774.24089159
934 | 0.890335743848767,432.890335743849,-186623.109664256
935 | -10.896519369116,422.103480630884,-187499.896519369
936 | -11.0894051156182,422.910594884382,-188367.089405116
937 | 15.2635136282408,450.263513628241,-189209.736486372
938 | 11.4550049494299,447.45500494943,-190084.544995051
939 | 4.00249613895046,441.00249613895,-190964.997503861
940 | 4.47618776899719,442.476187768997,-191839.523812231
941 | -3.56514921510299,435.434850784897,-192724.565149215
942 | -1.32381393363285,438.676186066367,-193601.323813934
943 | 1.4656938664313,442.465693866431,-194479.534306134
944 | -1.45299991977582,440.547000080224,-195365.45299992
945 | 4.68641279070251,447.686412790702,-196244.313587209
946 | -11.027091123669,432.972908876331,-197147.027091124
947 | 1.89007624567715,446.890076245677,-198023.109923754
948 | 7.25506221307761,453.255062213078,-198908.744937787
949 | NA,434.21105784034,-199821.78894216
950 | 5.08629555738184,453.086295557382,-200698.913704443
951 | NA,456.473955620616,-201593.526044379
952 | 0.892291380965081,450.892291380965,-202499.107708619
953 | -8.24246944544852,442.757530554551,-203409.242469445
954 | 1.19060884252431,453.190608842524,-204302.809391157
955 | -13.9821316106556,NA,-205222.982131611
956 | 2.17974715467324,456.179747154673,-206113.820252845
957 | 4.7101061731545,459.710106173154,-207020.289893827
958 | -1.64052577919065,454.359474220809,-207937.640525779
959 | -5.76787086094377,451.232129139056,-208854.767870861
960 | -5.1053205264906,452.894679473509,-209769.105320526
961 | -1.04982179764579,457.950178202354,-210682.049821798
962 | 4.14831418862799,464.148314188628,-211595.851685811
963 | 5.73845459686489,NA,-212515.261545403
964 | -2.83011653497051,459.169883465029,-213446.830116535
965 | -11.6661176875241,451.333882312476,-214380.666117688
966 | -1.99697151026301,462.003028489737,-215297.99697151
967 | 10.4496193442997,475.4496193443,-216214.550380656
968 | -5.27166075885522,460.728339241145,-217161.271660759
969 | 6.62297910493918,473.622979104939,-218082.377020895
970 | 2.38962837370768,470.389628373708,-219021.610371626
971 | -13.2453446481541,455.754655351846,-219974.245344648
972 | 5.79351590528125,475.793515905281,-220894.206484095
973 | -11.1618460956336,459.838153904366,-221852.161846096
974 | 13.8413127025685,485.841312702569,-222770.158687297
975 | 4.17027048654253,477.170270486543,-223724.829729513
976 | -5.90682738264821,468.093172617352,-224681.906827383
977 | 11.8295657143184,486.829565714318,-225613.170434286
978 | -0.923854101223184,475.076145898777,-226576.923854101
979 | 3.07007463924511,480.070074639245,-227525.929925361
980 | -9.10399340534712,468.896006594653,-228493.103993405
981 | -6.93069056295894,472.069309437041,-229447.930690563
982 | 24.0280355364959,504.028035536496,-230375.971964464
983 | NA,478.639679989278,-231363.360320011
984 | 1.91160603549883,483.911606035499,-232322.088393965
985 | -0.588654829691102,482.411345170309,-233289.58865483
986 | 11.6767619492437,495.676761949244,-234244.323238051
987 | 6.7078055585454,491.707805558545,-235218.292194441
988 | 6.93322115787572,NA,-236189.066778842
989 | 7.9970415128052,494.997041512805,-237161.002958487
990 | -10.6402636617322,477.359736338268,-238154.640263662
991 | -8.73412125325873,480.265878746741,-239129.734121253
992 | -7.6152057758142,482.384794224186,-240107.615205776
993 | 6.71614251666255,497.716142516663,NA
994 | -3.9622651415599,488.03773485844,-242067.962265142
995 | 2.88590283450715,495.885902834507,-243046.114097165
996 | -5.92764500774785,488.072354992252,-244041.927645008
997 | 18.263939089187,513.263939089187,-245006.736060911
998 | -4.38753536537589,491.612464634624,-246020.387535365
999 | -1.91323376473378,495.086766235266,-247010.913233765
1000 | -4.77687579005279,493.223124209947,-248008.77687579
1001 | 7.09218825602436,506.092188256024,-248993.907811744
1002 |
--------------------------------------------------------------------------------
/data/merge_practice_1.csv:
--------------------------------------------------------------------------------
1 | id,name,job,location
2 | 1,Alice,communications,New York
3 | 2,Bob,communications,Cambridge
4 | 3,Chuck,hacker,New York
5 | 4,Dave,communications,Berkeley
6 | 5,Eve,spy,Cambridge
--------------------------------------------------------------------------------
/data/merge_practice_2.csv:
--------------------------------------------------------------------------------
1 | id,name,job,location
2 | 1,Alice,hacker,cambridge
3 | 4,Dave,tree,palo alto
4 | 5,Eve,handler,new york
5 | 6,Faith,hacker,berkeley
--------------------------------------------------------------------------------
/data/merge_practice_3.csv:
--------------------------------------------------------------------------------
1 | location,population
2 | Cambridge,107289
3 | New York,8406000
4 | Berkeley,116768
5 | Palo Alto,66642
6 | Reno,233294
--------------------------------------------------------------------------------
/data/mydata.Rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/R-for-Data-Science/798d5b220d66afea5644a245c7327e4bb740039a/data/mydata.Rda
--------------------------------------------------------------------------------
/data/mydata.csv:
--------------------------------------------------------------------------------
1 | "n","c","b","d","really.long.and.complicated.variable.name"
2 | 1,"one",TRUE,2015-07-27,999
3 | 2,"two",TRUE,2015-08-03,999
4 | 3,"three",FALSE,2015-07-20,999
5 |
--------------------------------------------------------------------------------
/data/pew.sav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/R-for-Data-Science/798d5b220d66afea5644a245c7327e4bb740039a/data/pew.sav
--------------------------------------------------------------------------------
/examples/save_console_output.R:
--------------------------------------------------------------------------------
1 | sink('examples/save_console_output.txt') # divert console output to this file until the matching sink() below
2 | 
3 | print("This is an example of redirecting console output to a pdf using sink()") # NOTE(review): the message says "pdf", but the sink target above is a .txt file
4 | 
5 | t.test(c(1,2,3,4), c(5,6,7,8)) # the test summary also ends up in the file (see save_console_output.txt)
6 | 
7 | sink() # remove the diversion so output goes back to the console
8 |
--------------------------------------------------------------------------------
/examples/save_console_output.txt:
--------------------------------------------------------------------------------
1 | [1] "This is an example of redirecting console output to a pdf using sink()"
2 |
3 | Welch Two Sample t-test
4 |
5 | data: c(1, 2, 3, 4) and c(5, 6, 7, 8)
6 | t = -4.3818, df = 6, p-value = 0.004659
7 | alternative hypothesis: true difference in means is not equal to 0
8 | 95 percent confidence interval:
9 | -6.233715 -1.766285
10 | sample estimates:
11 | mean of x mean of y
12 | 2.5 6.5
13 |
14 |
--------------------------------------------------------------------------------
/instructor/day_four.R:
--------------------------------------------------------------------------------
1 | ## ---- echo=FALSE---------------------------------------------------------
2 | knitr::opts_knit$set(root.dir = '../')
3 |
4 | ## ------------------------------------------------------------------------
5 | ## I want to create a vector (not a matrix) of 2 to the power of n where n is 1 to 10.
6 | 
7 | mat <- c(rep(NA, 10)) # first create an NA-filled placeholder vector of length 10 (replaced by the length-6 versions below)
8 | 
9 | # There are many ways to do the same task
10 | mat <- c(rep(NA, 6)) # six slots, one for each n in 5 to 10
11 | for(i in 1:6){ # I want to fill the vector with 2 to the power of n where n is 5 to 10.
12 | mat[i] <- 2^(i+4) # i runs 1 to 6, so i+4 runs 5 to 10
13 | } # or
14 | 
15 | mat <- c(rep(NA, 6))
16 | for(i in 5:10){ # loop over n directly
17 | mat[i-4] <- 2^i # shift the index so that n = 5 lands in slot 1
18 | } # by setting sequence and statement accordingly
19 |
20 | ## ------------------------------------------------------------------------
21 | a <- c("Berkeley", "SF", "Oakland") # city names
22 | b <- c(20, 18, 22) # one temperature per city, in the same order as a
23 | # Build the data frame column by column: cbind(a, b) would first make a character matrix and turn the temperatures into text.
24 | city.temp <- data.frame(a, b)
25 | 
26 | for(city in c("Berkeley", "Walnut Creek", "Richmond")){
27 |   if(city %in% city.temp$a){
28 |     print(city.temp[city.temp$a == city,])
29 |     # if we have the city in our data, then print its temperature and the name of the city
30 |   }else{
31 |     print(paste(city, "is NOT in the data. :("))
32 |     # if not, then just print the name of the city next to "is NOT in the data. :("
33 |   }
34 | } # Loops can be as long and complicated as you like, but they are often not very efficient.
35 |
36 | ## ---- eval=FALSE---------------------------------------------------------
37 | ## system.time(
38 | ## for(i in 1:1000){
39 | ## print(i)
40 | ## })
41 | ##
42 | ## system.time(
43 | ## for(i in 1:1000){
44 | ## print(i)
45 | ## if(i == 50) break
46 | ## })
47 |
48 | ## ------------------------------------------------------------------------
49 | x <- 7 # 7 is not greater than 10, so the else branch below runs
50 | if(x > 10){
51 | print(x)
52 | 
53 | }else{ # "else" should not start its own line: at top level R ends the if statement at the closing brace, so a leading else is a syntax error.
54 | # Always let it be preceded by a closing brace on the same line.
55 | print("NOT BIG ENOUGH!!")
56 | }
57 |
58 | ## ------------------------------------------------------------------------
59 | # ifelse(test, yes, no) is vectorized: it picks yes or no separately for each element of test
60 | gender <- sample(c("male", "female"), 100, replace=TRUE) # 100 random draws, with replacement
61 | gender
62 | gender <- ifelse(gender=="male", 1, 0) # recode: "male" becomes 1, everything else becomes 0
63 | gender
64 |
65 | ## ---- eval=FALSE---------------------------------------------------------
66 | ## # if there are multiple statements, then use ; to separate each statement
67 | ## x <- 0
68 | ## while(x < 5) {print(x <- x+1)}
69 | ## x <- 1
70 | ## while(x < 5) {x <- x+1; if (x == 3) break; print(x)} # break the loop when x=3
71 |
72 | ## ------------------------------------------------------------------------
73 | f <- function(x) x + 1 # a one-line function: the body is the single expression x + 1
74 | class(f) # "function"
75 |
76 | ## ------------------------------------------------------------------------
77 | formals(f) # the argument list (here just x)
78 | body(f) # the code the function runs: x + 1
79 | environment(f) # the environment in which f was defined
80 |
81 | ## ------------------------------------------------------------------------
82 | f <- function(x) x + y # y is not an argument, so it is looked up in the environment where f was defined
83 | y <- 1 # defined after f, but before f is called
84 | f(x = 1) # 2
85 |
86 | ## ------------------------------------------------------------------------
87 | y <- 9001 # global y
88 | f <- function(x) {
89 | y <- 1 # local to f; hides the global y inside f
90 | g <- function (x) {
91 | x + y # y is found in f, where g was defined, before the global environment is searched
92 | }
93 | g(x)
94 | }
95 | f(1) # 2, not 9002
96 |
97 | ## ------------------------------------------------------------------------
98 | h <- function(){ # assigns a local a, then prints it
99 | if (!exists('a')) { # exists() also searches enclosing environments, so the global a created earlier in this script is found
100 | a <- 1 # local to this call only
101 | }
102 | else { # an else on its own line parses here only because it is inside the function's braces
103 | a <- 9000
104 | }
105 | print(a)
106 | }
107 | h()
108 | h() # same output as the first call: the local a is discarded when h returns
109 |
110 | ## ------------------------------------------------------------------------
111 | in_to_cm <- function(x) x * 2.5 # approximate factor; redefined with 2.54 further down
112 | in_to_cm(69) # 172.5
113 |
114 | ## ------------------------------------------------------------------------
115 | in_to_m <- function(x){ # inches to metres, built on top of in_to_cm
116 | in_to_cm(x) / 100 # in_to_cm is looked up each time in_to_m is called
117 | }
118 | in_to_m(69) # 1.725 with the 2.5 factor
119 |
120 | ## ------------------------------------------------------------------------
121 | in_to_cm <- function(x) x * 2.54 # replace the earlier definition with the 2.54 factor
122 | in_to_m(69) # in_to_m was not touched, but it now uses the new in_to_cm
123 |
124 | ## ------------------------------------------------------------------------
125 | 69 == c(69) # TRUE: a single number is already a vector of length one
126 |
127 | ## ------------------------------------------------------------------------
128 | heights <- c(69,54,73,82) # several heights in one numeric vector
129 | in_to_m(heights) # * and / work element by element, so all four heights are converted in one call
130 |
131 | ## ---- eval=FALSE---------------------------------------------------------
132 | ## heights <- list(69,54,73,82)
133 | ## in_to_m(heights)
134 |
135 | ## ------------------------------------------------------------------------
136 | in_to_m(heights[[1]]) # [[ ]] extracts one element at a time
137 | in_to_m(heights[[2]])
138 | in_to_m(heights[[3]]) # repeating the call for every element by hand does not scale
139 |
140 | ## ------------------------------------------------------------------------
141 | lapply(heights, in_to_m) # apply in_to_m to every element; the result is always a list
142 |
143 | ## ------------------------------------------------------------------------
144 | lapply(heights, FUN = function(x) x %/% 12) # anonymous function; %/% is integer division, so this gives whole feet
145 |
146 | ## ------------------------------------------------------------------------
147 | dat <- read.csv('data/large.csv') # path is relative to the working directory
148 | str(dat) # compact overview of the columns and their types
149 | lapply(dat, mean) # a data frame is a list of columns; columns containing NA give NA because mean() keeps missing values by default
150 |
151 | ## ---- eval=FALSE---------------------------------------------------------
152 | ## lapply(dat, mean(na.rm = TRUE))
153 |
154 | ## ---- eval=FALSE---------------------------------------------------------
155 | ## Map(mean, dat, na.rm=TRUE)
156 |
157 | ## ------------------------------------------------------------------------
158 | if(!requireNamespace('parallelMap', quietly = TRUE)) install.packages('parallelMap') # install only when the package is missing, not on every run
159 | library(parallelMap)
160 | system.time(Map(median, dat, na.rm=TRUE)) # baseline: base R Map over the columns of dat
161 | system.time(parallelMap(median, dat, na.rm=TRUE)) # NOTE(review): no parallelStart*() call is visible, so this may run sequentially -- confirm
162 |
163 | ## ---- eval=FALSE---------------------------------------------------------
164 | ## install.packages('devtools')
165 |
166 | ## ------------------------------------------------------------------------
167 | library(devtools)
168 | # has_devel() # this is currently returning a clang compiler error
169 |
170 | ## ---- eval=FALSE---------------------------------------------------------
171 | ## devtools::create("convertR")
172 |
173 | ## ---- eval=FALSE---------------------------------------------------------
174 | ## install.packages('roxygen2')
175 |
176 | ## ------------------------------------------------------------------------
177 | library(roxygen2)
178 |
179 | ## ---- eval=FALSE---------------------------------------------------------
180 | ## devtools::document('convertR')
181 |
182 | ## ---- eval=FALSE---------------------------------------------------------
183 | ## devtools::document('convertR')
184 |
185 | ## ---- eval=FALSE---------------------------------------------------------
186 | ## devtools::use_build_ignore("Rproj", pkg = "convertR")
187 |
188 | ## ---- eval=FALSE---------------------------------------------------------
189 | ## devtools::check("convertR")
190 |
191 | ## ---- eval=FALSE---------------------------------------------------------
192 | ## devtools::build('convertR')
193 |
194 | ## ------------------------------------------------------------------------
195 | RJ <- readLines("http://shakespeare.mit.edu/romeo_juliet/full.html") # one string per line of the page; needs network access
196 |
197 | ## ------------------------------------------------------------------------
198 | RJ[grep("<br>", RJ, perl=TRUE)] # pattern restored: the HTML line-break tag had been rendered as a literal newline. NOTE(review): confirm against the original script
199 | RJ[grep("", RJ, perl=TRUE)] # NOTE(review): the pattern looks like an HTML tag that was lost; as written, "" matches every line
200 | RJ[grep("", RJ, perl=TRUE)] # NOTE(review): same problem as the line above -- restore the intended tag
201 |
202 | ## ------------------------------------------------------------------------
203 | # Split RJ into a list of chunks: each chunk starts at a line matching the pattern and runs up to the next match.
204 | y <- grep("", RJ, perl=TRUE) # line numbers where chunks start
205 | # NOTE(review): the pattern looks like an HTML tag that was lost; as written, "" matches every line
206 | # Each chunk ends on the line before the next start; the last chunk runs to the end of RJ.
207 | ends <- c(y[-1] - 1, length(RJ))
208 | x <- vector("list", length(y)) # preallocate one slot per chunk instead of growing list(NA)
209 | for(i in seq_along(y)){ # seq_along() does nothing for an empty y, where 1:length(y) would loop over c(1, 0)
210 |   x[[i]] <- RJ[y[i]:ends[i]]
211 | }
212 | 
213 |
214 | ## ------------------------------------------------------------------------
215 | countR <- function(z){ # z: character vector; returns c(number of elements matching "Romeo", number of elements matching "Juliet")
216 |   return(c(length(grep("Romeo", z, perl=TRUE)), length(grep("Juliet", z, perl=TRUE)))) # TRUE rather than T: T is an ordinary variable that can be reassigned
217 | }
218 | lapply(x, countR)
219 |
220 | ## ------------------------------------------------------------------------
221 | # now count the lines in each scene
222 | countL <- function(z){ # z: character vector; returns how many elements end with a <br> tag
223 |   return(length(grep("<br>$", z, perl=TRUE))) # pattern restored: the <br> tag had been rendered as a literal newline. NOTE(review): confirm against the original script
224 | }
225 | lapply(x, countL)
226 |
227 |
--------------------------------------------------------------------------------
/instructor/day_four.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Day Four: Functional Programming"
3 | author: ["Dillon Niederhut", "Shinhye Choi"]
4 | date: "`r format(Sys.time(), '%d %B, %Y')`"
5 | output:
6 | - pdf_document
7 | - slidy_presentation
8 | ---
9 |
10 | ```{r, echo=FALSE}
11 | knitr::opts_knit$set(root.dir = '../')
12 | ```
13 |
14 | ## Introduction
15 |
16 | Remember how we said that R started off as a functional language? This means that, underneath the hood, R is built on many small functions that can be grouped together in smart ways to do powerful things. It also means that, if you want to do more complicated things in R than run summary statistics and linear models, you'll need to learn how to create and use functions.
17 |
18 | In R, functions are first-class citizens. This means that you can do anything to a function that you can do to an object, including using functions to create other functions.
19 |
20 | Structuring a computer language around functions and their methods makes it easily parallelizable in ways that object oriented languages are usually not (for many complicated reasons that we don't have time to talk about). Key components of function-oriented languages are functions that write functions and the ability to map functions to data structures.
21 |
22 | Finally, when we talk about the amazing extensibility of R, what we mean is that other people have written useful functions that you can find and download. If R is required in your field, it is likely because there are many functions specific to your field that have been developed in R. We'll close the intensive with a brief introduction to packaging and sharing R functions.
23 |
24 | # Looping
25 |
26 | Before we look at how functions work and learn how to create them, let us go over how for-loops work in R. Many functions have for-loops embedded in them, so it is useful to understand looping first. The basic syntax looks like the following:
27 |
28 | > side note - while you can use loops in R, the community strongly discourages explicit looping in favor of implicit loop functionals like `Map` and `lapply`
29 |
30 |
31 | syntax: for (variable in sequence) {statement}
32 |
33 | ```{r}
34 | ## I want to create a vector (not a matrix) of 2 to the power of n where n is 1 to 10.
35 | 
36 | mat <- c(rep(NA, 10)) # first create an NA-filled placeholder vector of length 10 (replaced by the length-6 versions below)
37 | 
38 | # There are many ways to do the same task
39 | mat <- c(rep(NA, 6)) # six slots, one for each n in 5 to 10
40 | for(i in 1:6){ # I want to fill the vector with 2 to the power of n where n is 5 to 10.
41 | mat[i] <- 2^(i+4) # i runs 1 to 6, so i+4 runs 5 to 10
42 | } # or
43 | 
44 | mat <- c(rep(NA, 6))
45 | for(i in 5:10){ # loop over n directly
46 | mat[i-4] <- 2^i # shift the index so that n = 5 lands in slot 1
47 | } # by setting sequence and statement accordingly
48 | ```
49 |
50 | You can also loop over a non-numeric vector
51 |
52 | ```{r}
53 | a <- c("Berkeley", "SF", "Oakland") # city names
54 | b <- c(20, 18, 22) # one temperature per city, in the same order as a
55 | # Build the data frame column by column: cbind(a, b) would first make a character matrix and turn the temperatures into text.
56 | city.temp <- data.frame(a, b)
57 | 
58 | for(city in c("Berkeley", "Walnut Creek", "Richmond")){
59 |   if(city %in% city.temp$a){
60 |     print(city.temp[city.temp$a == city,])
61 |     # if we have the city in our data, then print its temperature and the name of the city
62 |   }else{
63 |     print(paste(city, "is NOT in the data. :("))
64 |     # if not, then just print the name of the city next to "is NOT in the data. :("
65 |   }
66 | } # Loops can be as long and complicated as you like, but they are often not very efficient.
67 | ```
68 |
69 | How can we make the running time shorter? The "break" command can be handy. We use the command when we want to "end the looping" once the variable reaches where we want it to stop.
70 |
71 | ```{r, eval=FALSE}
72 | system.time(
73 | for(i in 1:1000){
74 | print(i)
75 | })
76 |
77 | system.time(
78 | for(i in 1:1000){
79 | print(i)
80 | if(i == 50) break
81 | })
82 | ```
83 |
84 | Next we move on to control structures, such as if statements. "If" statements are very useful when you want to assign different tasks to different subsets of data using a single for-loop. The basic syntax looks like the following: `if(condition){statement} else{other statement}`
85 |
86 | > side note - there is no `elseif` or `elif` keyword in R, which will be confusing to folks coming from Matlab and Python
87 |
88 | ```{r}
89 | x <- 7
90 | if(x > 10){
91 | print(x)
92 |
93 | }else{ # "else" should not start its own line.
94 | # Always let it be preceded by a closing brace on the same line.
95 | print("NOT BIG ENOUGH!!")
96 | }
97 | ```
98 |
99 | The "ifelse" function can be handy as long as you have two conditions that are mutually exclusive
100 |
101 | ```{r}
102 | # ifelse(test, yes, no)
103 | gender <- sample(c("male", "female"), 100, replace=TRUE)
104 | gender
105 | gender <- ifelse(gender=="male", 1, 0)
106 | gender
107 | ```
108 |
109 |
110 | "While" loops are used less frequently. The basic syntax: `while(condition) {statement}`
111 |
112 | ```{r, eval=FALSE}
113 | # if there are multiple statements, then use ; to separate each statement
114 | x <- 0
115 | while(x < 5) {print(x <- x+1)} # adds 1 to x, then prints the new value: 1, 2, 3, 4, 5
116 | x <- 1
117 | while(x < 5) {x <- x+1; if (x == 3) break; print(x)} # break the loop when x=3, so only 2 gets printed
118 | ```
119 |
120 | # Functions
121 |
122 | ## it's really easy to create functions in R
123 |
124 | ```{r}
125 | f <- function(x) x + 1
126 | class(f)
127 | ```
128 |
129 | ## every function has three parts
130 |
131 | every function needs inputs - these are called `arguments` (or, here, formal arguments)
132 |
133 | every function has stuff it does, and this stuff is contained in the body
134 |
135 | every function has an `environment` that it executes in
136 |
137 | ```{r}
138 | formals(f)
139 | body(f)
140 | environment(f)
141 | ```
142 |
143 | ## environments are where the function was defined
144 |
145 | see how our function has `R_GlobalEnv` as its environment? that's because we defined it in the global environment
146 |
147 | this means that if you tell a function to look for an `object`, it will look in the global namespace
148 |
149 | ```{r}
150 | f <- function(x) x + y
151 | y <- 1
152 | f(x = 1)
153 | ```
154 |
155 | ## it's very common for functions to be declared within another function
156 |
157 | ```{r}
158 | y <- 9001 # a global y, which g() below does *not* end up using
159 | f <- function(x) {
160 | y <- 1 # this y lives only inside f()
161 | g <- function (x) {
162 | x + y # y is looked up where g() was defined (inside f), so this is x + 1
163 | }
164 | g(x)
165 | }
166 | f(1) # 2, not 9002
167 | ```
168 |
169 | this is important because it means that functions can be separated from the state of your computer (which is what makes them easy to parallelize)
170 |
171 | ## functions don't modify your computer state (usually)
172 |
173 | an obvious exception is writing/reading from disk, but what we really mean here is that anything created inside the function environment doesn't show up in the global environment
174 |
175 | ```{r}
176 | h <- function(){
177 |   # use a name nothing else defines: `a` already exists globally from the looping section
178 |   if (!exists('not_yet_defined')) {
179 |     not_yet_defined <- 1 # created inside h() only; gone again once h() returns
180 |   } else {
181 |     not_yet_defined <- 9000
182 |   }
183 |   print(not_yet_defined)
184 | }
185 | h()
186 | h()
187 | ```
188 |
189 | there are ways to make functions modify global variables, but this is generally not a good idea - anything that needs to go into a function should be in the arguments, and anything that needs to come out of the function should be returned
190 |
191 | > side note - R automatically returns the value of the last expression, so there is no need for an explicit `return` statement unless you want to break the function early
192 |
193 | a couple of days ago, we were dealing with data that came in several different units of length - let's try writing a function that converts inches to centimeters
194 |
195 | ```{r}
196 | in_to_cm <- function(x) x * 2.5
197 | in_to_cm(69)
198 | ```
199 |
200 | that's not juvenile humor - it's actually Dillon's height in inches
201 |
202 | what if we want to know how tall we are in meters?
203 |
204 | you could do `function(x) x * 2.5 / 100 ` but this would be repeating yourself
205 |
206 | then, when you figure out that the conversion factor is really *2.54*, not 2.5, you might update one and forget to update the other
207 |
208 | ```{r}
209 | in_to_m <- function(x){
210 | in_to_cm(x) / 100
211 | }
212 | in_to_m(69)
213 | ```
214 |
215 | now, if we go back and update `in_to_cm`, those changes automatically get propagated to `in_to_m`
216 |
217 | ```{r}
218 | in_to_cm <- function(x) x * 2.54
219 | in_to_m(69)
220 | ```
221 |
222 | if you were here for the intro to Unix, this idea of small functions combined together should sound awfully familiar
223 |
224 | R is a bit quirky in that there is no such thing as an uncontained value, e.g. `4` is really a vector with length of one, and a value of 4 in position 1
225 |
226 | ```{r}
227 | 69 == c(69)
228 | ```
229 |
230 | this means that R automatically broadcasts functions across vectors of any length
231 |
232 | ```{r}
233 | heights <- c(69,54,73,82)
234 | in_to_m(heights)
235 | ```
236 |
237 | ## this doesn't work with lists
238 |
239 | ```{r, eval=FALSE}
240 | heights <- list(69,54,73,82)
241 | in_to_m(heights)
242 | ```
243 |
244 | # Functionals
245 |
246 | a functional is a function that takes functions as arguments
247 |
248 | ## the wrong way to be functional
249 |
250 | imagine you want to apply a function to the columns of a dataframe (which is a list!)
251 |
252 | you could do something like this:
253 |
254 | ```{r}
255 | in_to_m(heights[[1]])
256 | in_to_m(heights[[2]])
257 | in_to_m(heights[[3]])
258 | ```
259 |
260 | but this is clunky, prone to errors, and can't accommodate changes in your list - if you added another item in the list, you would need to find every place you tried to do this and `ctrl-c` `ctrl-v` a whole bunch of crap
261 |
262 | ## the right way to be functional
263 |
264 | ```{r}
265 | lapply(heights, in_to_m)
266 | ```
267 |
268 | ## it's not always smart to name functions
269 |
270 | these are called anonymous functions - they aren't actually any different, but you should know they exist
271 |
272 | ```{r}
273 | lapply(heights, FUN = function(x) x %/% 12)
274 | ```
275 |
276 | ## lapply has limits
277 |
278 | ```{r}
279 | dat <- read.csv('data/large.csv')
280 | str(dat)
281 | lapply(dat, mean)
282 | ```
283 |
284 | we *know* there are numbers there - why are the means all missing?
285 |
286 | a. we didn't use amelia
287 | b. `mean` has an argument named `na.rm` that ignores missingness
288 |
289 | *and for Hadley knows what reason, the default is FALSE*
290 |
291 | but we can't do this
292 |
293 | ```{r, eval=FALSE}
294 | lapply(dat, mean(na.rm = TRUE))
295 | ```
296 |
297 | ## so we use Map
298 |
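299 | Map is a function similar to those found in other functional languages; extra named arguments such as `na.rm` are handed to the function on every call (`lapply(dat, mean, na.rm = TRUE)` would work too, since `lapply` also passes extra arguments along)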
300 |
301 | ```{r, eval=FALSE}
302 | Map(mean, dat, na.rm=TRUE)
303 | ```
304 |
305 | ## this can be parallelized
306 |
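307 | > side note - previous versions of these materials imported the `parallel` library directly (it still ships with base R); `parallelMap` gives us a `Map`-style interface on top of it - call one of its `parallelStart*()` functions first, otherwise `parallelMap` runs sequentially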
308 |
309 | ```{r}
310 | # install.packages('parallelMap') # run this once, interactively - not on every knit
311 | library(parallelMap)
312 | system.time(Map(median, dat, na.rm=TRUE))
313 | system.time(parallelMap(median, dat, more.args = list(na.rm=TRUE))) # constants go in more.args
314 | ```
315 |
316 | parallel processing incurs time costs from memory management and message passing that can make small jobs take longer in parallel than in serial
317 |
318 | # Packages
319 |
320 | **Real artists ship - Steve Jobs**
321 |
322 | ## why to package
323 |
324 | 1. embrace your inner sloth
325 | 2. Linus's law
326 | 3. R packages get cited
327 |
328 | ## how to package
329 |
330 | 1. document first
331 | 2. avoid feature creep
332 | 3. release early, release often
333 |
334 | we're going to be using devtools, an R package which makes it easier to build, install, and share packages
335 |
336 | ```{r, eval=FALSE}
337 | install.packages('devtools')
338 | ```
339 |
340 | ```{r}
341 | library(devtools)
342 | # has_devel() # this is currently returning a clang compiler error
343 | ```
344 |
345 | ## basic components of a package
346 |
347 | 1. the only thing a package needs to be a package is a `DESCRIPTION` file - this should tell you something about the importance of documentation (namely, that code without documentation is worthless)
348 |
349 | 2. your package will also need a name - keep it simple but unique (i.e. easily Googleable)
350 |
351 | 3. *the code*
352 |
353 | 4. namespace
354 |
355 | ## getting started
356 |
357 | we'll use devtools to create the boilerplate for us
358 |
359 | ```{r, eval=FALSE}
360 | devtools::create("convertR")
361 | ```
362 |
363 | ## editing the DESCRIPTION
364 |
365 | the `DESCRIPTION` is a plaintext file in DCF format (similar to YAML). if you open it up, you should see something like this:
366 |
367 | ```
368 | Package: convertR
369 | Title: What the Package Does (one line, title case)
370 | Version: 0.0.0.9000
371 | Authors@R: person("First", "Last", email = "first.last@example.com", role = c("aut", "cre"))
372 | Description: What the package does (one paragraph)
373 | Depends: R (>= 3.2.1)
374 | License: What license is it under?
375 | LazyData: true
376 | ```
377 |
378 | it's your job to edit this to contain the correct information
379 |
380 | ## adding dependencies
381 |
382 | devtools automatically adds in that your code depends on the ability to run R, and at a version number equal to or greater than the one you are currently using
383 |
384 | what if there are other packages that your package uses? like ggplot2? do
385 |
386 | ```
387 | Imports: ggplot2
388 | ```
389 |
390 | and if you want to list optional packages, you can do so like this:
391 |
392 | ```
393 | Suggests:
394 |     reshape2 (>=1.4.1),
395 | plyr (>=1.8.3)
396 | ```
397 |
398 | > side note - moving the packages a line below is a stylistic choice so that they line up if there is more than one - be sure to indent!
399 |
400 | ## adding your code
401 |
402 | generally speaking, your package should only contain definitions of objects, most of which are functions
403 |
404 | these are placed in `.R` files in the `/R` directory
405 |
406 | we can take the functions that we defined above:
407 |
408 | ```
409 | in_to_cm <- function(x) x * 2.54
410 |
411 | in_to_m <- function(x){
412 | in_to_cm(x) / 100
413 | }
414 | ```
415 |
416 | put them in a file in `/R`, and save it with an informative name like `lengths.R`
417 |
418 | ## creating man pages
419 |
420 | these are descriptions of each object in your .R script
421 |
422 | you could write these in R's semi-LaTeX format yourself, but that's time consuming
423 |
424 | as Raymond Hettinger would say "there must be a better way"
425 |
426 | ```{r, eval=FALSE}
427 | install.packages('roxygen2')
428 | ```
429 |
430 | ```{r}
431 | library(roxygen2)
432 | ```
433 |
434 | now we're going to add specialized comments to our `lengths.R` file
435 |
436 | ```
437 | #' Converts inches to centimeters
438 | #'
439 | #' @param x A numeric
440 | #' @return Converted numeric
441 | #' @examples
442 | #' in_to_cm(1)
443 | #' in_to_cm(c(1,2,3))
444 | in_to_cm <- function(x) x * 2.54
445 |
446 | #' Converts inches to meters
447 | #'
448 | #' @param x A numeric
449 | #' @return Converted numeric
450 | #' @examples
451 | #' in_to_m(1)
452 | #' in_to_m(c(1,2,3))
453 | in_to_m <- function(x){
454 | in_to_cm(x) / 100
455 | }
456 | ```
457 |
458 | and now create the documentation with
459 |
460 | ```{r, eval=FALSE}
461 | devtools::document('convertR')
462 | ```
463 |
464 | ## `NAMESPACE`
465 |
466 | this is a file that tells R what `names` from the `environment` your package calls, and what `names` your package is going to put into the `global environment` for the user
467 |
468 | again, you can write this yourself in `NAMESPACE`:
469 |
470 | ```
471 | export(in_to_cm)
472 | export(in_to_m)
473 | ```
474 |
475 | or you can have roxygen2 handle it for you by adding `#' @export` in the function blocks you want to have exported
476 |
477 | ```
478 | #' Converts inches to centimeters
479 | #'
480 | #' @param x A numeric
481 | #' @return Converted numeric
482 | #' @examples
483 | #' in_to_cm(1)
484 | #' in_to_cm(c(1,2,3))
485 | #' @export
486 | in_to_cm <- function(x) x * 2.54
487 |
488 | #' Converts inches to meters
489 | #'
490 | #' @param x A numeric
491 | #' @return Converted numeric
492 | #' @examples
493 | #' in_to_m(1)
494 | #' in_to_m(c(1,2,3))
495 | #' @export
496 | in_to_m <- function(x){
497 | in_to_cm(x) / 100
498 | }
499 | ```
500 |
501 | then running:
502 |
503 | ```{r, eval=FALSE}
504 | devtools::document('convertR')
505 | ```
506 |
507 | roxygen is careful in that it will only write files if they:
508 |
509 | a. do not exist yet
510 | b. were created by roxygen
511 |
512 | you can see this in the header:
513 |
514 | ```
515 | # Generated by roxygen2 (4.1.1): do not edit by hand
516 | ```
517 |
518 | ## data
519 |
520 | lastly, if you are shipping data with your code, it goes in the `/data` directory
521 |
522 | CRAN expects this to contain a single `.Rdata` file created by `save()`
523 |
524 | the other option is to use `devtools::use_data()`
525 |
526 | ## source packages are not bundled packages
527 |
528 | now you have a package!
529 |
530 | but no one else can use it
531 |
532 | this might be what you want, but you may want to share it
533 |
534 | first, we'll have to tell R what things are not a part of our shipped package
535 |
536 | ## adding .Rbuildignore
537 |
538 | CRAN is very fussy about what they allow to be uploaded, and in what format
539 |
540 | remember .gitignore? there's a similar function for R packages
541 |
542 | you can add regex to .Rbuildignore via devtools
543 |
544 | ```{r, eval=FALSE}
545 | devtools::use_build_ignore("Rproj", pkg = "convertR")
546 | ```
547 |
548 | devtools automatically initiates this as a project file for RStudio, which no one else wants to see
549 |
550 | if you have a README, NEWS, or UPDATES file, you should add them to .Rbuildignore
551 |
552 | ## checking
553 |
554 | before you ship your code anywhere, you should check it to make sure it works the way it's supposed to
555 |
556 | ideally, you will have been doing this with unit testing all along, but that's beyond the scope of this class
557 |
558 | ```{r, eval=FALSE}
559 | devtools::check("convertR")
560 | ```
561 |
562 | this checks over 50 compatibility issues, and takes a bit of time, even with our two tiny functions
563 |
564 | ## shipping to Github
565 |
566 | if you're planning on shipping to github, you're pretty much set
567 |
568 | initialize git in your package, add the contents, and push to a repo
569 |
570 | ```
571 | git init
572 | git add .    # "." also stages dotfiles such as .Rbuildignore, which "*" skips
573 | git commit -m "initial commit"
574 | git remote add origin git@github.com:deniederhut/convertR
575 | git push -u origin HEAD    # the first push has to name the remote branch to track
576 | ```
577 |
578 | ## shipping to CRAN
579 |
580 | this is a bit more involved
581 |
582 | first, you'll need to build your package
583 |
584 | ```{r, eval=FALSE}
585 | devtools::build('convertR')
586 | ```
587 |
588 | this creates a compressed tarball, which should be called something like `convertR_0.0.0.9000.tar.gz`
589 |
590 | which you'll submit via [https://cran.r-project.org/submit.html](https://cran.r-project.org/submit.html)
591 |
592 | in the comments, you should include:
593 |
594 | 1. the environments you checked on
595 | 2. the results of `devtools::check()`, with an explanation for any errors
596 |
597 |
598 | # Practice
599 |
600 | ## Assignment
601 |
602 | Remember how we read lines from an html document of Romeo and Juliet on day two?
603 |
604 | ```{r}
605 | RJ <- readLines("http://shakespeare.mit.edu/romeo_juliet/full.html")
606 | ```
607 |
608 | Write functions to parse this document into acts, then count the number of times the words "Romeo" and "Juliet" appear in each act. Then, package these functions.
609 |
610 | ## Example code
611 |
612 | Take a look at the data and look for some pattern.
613 |
614 | ```{r}
615 | RJ[grep("<h3>", RJ, perl=TRUE)]    # NOTE(review): the three patterns were lost from
616 | RJ[grep("<H3>", RJ, perl=TRUE)]    # this file; these are reconstructed guesses -
617 | RJ[grep("<H3>ACT", RJ, perl=TRUE)] # confirm against the tags in the downloaded page
618 | ```
619 |
620 | Now that we know that the first line of each act begins with the string `<H3>ACT`, we will create an empty list object called x, and then assign all the lines in each act to one component of x.
621 |
622 | ```{r}
623 | # NOTE(review): the act-heading pattern was missing here (grep("") matches every
624 | # line); "<H3>ACT" is reconstructed from the page's markup - confirm before use
625 | y <- grep("<H3>ACT", RJ, perl=TRUE)  # line number where each act starts
626 | act_end <- c(y[-1] - 1, length(RJ))  # each act ends right before the next one starts
627 | x <- vector("list", length(y))       # preallocate one (empty) slot per act
628 | for(i in seq_along(y)){              # seq_along() also copes with zero matches
629 |   x[[i]] <- RJ[y[i]:act_end[i]]
630 | }
631 | # x[[i]] now holds every line of act i; the last act runs to the end of RJ,
632 | # which is why act_end finishes with length(RJ)
633 | ```
634 |
635 | How should we count the number of times the words appear in each act? Create a wrapper function that counts the occurrences of each word and returns the counts.
636 |
637 | ```{r}
638 | countR <- function(z){ # z: character vector of lines; returns c(<Romeo count>, <Juliet count>), counting every occurrence so "Romeo, Romeo!" adds 2
639 |   vapply(c("Romeo", "Juliet"), function(w) sum(lengths(regmatches(z, gregexpr(w, z, fixed=TRUE)))), integer(1), USE.NAMES=FALSE)
640 | }
641 | lapply(x, countR)
642 | ```
643 |
644 | Now count the lines in each act
645 |
646 | ```{r}
647 | # now count the spoken lines in each act (x holds one element per act)
648 | countL <- function(z){ # NOTE(review): "</A><br>$" restored from a garbled pattern - confirm
649 |   length(grep("</A><br>$", z, perl=TRUE)) # number of elements of z that end with the tag
650 | }
651 | lapply(x, countL)
652 | ```
653 |
654 | # Acknowledgements
655 |
656 | ## Materials taken from:
657 |
658 | [Software Carpentry](https://swcarpentry.github.io/)
659 |
660 | [Hadley Wickham](http://adv-r.had.co.nz/)
661 |
662 | [more Hadley Wickham](http://r-pkgs.had.co.nz/)
663 |
--------------------------------------------------------------------------------
/instructor/day_four.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/R-for-Data-Science/798d5b220d66afea5644a245c7327e4bb740039a/instructor/day_four.pdf
--------------------------------------------------------------------------------
/instructor/day_one.R:
--------------------------------------------------------------------------------
1 | ## ---- echo=FALSE---------------------------------------------------------
2 | knitr::opts_knit$set(root.dir = '../')
3 |
4 | ## ------------------------------------------------------------------------
5 | ls
6 |
7 | ## ------------------------------------------------------------------------
8 | my.name <- dir
9 | my.name
10 |
11 | ## ------------------------------------------------------------------------
12 | my.name <- dir()
13 | my.name
14 |
15 | ## ------------------------------------------------------------------------
16 | class(dir)
17 |
18 | ## ------------------------------------------------------------------------
19 | sum(1,2,3)
20 |
21 | ## ------------------------------------------------------------------------
22 | is.object(sum)
23 |
24 | ## ------------------------------------------------------------------------
25 | getwd()
26 |
27 | ## ---- eval=FALSE---------------------------------------------------------
28 | ## setwd("/Users/dillonniederhut/Dropbox/dlab/R-for-Data-Science")
29 |
30 | ## ------------------------------------------------------------------------
31 | dir()
32 |
33 | ## ------------------------------------------------------------------------
34 | ls()
35 |
36 | ## ------------------------------------------------------------------------
37 | test <- "I have no idea what I'm doing"
38 | ls()
39 |
40 | ## ---- eval=FALSE---------------------------------------------------------
41 | ## rm(list = ls())
42 | ## exists(test)
43 |
44 | ## ------------------------------------------------------------------------
45 | ?exists
46 |
47 | ## ------------------------------------------------------------------------
48 | ??exists
49 |
50 | ## ------------------------------------------------------------------------
51 | example(exists)
52 |
53 | ## ------------------------------------------------------------------------
54 | apropos('lm')
55 |
56 | ## ---- eval=FALSE---------------------------------------------------------
57 | ## install.packages("Amelia")
58 |
59 | ## ------------------------------------------------------------------------
60 | library(Amelia)
61 |
62 | ## ---- eval=FALSE---------------------------------------------------------
63 | ## library(supercalifragilisticexpialedocious)
64 |
65 | ## ------------------------------------------------------------------------
66 | 2 + 2
67 | 2 - 2
68 | 2 * 2
69 | 2 %% 2
70 | 2 %/% 2
71 | 2 / 2
72 | 2 ** 2
73 | 2 ** .5
74 | 2 ** -1
75 |
76 |
77 | ## ------------------------------------------------------------------------
78 | abs(-2)
79 | pi
80 | round(pi,digits = 2)
81 | sign(-2)
82 | log(2)
83 | log10(2)
84 | cos(pi)
85 |
86 | ## ------------------------------------------------------------------------
87 | TRUE & TRUE
88 | TRUE | FALSE
89 | xor(TRUE,FALSE)
90 | ! FALSE
91 | 1 & 1
92 | 1 & 0
93 | !0
94 |
95 | ## ------------------------------------------------------------------------
96 | class(TRUE)
97 | class(1)
98 |
99 | ## ------------------------------------------------------------------------
100 | class(FALSE)
101 | class(pi)
102 | class("Look mama I'm letters")
103 | class(as.Date("2015-07-27"))
104 | class(factor(c('undergraduate','graduate','professor','staff')))
105 |
106 | ## ------------------------------------------------------------------------
107 | TRUE + TRUE
108 | 2 & 1
109 | TRUE * TRUE
110 | 2 & -1
111 |
112 | ## ------------------------------------------------------------------------
113 | if (9001) print('This is evaluated as a boolean value')
114 |
115 | ## ------------------------------------------------------------------------
116 | my.character <- paste("Hey", "momma", "I'm", "a", "string")
117 | my.character
118 |
119 | ## ------------------------------------------------------------------------
120 | substr(my.character,1,4)
121 |
122 | ## ------------------------------------------------------------------------
123 | substr(my.character,1,4) <- "Yes "
124 | my.character
125 |
126 | ## ------------------------------------------------------------------------
127 | strsplit(my.character, ' ')
128 |
129 | ## ------------------------------------------------------------------------
130 | gsub('.', 'X', my.character) # '.' is a regex wildcard, so every character becomes X
131 |
132 | ## ------------------------------------------------------------------------
133 | gsub('[.]', 'X', my.character) # inside [] the dot is literal; the string has none, so nothing changes
134 | gsub('[g]', 'X', my.character) # replaces only the letter g
135 |
136 | ## ------------------------------------------------------------------------
137 | my.date <- as.Date("2015-07-27")
138 | my.date + 7
139 | weekdays(my.date + 7)
140 | my.date - 365
141 | weekdays(my.date - 365)
142 |
143 | ## ------------------------------------------------------------------------
144 | my.factor <- factor(c('undergraduate','graduate','professor','staff'))
145 | levels(my.factor)
146 |
147 | ## ------------------------------------------------------------------------
148 | my.factor <- factor(c(1,2,3,4),
149 | levels=c(1,2,3,4),
150 | labels=c('undergraduate','graduate','professor','staff'))
151 | levels(my.factor)
152 |
153 | ## ------------------------------------------------------------------------
154 | is.character(my.character)
155 | is.numeric(my.character)
156 |
157 | ## ------------------------------------------------------------------------
158 | as.character(9)
159 | as.numeric(my.character)
160 |
161 | ## ------------------------------------------------------------------------
162 | my.vector <- c(TRUE, TRUE, FALSE, FALSE, TRUE)
163 | my.vector
164 |
165 | ## ------------------------------------------------------------------------
166 | your.vector <- c(1,2,3,4,5)
167 | my.vector * your.vector
168 |
169 | ## ------------------------------------------------------------------------
170 | seq(from=0,to=length(my.vector),by=2)
171 |
172 | ## ------------------------------------------------------------------------
173 | 1:length(my.vector)
174 |
175 | ## ------------------------------------------------------------------------
176 | c(1,2,3) * c(TRUE, FALSE)
177 |
178 | ## ------------------------------------------------------------------------
179 | my.vector[1]
180 | your.vector[1:2]
181 | my.vector[c(1,3)]
182 |
183 | ## ------------------------------------------------------------------------
184 | my.list <- list(TRUE, 'two', 3)
185 | my.list
186 |
187 | ## ------------------------------------------------------------------------
188 | str(my.list)
189 |
190 | ## ---- eval=FALSE---------------------------------------------------------
191 | ## my.list * list(1, 'two', FALSE)
192 |
193 | ## ------------------------------------------------------------------------
194 | my.list[1]
195 |
196 | ## ------------------------------------------------------------------------
197 | my.list[[1]]
198 |
199 | ## ------------------------------------------------------------------------
200 | my.data <- data.frame(n = c(1,2,3),c=c('one','two','three'),b=c(TRUE,TRUE,FALSE))
201 | my.data
202 |
203 | ## ------------------------------------------------------------------------
204 | dim(my.data) #this gives you nrow() and ncol()
205 | colnames(my.data)
206 | rownames(my.data)
207 |
208 | ## ------------------------------------------------------------------------
209 | my.data[1:2,3]
210 |
211 | ## ------------------------------------------------------------------------
212 | str(my.data)
213 |
214 | ## ------------------------------------------------------------------------
215 | my.data$b
216 | my.data$d <- c(my.date, my.date+7, my.date-7)
217 | my.data
218 |
219 | ## ------------------------------------------------------------------------
220 | my.data$really.long.and.complicated.variable.name <- 999 # 999 is recycled to fill every row
221 | my.data$r # `$` partially matches names: this returns the long-named column created above
222 |
223 | ## ------------------------------------------------------------------------
224 | rbind(my.data, my.data)
225 | cbind(my.data, my.data)
226 |
227 |
--------------------------------------------------------------------------------
/instructor/day_one.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Day One: R Basics"
3 | author: "Dillon Niederhut"
4 | date: "`r format(Sys.time(), '%d %B, %Y')`"
5 | output:
6 | - pdf_document
7 | - slidy_presentation
8 | ---
9 |
10 | ```{r, echo=FALSE}
11 | knitr::opts_knit$set(root.dir = '../')
12 | ```
13 |
14 | ## Pre-introduction
15 |
16 | You should start by having your class go to our github page at [github.com/dlab-berkeley/R-for-Data-Science](https://github.com/dlab-berkeley/R-for-Data-Science) to get the course materials either via:
17 |
18 | 1. `git clone https://github.com/dlab-berkeley/R-for-Data-Science.git`; or
19 | 2. clicking the `download zip` button on the right hand side of the screen
20 |
21 | The students won't need these materials today, but they will for the rest of the workshop. While everything is downloading, you can go on to:
22 |
23 | ## Introduction to the class
24 |
25 | These materials are meant to be guides for you, the instructor. Your students will retain more of this content if they type these commands themselves than if they read them off of the slidedeck. That being said, at any time, you can create a slide deck by changing `output:` to be `slidy_presentation` instead of `pdf_document`.
26 |
27 | **If you are a student and you are looking at this PDF**, be sure not to copy the code examples directly from the PDF. The typesetter uses character encoding that might not match the local settings on your computer, and the examples won't work. If you have fallen behind or can't see the screen very well, and want to copy the code examples, do so from the `.R` files in this directory.
28 |
29 | It is a good idea to start off the class by asking folks why they want to learn R. Common responses include:
30 |
31 | 1. Stata/SPSS/Matlab is too expensive
32 | 2. I saw a pretty graph someone made in R
33 | 3. My field uses analytical packages written for R
34 | 4. I have a deep and burning desire for open and reproducible research
35 |
36 | The outline below is designed to give each of these kinds of students the tools they need to get what they want out of R while avoiding common pitfalls. As the instructor, you should draw on your own experience to include further examples and advice, especially for students who do not fall into one of the four categories above.
37 |
38 | ## Introduction to R
39 |
40 | It may also be helpful to start off with a little bit of background knowledge about R. I find that explicitly informing students about the design principles of a language is a quick way to bootstrap their intuitions about how to use that language. R, for example, is a very old language whose objective was to allow scientists to quickly and interactively conduct statistical tests when the only other options at the time were:
41 |
42 | 1. Compile a whole program in C or FORTRAN; or,
43 | 2. Do the math yourself with a pencil and a piece of paper
44 |
45 | Obviously, neither of these is optimal, but what might not be obvious is that they both share the same problems; they require lots of human time, and those humans have to be very knowledgeable about the mathematical principles underlying statistical computation (e.g. that even simple functions have multiple implementations to balance accuracy/efficiency for different input values).
46 |
47 | The good news is that very complicated processes like logistic regression are a single command in R. The bad news is that R is typically not concerned with being logical or consistent. If you find yourself wanting to tear your hair out, this is **normal**.
48 |
49 | # Object Oriented Programming
50 |
51 | In the grand scheme of computer software, object orientation is a way of organizing code such that it is easy to update without breaking. This means grouping functions that serve a similar purpose into hierarchies. However, stating it this way is confusing and abstract.
52 |
53 | You can think about it this way: a soccer ball is an object. So is a basketball. They share a lot of things in common. It's simpler to know that balls generally bounce than to explicitly declare for every ball I ever see in my entire life whether it bounces or not. I can't bounce you, for example, but you didn't need to tell me that when I met you. If I came to believe that people were bounce-able, I would update my idea of people generally, not every person specifically.
54 |
55 | We call things like you and basketball `objects`, and they are in `classes` like human and ball. If I want to create a new object, like a football, I don't have to declare every single thing there is to know about footballs. I can say it `inherits` `attributes` from the `class` ball, except that it's an oblate spheroid instead of a sphere. Easy.
56 |
57 | > side note - if you are coming from C++ or Java, be warned that objects in R do not have methods that are accessible with dot notation (in fact, the `.` is used just like `_`)
58 |
59 | ## everything in R is an object
60 |
61 | yes, even the commands, just watch
62 |
63 | ```{r}
64 | ls
65 | ```
66 |
67 | `ls`, like basketball, is a specific thing with a `name` and stuff inside it that makes it `ls` and not dillon niederhut. in this particular instance, we are looking at the function that tells you what `objects` are in your `environment`
68 |
69 | until we get to functional programming, your `environment` is just R plus whatever you put in R
70 |
71 | ## in R, you store objects with names with the `<-` operator
72 |
73 | just like you need names to tell things apart, R does too
74 |
75 | ```{r}
76 | my.name <- dir
77 | my.name
78 | ```
79 |
80 | ## names must be unique
81 |
82 | every time you give an `object` a `name`, it removes anything that already had that `name` from your environment
83 |
84 | ```{r}
85 | my.name <- dir()
86 | my.name
87 | ```
88 |
89 | you see those parentheses? that means you are calling an object (here, it's a function evaluator) on `dir`.
90 |
91 | ## classes in R
92 |
93 | because it is code to be evaluated, `dir` belongs in a class called 'function'
94 |
95 | ```{r}
96 | class(dir)
97 | ```
98 |
99 | functions all have the same basic structure
100 |
101 | `function(arguments)`, where the arguments are other objects, like
102 |
103 | ```{r}
104 | sum(1,2,3)
105 | ```
106 |
107 | `1,2,3` are also objects, with a class of their own
108 |
109 | when you call a function, it looks at the classes of the things you are calling it on to figure out how to behave
110 |
111 | in much the same way, if my function is to move things from point A to point B, the way I might do that to a basketball is different from the way I might do that to you
112 |
113 | what kind of class do you think `1` is?
114 |
115 | ## more bad news
116 |
117 | R started out as a functional programming language (more on this later), to which object orientation was later added
118 |
119 | this means that R doesn't know that some things are objects, because they predate the addition of class systems
120 |
121 | ```{r}
122 | is.object(sum)
123 | ```
124 |
125 | most of R uses what are called S3 methods, which have no rules except be easy to use. this can make them wildly inconsistent, even to the point where a single function will have multiple sets of rules for how it can be called (you'll see this in day 3).
126 |
127 | > as a side note, there is also no agreement about how to name things, so you'll likely see a mixture of snake_case and CamelCase, based on the preferences of the person who originally wrote the function
128 |
129 | # living in R
130 |
131 | ## figure out where you are with
132 |
133 | ```{r}
134 | getwd()
135 | ```
136 |
137 | like in Unix, in R you are always in a directory
138 |
139 | your actions are all relative to that directory
140 |
141 | ## tell R where you would like it to be with
142 |
143 | ```{r, eval=FALSE}
144 | setwd("/Users/dillonniederhut/Dropbox/dlab/R-for-Data-Science")
145 | ```
146 |
147 | ## find out what's in your directory with
148 |
149 | ```{r}
150 | dir()
151 | ```
152 |
153 | ## find out what's in your environment with
154 |
155 | in R, you are always in an environment (more on scoping in day 4)
156 |
157 | ```{r}
158 | ls()
159 | ```
160 |
161 | our environment is currently empty
162 |
163 | ```{r}
164 | test <- "I have no idea what I'm doing"
165 | ls()
166 | ```
167 |
168 | ## we can clean our environment with
169 |
170 | ```{r, eval=FALSE}
171 | rm(list = ls())  # remove every object from the current environment
172 | exists("test")   # exists() takes the name as a string; FALSE now that `test` is gone
173 | ```
174 |
175 | ## you can pull documentation with `?`
176 |
177 | ```{r}
178 | ?exists
179 | ```
180 |
181 | ## and search the help pages with `??`
182 |
183 | ```{r}
184 | ??exists
185 | ```
186 |
187 | ## you can get a quick example with
188 |
189 | ```{r}
190 | example(exists)
191 | ```
192 |
193 | when you kind of remember what you are looking for, try
194 |
195 | ```{r}
196 | apropos('lm')
197 | ```
198 |
199 | # The power of R is its extensibility
200 |
201 | many people write clever software that makes R smarter/better/faster/stronger
202 |
203 | ## you can install these packages with
204 |
205 | ```{r, eval=FALSE}
206 | install.packages("Amelia")
207 | ```
208 |
209 | > side note - by default, R tries to install packages to a write-protected directory on Windows machines. There are two ways around this: 1, say 'yes' to the option of installing the packages in a different location; or 2, use the `Tools / Install Packages` drop-down menu item in RStudio
210 |
211 | ## and include them in your environment with
212 |
213 | ```{r}
214 | library(Amelia)
215 | ```
216 |
217 | note that when you are installing something, you give R a bunch of letters to search CRAN for, which is why it's in quotes
218 |
219 | but when you pull it into your environment, you are calling a function on a `name`, which is why it isn't in quotes
220 |
221 | ## if you try to call `library` on a package that you haven't downloaded, R will fuss at you
222 |
223 | ```{r, eval=FALSE}
224 | library(supercalifragilisticexpialedocious)
225 | ```
226 |
227 | # Math in R
228 |
229 | ## R can be a calculator
230 |
231 | ```{r}
232 | 2 + 2
233 | 2 - 2
234 | 2 * 2
235 | 2 %% 2
236 | 2 %/% 2
237 | 2 / 2
238 | 2 ** 2
239 | 2 ** .5
240 | 2 ** -1
241 |
242 | ```
243 |
244 | ## R does a few more complicated things
245 |
246 | ```{r}
247 | abs(-2)
248 | pi
249 | round(pi,digits = 2)
250 | sign(-2)
251 | log(2)
252 | log10(2)
253 | cos(pi)
254 | ```
255 |
256 | ## R also handles logic tables and testing
257 |
258 | ```{r}
259 | TRUE & TRUE
260 | TRUE | FALSE
261 | xor(TRUE,FALSE)
262 | ! FALSE
263 | 1 & 1
264 | 1 & 0
265 | !0
266 | ```
267 |
268 | # Data Types
269 |
270 | ## R differentiates between different types of data
271 |
272 | for example, the boolean and numeric values above
273 |
274 | ```{r}
275 | class(TRUE)
276 | class(1)
277 | ```
278 |
279 | you could also use `mode` to get the type of an object
280 |
281 | this will mean later, when you try to call `mode` to get the most frequently occurring level of a variable, you will be frustrated and sad
282 |
283 | don't dislike the messenger
284 |
285 | you will likely only ever deal with five flavors of data in R, which are stored as
286 |
287 | ## five data types
288 |
289 | ```{r}
290 | class(FALSE)
291 | class(pi)
292 | class("Look mama I'm letters")
293 | class(as.Date("2015-07-27"))
294 | class(factor(c('undergraduate','graduate','professor','staff')))
295 | ```
296 |
297 | > side note - by default, R stores all numbers as doubles (64-bit floating point numbers), which makes R very memory hungry. You can force it to use an integer type with the `L` suffix, like: `class(1L) ==` `r class(1L)`
298 |
299 | we've already dealt a lot with numerics above, so let's talk about
300 |
301 | # Boolean data
302 |
303 | ## logical values pretty much act like numerics
304 |
305 | ```{r}
306 | TRUE + TRUE
307 | 2 & 1
308 | TRUE * TRUE
309 | 2 & -1
310 | ```
311 |
312 | this can make it easy to use if/then statements, as `if x` evaluates to `TRUE` if it is anything other than zero
313 |
314 | ```{r}
315 | if (9001) print('This is evaluated as a boolean value')
316 | ```
317 |
318 | also, any vector (we'll talk about these below) multiplied by a boolean vector has all of its false values set to zero, which can be helpful for summing and averaging only specific cases
319 |
320 | # Character data
321 |
322 | ## character handling in R is fairly close to character handling in a Unix terminal
323 |
324 | ```{r}
325 | my.character <- paste("Hey", "momma", "I'm", "a", "string")
326 | my.character
327 | ```
328 |
329 | ## whitespace is the default separator in the paste function; if you don't want this, use `paste0()`
330 |
331 | ```{r}
332 | substr(my.character,1,4)
333 | ```
334 |
335 | ## note here that R is not a zero-indexed language
336 |
337 | ```{r}
338 | substr(my.character,1,4) <- "Yes "
339 | my.character
340 | ```
341 |
342 | ## you can separate characters with
343 |
344 | ```{r}
345 | strsplit(my.character, ' ')
346 | ```
347 |
348 | ## you can substitute with
349 |
350 | ```{r}
351 | gsub('.', 'X', my.character)
352 | ```
353 |
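354 | R here treats the pattern as a regular expression (POSIX-style extended regex by default, or Perl-compatible regex with `perl = TRUE`), where `.` is a special shorthand for "any single character"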
355 |
356 | ## to be safe, put it in brackets
357 |
358 | ```{r}
359 | gsub('[.]', 'X', my.character)
360 | gsub('[g]', 'X', my.character)
361 | ```
362 |
363 | # Datetime data
364 |
365 | ## R stores dates internally as the number of days since the epoch (1 Jan 1970)
366 |
367 | ```{r}
368 | my.date <- as.Date("2015-07-27")
369 | my.date + 7
370 | weekdays(my.date + 7)
371 | my.date - 365
372 | weekdays(my.date - 365)
373 | ```
374 |
375 | ## the epoch is common to (most) Unix systems
376 |
377 | makes it easy to add and subtract days
378 |
379 | however, most other languages use seconds since the epoch, not days
380 |
381 | these can both cause interoperability issues
382 |
383 | # Factor data
384 |
385 | ## R stores factors internally as integers, and uses the character strings as labels
386 |
387 | ```{r}
388 | my.factor <- factor(c('undergraduate','graduate','professor','staff'))
389 | levels(my.factor)
390 | ```
391 |
392 | notice how it sorts those levels alphabetically?
393 |
394 | this can cause issues when making plots or trying to display in a particular order - if sort order is critical
395 |
396 | ## try giving your factor explicitly numeric levels and character labels
397 |
398 | ```{r}
399 | my.factor <- factor(c(1,2,3,4),
400 | levels=c(1,2,3,4),
401 | labels=c('undergraduate','graduate','professor','staff'))
402 | levels(my.factor)
403 | ```
404 |
405 | # Testing and changing data types
406 |
407 | ## you can test types with `is.type`, e.g.
408 |
409 | ```{r}
410 | is.character(my.character)
411 | is.numeric(my.character)
412 | ```
413 |
414 | ## you can change datatypes with `as.type`, e.g.
415 |
416 | ```{r}
417 | as.character(9)
418 | as.numeric(my.character)
419 | ```
420 |
421 | trying to coerce types can lead to weird behavior
422 |
423 | # Data Structures
424 |
425 | there are five kinds of data structures in R, but you will probably only ever use three of these
426 |
427 | 1. vector
428 | 2. list
429 | 3. dataframe
430 |
431 | ## a vector is an ordered group of the same kind of data, e.g.
432 |
433 | ```{r}
434 | my.vector <- c(TRUE, TRUE, FALSE, FALSE, TRUE)
435 | my.vector
436 | ```
437 |
438 | ## it doesn't matter what the datatype is, as long as it is all the same
439 |
440 | ```{r}
441 | your.vector <- c(1,2,3,4,5)
442 | my.vector * your.vector
443 | ```
444 |
445 | ## you will frequently need to create vectors that are sequences of numbers
446 |
447 | ```{r}
448 | seq(from=0,to=length(my.vector),by=2)
449 | ```
450 |
451 | ## R also gives you a shorthand operator for creating sequences where `by=1`
452 |
453 | ```{r}
454 | 1:length(my.vector)
455 | ```
456 |
457 | remember what we said about multiplying logical vectors?
458 |
459 | ## you can add and multiply vectors, but they need to be the same length
460 |
461 | ```{r}
462 | c(1,2,3) * c(TRUE, FALSE)
463 | ```
464 |
465 | you will run into this issue a bunch dealing with dataframes and logical vectors
466 |
467 | ## you can pull elements out of a vector by
468 |
469 | ```{r}
470 | my.vector[1]
471 | your.vector[1:2]
472 | my.vector[c(1,3)]
473 | ```
474 |
475 | ## a list is an ordered group of things that are not of the same type
476 |
477 | ```{r}
478 | my.list <- list(TRUE, 'two', 3)
479 | my.list
480 | ```
481 |
482 | ## you can find out the attributes for and types of data in a list with
483 |
484 | ```{r}
485 | str(my.list)
486 | ```
487 |
488 | ## lists are simple containers, and are not additive or multiplicative
489 |
490 | ```{r, eval=FALSE}
491 | my.list * list(1, 'two', FALSE)
492 | ```
493 |
494 | ## subsetting a list with brackets pulls out the element along with its attribute
495 |
496 | this will be annoying when you try to pull values out of objects like summary(lm())
497 |
498 | ```{r}
499 | my.list[1]
500 | ```
501 |
502 | ## if you want only the element, use double brackets
503 |
504 | ```{r}
505 | my.list[[1]]
506 | ```
507 |
508 | # Data frames
509 |
510 | ## inside R, a dataframe is just a list of equal-length vectors
511 |
512 | much like in SQL where a table is a tuple of attributes
513 |
514 | ```{r}
515 | my.data <- data.frame(n = c(1,2,3),c=c('one','two','three'),b=c(TRUE,TRUE,FALSE))
516 | my.data
517 | ```
518 |
519 | see how this is just a list of vectors?
520 |
521 | ## you can learn some things about data frames
522 |
523 | ```{r}
524 | dim(my.data) #this gives you nrow() and ncol()
525 | colnames(my.data)
526 | rownames(my.data)
527 | ```
528 |
529 | ## dataframes have some special operators they share with matrices - subset with brackets
530 |
531 | ```{r}
532 | my.data[1:2,3]
533 | ```
534 |
535 | ## dataframes also have special operators that they inherit from lists
536 |
537 | ```{r}
538 | str(my.data)
539 | ```
540 |
541 | ```{r}
542 | my.data$b
543 | my.data$d <- c(my.date, my.date+7, my.date-7)
544 | my.data
545 | ```
546 |
547 | ## the dollar operator also does partial matching
548 |
549 | ```{r}
550 | my.data$really.long.and.complicated.variable.name <- 999
551 | my.data$r
552 | ```
553 |
554 | since the number of rows in the dataframe (3) is a multiple of the length of the assignment (1), the vector gets repeated three times to fill the column (R calls this recycling)
555 |
556 | ## you can combine data frames with
557 |
558 | ```{r}
559 | rbind(my.data, my.data)
560 | cbind(my.data, my.data)
561 | ```
562 |
563 | you'll learn tomorrow about better ways to merge data, especially heterogeneous data
564 |
565 | # saving console output
566 |
567 | ## introduction
568 |
569 | at the end of the day, it's likely that one or two students will want to know how to "save what we did". the commands are of course already in the .R file that the students have been typing their notes into. If they want to save the console output, they basically have three options:
570 |
571 | 1. copy all the output and paste it into a separate text file; or,
572 | 2. use a sink; or,
573 | 3. write their notes as .Rmd
574 |
575 | ## sinks
576 |
577 | to use a sink, have the student put `sink('filename')` as the very first line in their notes, and `sink()` as the very last. then, when they re-run their entire .R file, the output will go to a text file called "filename" instead of the R console. for an example, see `save_console_output.*` in the examples directory.
578 |
579 | ## .Rmd
580 |
581 | See [Dynamic documents in R Markdown](https://github.com/deniederhut/workshop_Rmd)
582 |
583 | # Acknowledgements
584 |
585 | ## Materials taken from:
586 |
587 | [Hadley Wickham](http://adv-r.had.co.nz/)
588 |
--------------------------------------------------------------------------------
/instructor/day_one.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/R-for-Data-Science/798d5b220d66afea5644a245c7327e4bb740039a/instructor/day_one.pdf
--------------------------------------------------------------------------------
/instructor/day_three.R:
--------------------------------------------------------------------------------
1 | ## ---- echo=FALSE---------------------------------------------------------
2 | knitr::opts_knit$set(root.dir = '../')
3 |
4 | ## ------------------------------------------------------------------------
5 | load('data/feedback.Rda')
6 | str(dat)
7 |
8 | ## ------------------------------------------------------------------------
9 | summary(dat)
10 | table(dat$department)
11 |
12 | ## ------------------------------------------------------------------------
13 | library(psych)
14 | describe(dat)
15 |
16 | ## ------------------------------------------------------------------------
17 | library(dplyr)
18 | dat %>% group_by(gender) %>% summarize(n())
19 |
20 | ## ------------------------------------------------------------------------
21 | library(tidyr)
22 | dat %>% filter(!is.na(gender)) %>% group_by(gender, department) %>%
23 | summarize(n=n()) %>% spread(gender, n)
24 |
25 | ## ------------------------------------------------------------------------
26 | if (!requireNamespace('ggplot2', quietly = TRUE)) install.packages('ggplot2')  # install only when missing
27 | library(ggplot2)
28 |
29 | ## ------------------------------------------------------------------------
30 | dat$wday <- factor(weekdays(dat$timestamp, abbreviate = TRUE),
31 | levels = c('Mon','Tue','Wed','Thu','Fri','Sat','Sun')
32 | )
33 | summary(dat$wday)
34 |
35 | ## ------------------------------------------------------------------------
36 | qplot(instructor.communicated, data = dat)
37 | qplot(wday, course.delivered, data = dat)
38 |
39 | ## ------------------------------------------------------------------------
40 | ggplot(data=dat, aes(x=wday)) + geom_bar()
41 |
42 | ## ------------------------------------------------------------------------
43 | ggplot(data=dat, aes(x=course.delivered)) +
44 | geom_histogram(binwidth=1)
45 |
46 | ## ------------------------------------------------------------------------
47 | ggplot(data=dat, aes(x=course.delivered)) +
48 | geom_histogram(binwidth=1, fill = 'gold', colour= 'blue')
49 |
50 | ## ------------------------------------------------------------------------
51 | ggplot(data=dat, aes(x=gender,y=interest)) + geom_boxplot()
52 |
53 | ## ------------------------------------------------------------------------
54 | ggplot(data=dat, aes(x=instructor.communicated, y=course.delivered)) + geom_point()
55 |
56 | ## ------------------------------------------------------------------------
57 | ggplot(data=dat, aes(x=instructor.communicated, y=course.delivered)) +
58 | geom_jitter()
59 |
60 | ## ------------------------------------------------------------------------
61 | ggplot(data=dat, aes(x=instructor.communicated, y=course.delivered)) +
62 | geom_jitter(aes(colour = wday))
63 |
64 | ## ------------------------------------------------------------------------
65 | ggplot(data=dat, aes(x=wday, y=course.delivered)) +
66 | geom_boxplot(colour = 'gold') +
67 | geom_jitter(colour = 'blue')
68 |
69 | ## ------------------------------------------------------------------------
70 | ggplot(data=dat, aes(x=instructor.communicated, y=course.delivered)) +
71 | geom_jitter() +
72 | stat_smooth(method = 'lm')
73 |
74 | ## ------------------------------------------------------------------------
75 | ggplot(data=dat, aes(x=instructor.communicated, y=course.delivered, colour = wday)) +
76 | geom_jitter() +
77 | stat_smooth(method = 'lm', se = FALSE)
78 |
79 | ## ------------------------------------------------------------------------
80 | ggplot(data=dat, aes(x=instructor.communicated, y=course.delivered)) +
81 | geom_jitter() +
82 | stat_smooth(method = 'lm', colour = 'black') +
83 | xlab('How well the instructor communicated (1-7)') +
84 | ylab('How well the course delivered advertised content (1-7)') +
85 | ggtitle("I have no idea what I'm doing")
86 |
87 | ## ------------------------------------------------------------------------
88 | ggplot(data=dat, aes(x=instructor.communicated, y=course.delivered)) +
89 | geom_jitter() +
90 | stat_smooth(method = 'lm') +
91 | facet_grid(. ~ useful)
92 |
93 | ## ------------------------------------------------------------------------
94 | t.test(dat$inside.barriers, dat$outside.barriers)
95 |
96 | ## ------------------------------------------------------------------------
97 | t.test(dat$outside.barriers[dat$gender == "Male/Man"], dat$outside.barriers[dat$gender == "Female/Woman"])
98 |
99 | ## ------------------------------------------------------------------------
100 | t.test(outside.barriers ~ gender, data = dat, subset = dat$gender %in% c("Male/Man", "Female/Woman"))
101 |
102 | ## ------------------------------------------------------------------------
103 | aov(outside.barriers ~ gender, data = dat)
104 |
105 | ## ------------------------------------------------------------------------
106 | model.1 <- aov(outside.barriers ~ gender, data = dat)
107 | summary(model.1)
108 |
109 | ## ------------------------------------------------------------------------
110 | TukeyHSD(model.1)
111 |
112 | ## ------------------------------------------------------------------------
113 | cor.test(dat$outside.barriers, dat$inside.barriers)
114 |
115 | ## ------------------------------------------------------------------------
116 | model.1 <- lm(inside.barriers ~ outside.barriers, data = dat)
117 | summary(model.1)
118 |
119 | ## ------------------------------------------------------------------------
120 | model.2 <- lm(inside.barriers ~ outside.barriers + department, data = dat)
121 | summary(model.2)
122 |
123 | ## ------------------------------------------------------------------------
124 | model.3 <- lm(inside.barriers ~ outside.barriers + department + outside.barriers*department, data = dat)
125 | summary(model.3)
126 |
127 | ## ------------------------------------------------------------------------
128 | model.1$coefficients
129 | model.1$coefficients[[2]]
130 |
131 | ## ---- eval=FALSE---------------------------------------------------------
132 | ## dat$residuals <- model.1$residuals
133 |
134 | ## ------------------------------------------------------------------------
135 | dat.listwise <- dat[!is.na(dat$inside.barriers) & !is.na(dat$outside.barriers), ]
136 | dat.listwise$resid <- model.1$residuals
137 |
138 | ## ------------------------------------------------------------------------
139 | ggplot(data = dat.listwise, aes(x=gender,y=resid)) +
140 | geom_boxplot()
141 |
142 | ## ------------------------------------------------------------------------
143 | wilcox.test(dat$outside.barriers, dat$inside.barriers, alternative = "two.sided", paired = FALSE, mu = 0, conf.level = 0.95)
144 |
145 | ## ------------------------------------------------------------------------
146 | cor.test(dat$outside.barriers, dat$inside.barriers, method = 'spearman')
147 |
148 | ## ------------------------------------------------------------------------
149 | chisq.test(dat$gender, dat$department)
150 |
151 | ## ------------------------------------------------------------------------
152 | names(dat)  # column names of the feedback data; `data` is R's built-in function, not our object
153 |
154 |
--------------------------------------------------------------------------------
/instructor/day_three.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Day Three: Data Analysis"
3 | author: "Dillon Niederhut"
4 | date: "`r format(Sys.time(), '%d %B, %Y')`"
5 | output:
6 | - pdf_document
7 | - slidy_presentation
8 | ---
9 |
10 | ```{r, echo=FALSE}
11 | knitr::opts_knit$set(root.dir = '../')
12 | ```
13 |
14 | ## Pre-introduction
15 |
16 | While everyone is getting situated and/or cloning the course materials, pull up `feedback_cleaner.R`. As a review of day 2, walk through the different parts of the script, and ask the students to describe to you what each piece does. For example,
17 |
18 | ```
19 | dat$timestamp <- sub(' [0-9]+:[0-9]+:[0-9]+', '', dat$timestamp)
20 | dat$timestamp <- as.Date(dat$timestamp, "%m/%d/%Y")
21 | ```
22 |
23 | is code that strips the time of day from month/day/year timestamps so that R can read them as date-type values.
24 |
25 | ## Introduction
26 |
27 | analysis generally proceeds in two steps:
28 |
29 | 1. exploratory data analysis
30 | 2. statistical inference
31 |
32 | our treatment of graphing owes a lot to the Grammar of Graphics
33 |
34 | # Summarizing
35 |
36 | ## let's load in some data about D-Lab feedback
37 |
38 | ```{r}
39 | load('data/feedback.Rda')
40 | str(dat)
41 | ```
42 |
43 | ## R provides two easy/simple summary functions in the base package
44 |
45 | ```{r}
46 | summary(dat)
47 | table(dat$department)
48 | ```
49 |
50 | ## the `psych` package provides trimmed means, skew, kurtosis, and missingness
51 |
52 | ```{r}
53 | library(psych)
54 | describe(dat)
55 | ```
56 |
57 | ## you can use dplyr::group_by to generate summaries
58 |
59 | ```{r}
60 | library(dplyr)
61 | dat %>% group_by(gender) %>% summarize(n())
62 | ```
63 |
64 | ## and you can combine dplyr with tidyr::spread to generate crosstabs
65 |
66 | > side note - we are filtering out missing values of gender, because `tidyr` doesn't allow `NA` as a column name
67 |
68 | ```{r}
69 | library(tidyr)
70 | dat %>% filter(!is.na(gender)) %>% group_by(gender, department) %>%
71 | summarize(n=n()) %>% spread(gender, n)
72 | ```
73 |
74 | # Plotting
75 |
76 | ## every time you use `base::plot`, [Edward Tufte does something unkind to a cute animal](http://markandrewgoetz.com/blog/2009/11/my-new-wallpaper/)
77 |
78 | - we'll be using ggplot, R's implementation of the **grammar of graphics**
79 |
80 | - in this grammar, you use 'aesthetics' to define how data is mapped to objects in the graph space
81 |
82 | - each graph space has at least three layers:
83 | - theme/background/annotations
84 | - axes
85 | - objects
86 |
87 | - most objects are geometric shapes
88 |
89 | - some objects are statistics built on those shapes
90 |
91 | - you can stack as many layers as you like
92 |
93 | ```{r}
94 | if (!requireNamespace('ggplot2', quietly = TRUE)) install.packages('ggplot2')  # install only when missing
95 | library(ggplot2)
96 | ```
97 |
98 | ## getting weekdays
99 |
100 | let's imagine that we are interested in looking at differences in feedback based on the day of the week -- how would we do this in R?
101 |
102 | > side note - `weekdays` is locale aware, so students who have their laptop language set to something other than english will get their weekday names in the other language
103 |
104 | ```{r}
105 | dat$wday <- factor(weekdays(dat$timestamp, abbreviate = TRUE),
106 | levels = c('Mon','Tue','Wed','Thu','Fri','Sat','Sun')
107 | )
108 | summary(dat$wday)
109 | ```
110 |
111 | ## use qplot for initial poking around
112 |
113 | it has very strong intuitions about what you want to see, and is not particularly customizable
114 |
115 | ```{r}
116 | qplot(instructor.communicated, data = dat)
117 | qplot(wday, course.delivered, data = dat)
118 | ```
119 |
120 | ## for 1D categorical, use bar
121 |
122 | ```{r}
123 | ggplot(data=dat, aes(x=wday)) + geom_bar()
124 | ```
125 |
126 | ## for 1D continuous, use hist
127 |
128 | this is really just convenience for `geom_bar(stat = 'bin')`, as opposed to bar plots, whose `stat` is `'count'`
129 |
130 | ```{r}
131 | ggplot(data=dat, aes(x=course.delivered)) +
132 | geom_histogram(binwidth=1)
133 | ```
134 |
135 | you can add color to this plot
136 |
137 | ```{r}
138 | ggplot(data=dat, aes(x=course.delivered)) +
139 | geom_histogram(binwidth=1, fill = 'gold', colour= 'blue')
140 | ```
141 |
142 | GO BEARS
143 |
144 | ## for many 1D variables, use a box plot
145 |
146 | these are handy for a whole bunch of reasons, and you should make them your close associates
147 |
148 | ```{r}
149 | ggplot(data=dat, aes(x=gender,y=interest)) + geom_boxplot()
150 | ```
151 |
152 | ## to plot two continuous variables, use points
153 |
154 | ```{r}
155 | ggplot(data=dat, aes(x=instructor.communicated, y=course.delivered)) + geom_point()
156 | ```
157 |
158 | all of these values are discrete, which makes them hard to see
159 |
160 | ## to scatter points randomly, use jitter
161 |
162 | this is really just convenience for `geom_point(position = 'jitter')`
163 |
164 | ```{r}
165 | ggplot(data=dat, aes(x=instructor.communicated, y=course.delivered)) +
166 | geom_jitter()
167 | ```
168 |
169 | not only can you add color, you can make the color a mapping of other variables
170 |
171 | ```{r}
172 | ggplot(data=dat, aes(x=instructor.communicated, y=course.delivered)) +
173 | geom_jitter(aes(colour = wday))
174 | ```
175 |
176 | the last time we used `colour` it was not an aesthetic - why is it now?
177 |
178 | ## you can stack layers until your eyes hurt
179 |
180 | ```{r}
181 | ggplot(data=dat, aes(x=wday, y=course.delivered)) +
182 | geom_boxplot(colour = 'gold') +
183 | geom_jitter(colour = 'blue')
184 | ```
185 |
186 | ## add summary functions with smooth
187 |
188 | ```{r}
189 | ggplot(data=dat, aes(x=instructor.communicated, y=course.delivered)) +
190 | geom_jitter() +
191 | stat_smooth(method = 'lm')
192 | ```
193 |
194 | if you are using colour as an aesthetic, you'll produce stats for each color
195 |
196 | ```{r}
197 | ggplot(data=dat, aes(x=instructor.communicated, y=course.delivered, colour = wday)) +
198 | geom_jitter() +
199 | stat_smooth(method = 'lm', se = FALSE)
200 | ```
201 |
202 | ## good scientists put units on their axes
203 |
204 | ```{r}
205 | ggplot(data=dat, aes(x=instructor.communicated, y=course.delivered)) +
206 | geom_jitter() +
207 | stat_smooth(method = 'lm', colour = 'black') +
208 | xlab('How well the instructor communicated (1-7)') +
209 | ylab('How well the course delivered advertised content (1-7)') +
210 | ggtitle("I have no idea what I'm doing")
211 | ```
212 |
213 | the general point here is that every single object on this graph is customizable
214 |
215 | frequent customizations are very simple to add
216 |
217 | infrequent customizations will take a lot of tinkering on your part
218 |
219 | ## faceting
220 |
221 | often useful for looking at relationships between three variables at the same time
222 |
223 | ```{r}
224 | ggplot(data=dat, aes(x=instructor.communicated, y=course.delivered)) +
225 | geom_jitter() +
226 | stat_smooth(method = 'lm') +
227 | facet_grid(. ~ useful)
228 | ```
229 |
230 | # Mean testing
231 |
232 | a picture is worth 1,000 words, but a p-value is worth a dissertation
233 |
234 | basically, inferential statistics is the application of probability theory to decide what is real and what isn't
235 |
236 | we'll start by trying to tell whether differences between group summaries are real
237 |
238 | ## t.test with two vectors (default method)
239 |
240 | ```{r}
241 | t.test(dat$inside.barriers, dat$outside.barriers)
242 | ```
243 |
244 | note that R takes care of the defaults for you - what it is really computing is `t.test(dat$inside.barriers, dat$outside.barriers, alternative = "two.sided", paired = FALSE, var.equal = FALSE, mu = 0, conf.level = 0.95)`
245 |
246 | how would you find this out for yourself?
247 |
248 | ## t.test with subsets of one vector (default method)
249 |
250 | ```{r}
251 | t.test(dat$outside.barriers[dat$gender == "Male/Man"], dat$outside.barriers[dat$gender == "Female/Woman"])
252 | ```
253 |
254 | recall that we mentioned inconsistency on day one - here it is, and in a big way
255 |
256 | ## t.test with S3 method
257 |
258 | ```{r}
259 | t.test(outside.barriers ~ gender, data = dat, subset = dat$gender %in% c("Male/Man", "Female/Woman"))
260 | ```
261 |
262 | ## aov
263 |
264 | first, you would think anova would be called by `anova`, but that's reserved for conducting F-tests on lm objects
265 |
266 | second, you really shouldn't be using anova, but if you must do it in R, the syntax looks like this
267 |
268 | > side note - ANOVA was invented by Ron Fisher to make it easy to do linear models with only a pencil and paper, and has been superseded by regression since the advent of computation in the 70s
269 |
270 | ```{r}
271 | aov(outside.barriers ~ gender, data = dat)
272 | ```
273 |
274 | this isn't particularly helpful, but remember that it is an object, and we can call other, more helpful functions, on that object
275 |
276 | remember our old friend `summary`? it works on almost everything
277 |
278 | ```{r}
279 | model.1 <- aov(outside.barriers ~ gender, data = dat)
280 | summary(model.1)
281 | ```
282 |
283 | that's a little better - but what about post-hoc testing?
284 |
285 | ```{r}
286 | TukeyHSD(model.1)
287 | ```
288 |
289 | > side note - apparently Stata stores all of the models that you generate, whether you assign them names or not; in R, you must explicitly give your models names or they will disappear into the ether
290 |
291 | # linear models
292 |
293 | mean tests are really just a subset of linear models where your predictor is a category
294 |
295 | ## cor.test (Pearson)
296 |
297 | earlier, we were looking at differences between the means of two variables
298 |
299 | but those variables were both continuous, so we can ask whether they are related
300 |
301 | ```{r}
302 | cor.test(dat$outside.barriers, dat$inside.barriers)
303 | ```
304 |
305 | okay, so they're related - now what?
306 |
307 | ## lm
308 |
309 | this is probably the closest you will get to building a linear model by hand
310 |
311 | this means lm is a powerful tool, but you have to know what you're doing
312 |
313 | the basic call is the S3 method
314 |
315 | ```{r}
316 | model.1 <- lm(inside.barriers ~ outside.barriers, data = dat)
317 | summary(model.1)
318 | ```
319 |
320 | ## R automatically one-hot encodes your categories
321 |
322 | ```{r}
323 | model.2 <- lm(inside.barriers ~ outside.barriers + department, data = dat)
324 | summary(model.2)
325 | ```
326 |
327 | ## R does not assume you want the full factorial model
328 |
329 | ```{r}
330 | model.3 <- lm(inside.barriers ~ outside.barriers + department + outside.barriers*department, data = dat)
331 | summary(model.3)
332 | ```
333 |
334 | ## extract model parameters with `$`
335 |
336 | ```{r}
337 | model.1$coefficients
338 | model.1$coefficients[[2]]
339 | ```
340 |
341 | ## this is useful if you want to plot residuals
342 |
343 | ```{r, eval=FALSE}
344 | dat$residuals <- model.1$residuals
345 | ```
346 |
347 | oh boy golly gee gosh darn! remember how we talked about R having casewise deletion + bad indexing? this is one place where it makes your life difficult
348 |
349 | we have to do something like this:
350 |
351 | ```{r}
352 | dat.listwise <- dat[!is.na(dat$inside.barriers) & !is.na(dat$outside.barriers), ]
353 | dat.listwise$resid <- model.1$residuals
354 | ```
355 |
356 | then we can do this
357 |
358 | ```{r}
359 | ggplot(data = dat.listwise, aes(x=gender,y=resid)) +
360 | geom_boxplot()
361 | ```
362 |
363 | # Nonparametric
364 |
365 | parametric refers to using means, deviations, and other estimates of population parameters
366 |
367 | *BUT* what if you don't want to make assumptions about the structure of the population?
368 |
369 | or what if you **gasp** can't?
370 |
371 | ## ranked variables
372 |
373 | a simple case is where means don't have meaning
374 |
375 | above we were looking at correlations between Likert variables
376 |
377 | all Likerts are really rank variables, which means they don't act like actual number-y numbers
378 |
379 | in the real world, a 6 foot tall person is twice as tall as a 3 foot tall person
380 |
381 | but is a level '6' really twice as many barriers to access as a '3'?
382 |
383 | **NOPE**
384 |
385 | we know that 6 is more than 3, but can't really say how much - in that sense then, a scale of 1-7 is exactly the same thing as a scale of a-g.
386 |
387 | ## median testing ranks
388 |
389 | we use Mann-Whitney sums to test that the ranks are centered the same way
390 |
391 | ```{r}
392 | wilcox.test(dat$outside.barriers, dat$inside.barriers, alternative = "two.sided", paired = FALSE, mu = 0, conf.level = 0.95)
393 | ```
394 |
395 | see how this setup looks exactly like a t-test? that's not an accident
396 |
397 | ## correlating ranks
398 |
399 | this is just like the `cor.test` you did above, but with `method` set to equal 'spearman' instead of pearson
400 |
401 | ```{r}
402 | cor.test(dat$outside.barriers, dat$inside.barriers, method = 'spearman')
403 | ```
404 |
405 | rho is pretty close to the r from above
406 |
407 | ## chisq
408 |
409 | what if both of your variables are categories? we can test their counts with R's built in `chisq.test` function
410 |
411 | i.e. what if we want to know if gender is distributed evenly over departments?
412 |
413 | ```{r}
414 | chisq.test(dat$gender, dat$department)
415 | ```
416 |
417 | # Practice
418 |
419 | ## Assignment
420 |
421 | There were a lot of variables in this dataset that we did not look at today:
422 |
423 | ```{r}
424 | names(dat)  # `dat` is the data frame used in the analyses above; `data` is a base R function, not our dataset
425 | ```
426 |
427 | Choose two of those variables, and explore their distribution and relationship to each other. Can you conclude anything about the D-Lab based on the feedback?
428 |
429 | # Acknowledgements
430 |
431 | ## Materials taken from:
432 |
433 | [D-Lab's Feedback Analytics](https://github.com/dlab-berkeley/feedback-analytics)
--------------------------------------------------------------------------------
/instructor/day_three.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/R-for-Data-Science/798d5b220d66afea5644a245c7327e4bb740039a/instructor/day_three.pdf
--------------------------------------------------------------------------------
/instructor/day_two.R:
--------------------------------------------------------------------------------
1 | ## ---- echo=FALSE---------------------------------------------------------
2 | knitr::opts_knit$set(root.dir = '../')
3 |
4 | ## ---- eval=FALSE---------------------------------------------------------
5 | ## data(state)
6 | ## str(state.x77)
7 |
8 | ## ------------------------------------------------------------------------
9 | state.division
10 | length(state.division)
11 | levels(state.division)
12 |
13 | ## ------------------------------------------------------------------------
14 | state <- state.x77
15 | rm(state.x77)
16 | state <- as.data.frame(state)
17 | head(state)
18 |
19 | ## ------------------------------------------------------------------------
20 | my.data <- data.frame(n = c(1, 2, 3),
21 | c=c('one', 'two', 'three'),
22 | b=c(TRUE, TRUE, FALSE),
23 | d=c(as.Date("2015-07-27"),
24 | as.Date("2015-07-27")+7,
25 | as.Date("2015-07-27")-7),
26 | really.long.and.complicated.variable.name=999)
27 |
28 | ## ------------------------------------------------------------------------
29 | str(my.data)
30 |
31 | ## ------------------------------------------------------------------------
32 | read.table("data/mydata.csv", sep=',', header = TRUE)
33 |
34 | ## ------------------------------------------------------------------------
35 | read.csv("data/mydata.csv")
36 |
37 | ## ------------------------------------------------------------------------
38 | read.csv("data/mydata.csv", nrows=2)
39 |
40 | ## ------------------------------------------------------------------------
41 | load("data/mydata.Rda")
42 |
43 | ## ---- eval=FALSE---------------------------------------------------------
44 | ## # WARNING! xlsx package install crashed current version of RStudio
45 | ## install.packages("xlsx")
46 | ## library(xlsx)
47 | ## read.xlsx("data/cpds_excel_new.xlsx")
48 |
49 | ## ---- eval=FALSE---------------------------------------------------------
50 | ## # examples of these?
51 | ## install.packages("foreign")
52 | ## library(foreign)
53 | ## read.dta("data/cpds_stata.dta")
54 | ## read.spss()
55 | ## read.octave()
56 |
57 | ## ------------------------------------------------------------------------
58 | dirty <- read.csv('data/dirty.csv')
59 | str(dirty)
60 |
61 | ## ------------------------------------------------------------------------
62 | dirty <- read.csv('data/dirty.csv',stringsAsFactors = FALSE)
63 | str(dirty)
64 |
65 | ## ------------------------------------------------------------------------
66 | tail(dirty)
67 | dirty <- dirty[1:5,-6]
68 | dim(dirty)
69 |
70 | ## ------------------------------------------------------------------------
71 | names(dirty)
72 | names(dirty) <- c("time", "height", "dept", "enroll", "birth.order")
73 |
74 | ## ------------------------------------------------------------------------
75 | dirty$enroll
76 |
77 | ## ------------------------------------------------------------------------
78 | table(dirty$enroll)
79 | dirty$enroll[dirty$enroll=="999"] <- NA
80 | table(dirty$enroll, useNA = "ifany")
81 |
82 | ## ------------------------------------------------------------------------
83 | class(dirty$height)
84 | as.numeric(dirty$height)
85 |
86 | ## ------------------------------------------------------------------------
87 | dirty$height[grep("’", dirty$height, perl=TRUE)] <- 5*30.48 + 9*2.54
88 | dirty$height[2] <- 70*2.54
89 | dirty$height[3] <- 2.1*100
90 |
91 | ## ------------------------------------------------------------------------
92 | dirty$dept
93 | dirty$dept <- tolower(dirty$dept)
94 | dirty$dept <- gsub(' ', '', dirty$dept) # what did we just do?
95 | dirty$dept[4] <- "geology"
96 | dirty[dirty == "999"] <- NA
97 |
98 | ## ------------------------------------------------------------------------
99 | dirty$time <- as.Date(dirty$time,'%m/%d/%Y')
100 | dirty$height <- as.numeric(dirty$height)
101 | dirty$dept <- as.factor(dirty$dept)
102 | dirty$enroll <- as.factor(dirty$enroll)
103 | dirty$birth.order <- as.numeric(dirty$birth.order)
104 | str(dirty)
105 |
106 | ## ------------------------------------------------------------------------
107 | na.omit(dirty)
108 |
109 | ## ------------------------------------------------------------------------
110 | nrow(dirty)
111 | sum(is.na(dirty$height))
112 | sum(is.na(dirty$birth.order))
113 | length(lm(height ~ birth.order,data=dirty)$fitted.values)
114 |
115 | ## ------------------------------------------------------------------------
116 | library(Amelia)
117 |
118 | ## ------------------------------------------------------------------------
119 | large <- read.csv('data/large.csv')
120 | summary(large)
121 | nrow(na.omit(large))
122 |
123 | ## ------------------------------------------------------------------------
124 | a <- amelia(large,m = 1)
125 | print(a)
126 |
127 | ## ------------------------------------------------------------------------
128 | large.imputed <- a[[1]][[1]]
129 | summary(large.imputed)
130 |
131 | ## ------------------------------------------------------------------------
132 | a <- amelia(large[990:1000,],m = 1)
133 | print(a)
134 |
135 | ## ------------------------------------------------------------------------
136 | 1 == 2
137 | 1 != 1
138 | 1 >= 1
139 |
140 | ## ------------------------------------------------------------------------
141 | 1 >= c(0,1,2)
142 |
143 | ## ------------------------------------------------------------------------
144 | c(1,2) >= c(1,2,3)
145 | c(1,2) >= c(1,2,3,4) # why no warning this time? R recycles!
146 |
147 | ## ------------------------------------------------------------------------
148 | my.data$numeric == 2
149 | my.data[my.data$numeric == 2,]
150 |
151 | ## ------------------------------------------------------------------------
152 | my.data[my.data$b,]
153 |
154 | ## ------------------------------------------------------------------------
155 | my.data[,'d']
156 |
157 | ## ------------------------------------------------------------------------
158 | good.things <- c("three", "four", "five")
159 | my.data[my.data$character %in% good.things, ]
160 |
161 | ## ------------------------------------------------------------------------
162 | str(my.data[!(my.data$character %in% good.things), ])
163 |
164 | ## ------------------------------------------------------------------------
165 | str(my.data$numeric)
166 |
167 | ## ---- eval=FALSE---------------------------------------------------------
168 | ## install.packages('tidyr')
169 | ## install.packages('stringr')
170 | ## install.packages('dplyr')
171 |
172 | ## ------------------------------------------------------------------------
173 | library(tidyr)
174 | library(stringr)
175 | library(dplyr)
176 |
177 | ## ------------------------------------------------------------------------
178 | abnormal <- data.frame(name = c('Alice','Bob','Eve'),
179 | time1 = c(90,90,150),
180 | time2 = c(100,95,100))
181 |
182 | ## ------------------------------------------------------------------------
183 | normal <- gather(abnormal, "time", "score", time1, time2)
184 | normal
185 |
186 | ## ------------------------------------------------------------------------
187 | normal$id <- seq_len(nrow(normal))  # one id per row; seq_len() also behaves for a zero-row frame
188 | normal$time <- str_replace(normal$time,'time','')  # drop the 'time' prefix left over from the gathered column names
189 | normal$time <- as.numeric(normal$time)  # turn the remaining digit strings into numbers
190 |
191 | ## ------------------------------------------------------------------------
192 | normal[normal$time == 1,]
193 | normal[normal$name == 'Alice',]
194 |
195 | ## ------------------------------------------------------------------------
196 | t.test(score ~ time, data=normal)
197 |
198 | ## ------------------------------------------------------------------------
199 | data.1 <- read.csv('data/merge_practice_1.csv')
200 | data.2 <- read.csv('data/merge_practice_2.csv')
201 | str(data.1)
202 | str(data.2)
203 |
204 | ## ------------------------------------------------------------------------
205 | merge(data.1, data.2, by = 'id')
206 |
207 | ## ------------------------------------------------------------------------
208 | merge(data.1, data.2, by = 'id', all = TRUE)
209 |
210 | ## ------------------------------------------------------------------------
211 | lookup <- read.csv('data/merge_practice_3.csv')
212 | str(lookup)
213 |
214 | ## ------------------------------------------------------------------------
215 | merge(data.1, lookup, by = "location")
216 |
217 | ## ------------------------------------------------------------------------
218 | lookup[lookup$location == 'Reno', ]
219 |
220 | ## ------------------------------------------------------------------------
221 | library(dplyr)
222 |
223 | ## ------------------------------------------------------------------------
224 | normal
225 | arrange(normal, score)
226 |
227 | ## ------------------------------------------------------------------------
228 | summarise(normal, mean(score), sd(score))
229 |
230 | ## ------------------------------------------------------------------------
231 | group_by(normal, time)
232 | summarize(group_by(normal, time), mean(score))
233 | mutate(group_by(normal, time), diff=score-mean(score))
234 | ungroup(mutate(group_by(normal, time), diff=score-mean(score)))
235 |
236 | ## ------------------------------------------------------------------------
237 | super <- normal %>% group_by(time) %>% mutate(diff=score-mean(score)) %>% ungroup()  # left assignment keeps the result name visible up front
238 |
239 | ## ------------------------------------------------------------------------
240 | library(foreign)
241 | pew <- as.data.frame(read.spss("data/pew.sav"))
242 | religion <- pew[c("q16", "reltrad", "income")]
243 | rm(pew)
244 |
245 | ## ------------------------------------------------------------------------
246 | religion$reltrad <- as.character(religion$reltrad)
247 | religion$reltrad <- str_replace(religion$reltrad, " Churches", "")
248 | religion$reltrad <- str_replace(religion$reltrad, " Protestant", " Prot")
249 | religion$reltrad[religion$q16 == " Atheist (do not believe in God) "] <- "Atheist"
250 | religion$reltrad[religion$q16 == " Agnostic (not sure if there is a God) "] <- "Agnostic"
251 | religion$reltrad <- str_trim(religion$reltrad)
252 | religion$reltrad <- str_replace_all(religion$reltrad, " \\(.*?\\)", "")
253 |
254 | religion$income <- c("Less than $10,000" = "<$10k",
255 | "10 to under $20,000" = "$10-20k",
256 | "20 to under $30,000" = "$20-30k",
257 | "30 to under $40,000" = "$30-40k",
258 | "40 to under $50,000" = "$40-50k",
259 | "50 to under $75,000" = "$50-75k",
260 | "75 to under $100,000" = "$75-100k",
261 | "100 to under $150,000" = "$100-150k",
262 | "$150,000 or more" = ">150k",
263 | "Don't know/Refused (VOL)" = "Don't know/refused")[religion$income]
264 |
265 | religion$income <- factor(religion$income, levels = c("<$10k", "$10-20k", "$20-30k", "$30-40k", "$40-50k", "$50-75k",
266 | "$75-100k", "$100-150k", ">150k", "Don't know/refused"))
267 |
268 | ## ---- eval=FALSE---------------------------------------------------------
269 | ## religion <- count(religion, reltrad, income)
270 | ## names(religion)[1] <- "religion"
271 |
272 |
--------------------------------------------------------------------------------
/instructor/day_two.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: 'Day Two: Data Cleaning'
3 | author: ["Dillon Niederhut", "Shinhye Choi"]
4 | date: "`r format(Sys.time(), '%d %B, %Y')`"
5 | output:
6 | - pdf_document
7 | - slidy_presentation
8 | ---
9 |
10 | ```{r, echo=FALSE}
11 | knitr::opts_knit$set(root.dir = '../')
12 | ```
13 |
14 | # Review
15 |
16 | ## Inspecting objects
17 |
18 | we'll start by using some data that is already in R
19 |
20 | ```{r, eval=FALSE}
21 | data(state)
22 | str(state.x77)
23 | ```
24 |
25 | ## Inspecting variables
26 |
27 | We should see 50 values in this division variable (one per state), drawn from 9 levels
28 |
29 | ```{r}
30 | state.division
31 | length(state.division)
32 | levels(state.division)
33 | ```
34 |
35 | ## Inspecting data frames
36 |
37 | recall, a dataframe is a list of vectors, where each vector is one variable with all of its measurements
38 |
39 | R expects dataframes to be rectangular
40 |
41 | ```{r}
42 | state <- state.x77
43 | rm(state.x77)
44 | state <- as.data.frame(state)
45 | head(state)
46 | ```
47 |
48 | ## Introduction
49 |
50 | Today's class will essentially be split into two components: CRUD operations in R and TIDY data. For more on tidiness in data, see [Hadley Wickham's paper](http://www.jstatsoft.org/v59/i10/paper). We will also touch on missingness - for an accessible introduction, you can read [this very old and no longer state-of-the-art paper](http://psycnet.apa.org/journals/met/7/2/147/).
51 |
52 | yesterday we saw how to create dataframes in R
53 |
54 | ```{r}
55 | my.data <- data.frame(n = c(1, 2, 3),
56 | c=c('one', 'two', 'three'),
57 | b=c(TRUE, TRUE, FALSE),
58 | d=c(as.Date("2015-07-27"),
59 | as.Date("2015-07-27")+7,
60 | as.Date("2015-07-27")-7),
61 | really.long.and.complicated.variable.name=999)
62 | ```
63 |
64 | remember, you can learn about dataframes with
65 |
66 | ```{r}
67 | str(my.data)
68 | ```
69 |
70 | in practice, you will only rarely create dataframes by hand, because creating tables in a text editor is both boring and prone to error
71 |
72 | ## Readability
73 |
74 | we've broken up the previous command across multiple lines to make it easier to read
75 | this is a stylistic choice, and one that should be encouraged: however, it won't be obvious to most of the students that it is necessary to either highlight the whole command and run, or hit run for every line, starting from the first one, in order
76 |
77 | often, students will just run the second line, and be confused when nothing runs correctly in the console anymore - the way to get out of this is by hitting `ESC`
78 |
79 | # Reading dataframes from file
80 |
81 | ## why read data from text files?
82 |
83 | they are human-readable and highly interoperable
84 |
85 | ```{r}
86 | read.table("data/mydata.csv", sep=',', header = TRUE)
87 | ```
88 |
89 | > side note - anyone who is 100% new to computing will have a hard time understanding the concept of a working directory, and will try to run this code from their home directory (spoiler alert - it doesn't work)
90 |
91 | ## R has convenience wrappers for reading in tables
92 |
93 | ```{r}
94 | read.csv("data/mydata.csv")
95 | ```
96 |
97 | note that we are only reading the files by doing this
98 |
99 | ## R lets you read in part of a table
100 |
101 | you'll sometimes find that you want to work with a smaller part of a dataset - maybe because the data is too large to fit into memory, or maybe because you want to test out some code on a small piece of the data so it runs faster
102 |
103 | ```{r}
104 | read.csv("data/mydata.csv", nrows=2)
105 | ```
106 |
107 | note that `nrows` is **not** equal to the number of lines in the file, because it does not include the file header
108 |
109 | ## R also has its own kind of data file
110 |
111 | ```{r}
112 | load("data/mydata.Rda")
113 | ```
114 |
115 | the `load` function does actually put the file into memory, and with the name you originally gave it when you saved it
116 |
117 | this is typically a bad thing, and there is currently no easy workaround
118 |
119 | ## to read in tables from excel, use the `xlsx` package
120 |
121 | if you are exporting data from excel, be sure to export datetimes as strings, as excel does not store dates internally the same way Unix does
122 |
123 | ```{r, eval=FALSE}
124 | # WARNING! xlsx package install crashed current version of RStudio
125 | install.packages("xlsx")
126 | library(xlsx)
127 | read.xlsx("data/cpds_excel_new.xlsx", sheetIndex = 1)  # read.xlsx() requires a sheet; assumes the data is on the first one - confirm
128 | ```
129 | But it may be better to save your .xlsx file as a .csv file in Excel first, and then read the csv file into R.
130 |
131 | ## you can also use R to read in data from proprietary software
132 |
133 | ```{r, eval=FALSE}
134 | # examples of these?
135 | install.packages("foreign")
136 | library(foreign)
137 | read.dta("data/cpds_stata.dta")
138 | read.spss()
139 | read.octave()
140 | ```
141 |
142 | # Cleaning data
143 |
144 | there are two major steps to data cleaning, which we will call 'sanitizing' and 'tidying'
145 |
146 | in sanitizing, our goal is to take each variable and force its values to be honest representations of its levels
147 |
148 | in tidying, we are arranging our data structurally such that each row contains exactly one observation, and each column contains exactly one kind of data about that observation (this is sometimes expressed in SQL terms as "An attribute must tell something about the key, the whole key, and nothing but the key, so help me Codd")
149 |
150 | ## exporting data from other software can do weird things to numbers and factors
151 |
152 | ```{r}
153 | dirty <- read.csv('data/dirty.csv')
154 | str(dirty)
155 | ```
156 |
157 | ## it's usually better to DISABLE R's intuition about data types
158 |
159 | unless you already know the data is clean and has no non-factor strings in it (i.e. you are the one who created it)
160 |
161 | ```{r}
162 | dirty <- read.csv('data/dirty.csv',stringsAsFactors = FALSE)
163 | str(dirty)
164 | ```
165 |
166 | ## let's start by removing the empty rows and columns
167 |
168 | ```{r}
169 | tail(dirty)
170 | dirty <- dirty[1:5,-6]
171 | dim(dirty)
172 | ```
173 |
174 | ## you can replace variable names
175 |
176 | and you should, if they are uninformative or long
177 |
178 | ```{r}
179 | names(dirty)
180 | names(dirty) <- c("time", "height", "dept", "enroll", "birth.order")
181 | ```
182 |
183 | ## it's common for hand-coded data to have a signifier for subject-missingness
184 |
185 | (to help differentiate it from your hand-coder forgetting to do something)
186 |
187 | ```{r}
188 | dirty$enroll
189 | ```
190 |
191 | ## you should replace all of these values in your dataframe with R's missingness signifier, `NA`
192 |
193 | ```{r}
194 | table(dirty$enroll)
195 | dirty$enroll[dirty$enroll=="999"] <- NA
196 | table(dirty$enroll, useNA = "ifany")
197 | ```
198 |
199 | > side note - read.table() has an option to specify field values as `NA` as soon as you import the data, but this is a BAAAAD idea because R automatically encodes blank fields as missing too, and thus you lose the ability to distinguish between user-missing and experimenter-missing
200 |
201 | ## the height variable is in four different units
202 |
203 | we can fix this with a somewhat complicated loop (since R started as a functional language, there are not easy ways to conditionally modify structures in place)
204 |
205 | OR
206 |
207 | we can do the same task line-by-line, since the number of observations is small
208 |
209 | ```{r}
210 | class(dirty$height)
211 | as.numeric(dirty$height)
212 | ```
213 |
214 | because there are apostrophes and quotation marks, R thinks these are strings
215 |
216 | ```{r}
217 | dirty$height[grep("’", dirty$height, perl=TRUE)] <- 5*30.48 + 9*2.54
218 | dirty$height[2] <- 70*2.54
219 | dirty$height[3] <- 2.1*100
220 | ```
221 |
222 | ## let's fix some of those department spellings
223 |
224 | first, let's make this all lowercase
225 |
226 | ```{r}
227 | dirty$dept
228 | dirty$dept <- tolower(dirty$dept)
229 | dirty$dept <- gsub(' ', '', dirty$dept) # what did we just do?
230 | dirty$dept[4] <- "geology"
231 | dirty[dirty == "999"] <- NA
232 | ```
233 |
234 | ## then, you can coerce the data into the types they should be
235 |
236 | ```{r}
237 | dirty$time <- as.Date(dirty$time,'%m/%d/%Y')
238 | dirty$height <- as.numeric(dirty$height)
239 | dirty$dept <- as.factor(dirty$dept)
240 | dirty$enroll <- as.factor(dirty$enroll)
241 | dirty$birth.order <- as.numeric(dirty$birth.order)
242 | str(dirty)
243 | ```
244 |
245 | # Missingness
246 |
247 | there are many reasons why you might have missing data
248 |
249 | *AS LONG AS MISSINGNESS IS NOT CAUSED BY YOUR INDEPENDENT VARIABLE* this is fine
250 |
251 | deleting those observations is wasteful, but easy (listwise deletion)
252 |
253 | ignoring the individual missing data points is typical (casewise deletion)
254 |
255 | imputing mean values for missing data is possibly the worst thing you can do
256 |
257 | imputing via MI + error is currently the best option
258 |
259 | ## listwise deletion is wasteful
260 |
261 | ```{r}
262 | na.omit(dirty)
263 | ```
264 |
265 | ## casewise deletion is what R does internally
266 |
267 | ```{r}
268 | nrow(dirty)
269 | sum(is.na(dirty$height))
270 | sum(is.na(dirty$birth.order))
271 | length(lm(height ~ birth.order,data=dirty)$fitted.values)
272 | ```
273 |
274 | this is usually the default strategy
275 |
276 | ## remember how we talked about the extensibility of R?
277 |
278 | amelia is a package that makes a complicated MI approach work without you knowing anything about its implementation
279 |
280 | ```{r}
281 | library(Amelia)
282 | ```
283 |
284 | ## let's use this large dataset as an example
285 |
286 | ```{r}
287 | large <- read.csv('data/large.csv')
288 | summary(large)
289 | nrow(na.omit(large))
290 | ```
291 |
292 | ## for it to work you need low missingness and large N
293 |
294 | ```{r}
295 | a <- amelia(large,m = 1)
296 | print(a)
297 | ```
298 |
299 | ## amelia returns a list, where the first item is a list of your imputations
300 |
301 | we only did one, so here it is
302 |
303 | ```{r}
304 | large.imputed <- a[[1]][[1]]
305 | summary(large.imputed)
306 | ```
307 |
308 | ## if you give it a tiny dataset, it will fuss at you
309 |
310 | ```{r}
311 | a <- amelia(large[990:1000,],m = 1)
312 | print(a)
313 | ```
314 |
315 | # Reshaping
316 |
317 | now that our data is clean, it's time to put it in a tidy format. this is a way of storing data that makes it easy to:
318 |
319 | 1. make graphs
320 | 2. run tests
321 | 3. summarize
322 | 4. transform into other formats
323 |
324 | we are basically trying to organize ourselves such that:
325 |
326 | 1. any grouping is made on rows
327 | 2. any testing is done between columns
328 |
329 | ## an aside on testing
330 |
331 | in R, you use double symbols for testing
332 |
333 | ```{r}
334 | 1 == 2
335 | 1 != 1
336 | 1 >= 1
337 | ```
338 |
339 | (you've already seen a couple of these)
340 |
341 | ## tests return boolean vectors
342 |
343 | ```{r}
344 | 1 >= c(0,1,2)
345 | ```
346 |
347 | ## recall that boolean vectors need to be the same length or a divisor
348 |
349 | if your vectors are not multiples of each other, R will fuss at you
350 |
351 | ```{r}
352 | c(1,2) >= c(1,2,3)
353 | c(1,2) >= c(1,2,3,4) # why no warning this time? R recycles!
354 | ```
355 |
356 | the combination of the length requirement, the lack of support in R for proper indexing, and missingness in your data will cause many headaches later on
357 |
358 | ## subsetting data frames
359 |
360 | subsetting your data is where you will use this regularly
361 |
362 | ```{r}
363 | my.data$numeric == 2
364 | my.data[my.data$numeric == 2,]
365 | ```
366 |
367 | ## boolean variables can act as filters right out of the box
368 |
369 | ```{r}
370 | my.data[my.data$b,]
371 | ```
372 |
373 | you see the empty space after the comma? that tells R to grab all the columns
374 |
375 | ## you can also select columns
376 |
377 | ```{r}
378 | my.data[,'d']
379 | ```
380 |
381 | that empty space **before** the comma? that tells R to grab all the rows
382 |
383 | ## you can also match elements from a vector
384 |
385 | ```{r}
386 | good.things <- c("three", "four", "five")
387 | my.data[my.data$character %in% good.things, ]
388 | ```
389 |
390 | ## most subsetting operations on dataframes also return a dataframe
391 |
392 | ```{r}
393 | str(my.data[!(my.data$character %in% good.things), ])
394 | ```
395 |
396 | ## subsets that are a single column return a vector
397 |
398 | ```{r}
399 | str(my.data$numeric)
400 | ```
401 |
402 | ## most tidying can be done with two R packages
403 |
404 | (plus a wrapper around the base string functions)
405 |
406 | ```{r, eval=FALSE}
407 | install.packages('tidyr')
408 | install.packages('stringr')
409 | install.packages('dplyr')
410 | ```
411 |
412 | ```{r}
413 | library(tidyr)
414 | library(stringr)
415 | library(dplyr)
416 | ```
417 |
418 | ## reshaping
419 |
420 | our goal here is to arrange our data such that each table is about one kind of thing: whether it is everything about a measurement, everything about a person, or everything about a group of people
421 |
422 | ```{r}
423 | abnormal <- data.frame(name = c('Alice','Bob','Eve'),
424 | time1 = c(90,90,150),
425 | time2 = c(100,95,100))
426 | ```
427 |
428 | this table is not tidy - why not?
429 |
430 | the table is about measurements, but each measurement does not have its own row, and each type of measurement value is represented by more than one column
431 |
432 | ```{r}
433 | normal <- gather(abnormal, "time", "score", time1, time2)
434 | normal
435 | ```
436 |
437 | we can gather the two columns with time data into a column representing just time, and another representing just scores
438 |
439 | now that each row is a unique observation, we can clean up the dataframe a bit
440 |
441 | ```{r}
442 | normal$id <- seq_len(nrow(normal))  # one id per row; seq_len() also behaves for a zero-row frame
443 | normal$time <- str_replace(normal$time,'time','')  # drop the 'time' prefix left over from the gathered column names
444 | normal$time <- as.numeric(normal$time)  # turn the remaining digit strings into numbers
445 | ```
446 |
447 | now that we are in a tidy format, see how easy it is to subset
448 |
449 | ```{r}
450 | normal[normal$time == 1,]
451 | normal[normal$name == 'Alice',]
452 | ```
453 |
454 | and test
455 |
456 | > side note - don't worry about how this works yet - we'll talk about it tomorrow
457 |
458 | ```{r}
459 | t.test(score ~ time, data=normal)
460 | ```
461 |
462 | it's easy to combine tidy tables to compare different levels of information simultaneously
463 |
464 | # Merging data frames
465 |
466 | ## flexibly join dataframes with `merge`
467 |
468 | imagine you have two datasets that you want to merge
469 |
470 | ```{r}
471 | data.1 <- read.csv('data/merge_practice_1.csv')
472 | data.2 <- read.csv('data/merge_practice_2.csv')
473 | str(data.1)
474 | str(data.2)
475 | ```
476 |
477 | sometimes the same people have different jobs in different locations
478 |
479 | you can do an *inner* join using merge
480 |
481 | ```{r}
482 | merge(data.1, data.2, by = 'id')
483 | ```
484 |
485 | that's no good - we lost half of our people!
486 |
487 | inner joins are mostly used when you **only** want records that appear in both tables
488 |
489 | if you want the union, you can use an outer join
490 |
491 | ```{r}
492 | merge(data.1, data.2, by = 'id', all = TRUE)
493 | ```
494 |
495 | this works basically the same as `join` in SQL
496 |
497 | running merges is particularly useful when:
498 |
499 | a. your data is tidy; and,
500 | b. you want to add information with a lookup table
501 |
502 | in this case, you can store your lookup table as a dataframe, then merge it
503 |
504 | ```{r}
505 | lookup <- read.csv('data/merge_practice_3.csv')
506 | str(lookup)
507 | ```
508 |
509 | this lookup table gives us the population for each location
510 |
511 | we can add this to our people table with
512 |
513 | ```{r}
514 | merge(data.1, lookup, by = "location")
515 | ```
516 |
517 | note that Reno was in our lookup table
518 |
519 | ```{r}
520 | lookup[lookup$location == 'Reno', ]
521 | ```
522 |
523 | but doesn't show up when we merge - why do you think this is?
524 |
525 | # Transforming data
526 |
527 | ## introduction
528 |
529 | because R started out as a functional language, it can be hard to modify data, especially in place
530 |
531 | in practice, if you want 100% control over how your frames are being modified, you'll be writing lots of `for` loops, which is messy
532 |
533 | luckily, there is a package that handles the common tasks for you
534 |
535 | ```{r}
536 | library(dplyr)
537 | ```
538 |
539 | ## sort data with `arrange`
540 |
541 | base R syntax for sorting is a bit of a pain in that you have to create a sorting vector based on the values in a column, then subset the same dataframe and apply the sorting vector to the rows slice
542 |
543 | to demonstrate this, let's have another look at our 'normal' data frame
544 |
545 | ```{r}
546 | normal
547 | arrange(normal, score)
548 | ```
549 |
550 | ## apply summary functions with `summarise`
551 |
552 | dplyr includes most of the base R summary statistics, along with:
553 |
554 | * `n()`
555 | * `n_distinct()`
556 | * `first()`
557 | * `last()`
558 |
559 | if we want to get the mean and sd for the scores, we can do
560 |
561 | ```{r}
562 | summarise(normal, mean(score), sd(score))
563 | ```
564 |
565 | ## dplyr allows you to apply functions to groups
566 |
567 | so far, these have taken base R functions and made them faster (with C++ calls behind the scenes), easier to use, or both
568 |
569 | dplyr's real utility is in its grouped dataframes, which apply dplyr functions groupwise
570 |
571 | let's say that we want to know how each score compares to the average score at each time -- we can `group_by` time and then do some variable transformation
572 |
573 | ```{r}
574 | group_by(normal, time)  # same rows, now tagged with a grouping on time
575 | summarise(group_by(normal, time), mean_score = mean(score))  # one row per time
576 | mutate(group_by(normal, time), diff = score - mean(score))  # mean() is computed within each time
577 | ungroup(mutate(group_by(normal, time), diff = score - mean(score)))  # drop the grouping when done
578 | ```
579 |
580 | you can add as many functions as you want in between, but wrapping function call around function call can be hard to read (and write!)
581 |
582 | ## you can pipe functions with the `%>%` operator
583 |
584 | pipes take the output of one function and give it as an input to the next function, without deep nesting of functions nor saving all of the intermediate steps
585 |
586 | this makes code a lot easier to read, and to understand
587 |
588 | ```{r}
589 | super <- normal %>% group_by(time) %>% mutate(diff = score - mean(score)) %>% ungroup()  # assign on the left so the result name is visible first
590 | ```
591 |
592 | # Practice
593 |
594 | ## Grab some data from Pew
595 |
596 | and sanitize/tidy it
597 |
598 | this will be hard
599 |
600 | ```{r}
601 | library(foreign)
602 | pew <- as.data.frame(read.spss(file = "data/pew.sav"))
603 | religion <- pew[, c("q16", "reltrad", "income")]
604 | rm(pew)
605 | ```
606 |
607 | ## we'll start by cleaning up the factor variables
608 |
609 | ```{r}
610 | religion$reltrad <- as.character(religion$reltrad)  # work on plain strings for the edits below
611 | religion$reltrad <- str_replace(religion$reltrad, " Churches", "")  # drop the " Churches" suffix
612 | religion$reltrad <- str_replace(religion$reltrad, " Protestant", " Prot")  # shorten " Protestant" to " Prot"
613 | religion$reltrad[religion$q16 == " Atheist (do not believe in God) "] <- "Atheist"  # q16 answer overrides reltrad
614 | religion$reltrad[religion$q16 == " Agnostic (not sure if there is a God) "] <- "Agnostic"  # q16 answer overrides reltrad
615 | religion$reltrad <- str_trim(religion$reltrad)  # strip leading/trailing whitespace
616 | religion$reltrad <- str_replace_all(religion$reltrad, " \\(.*?\\)", "")  # remove parenthesised qualifiers
617 |
618 | religion$income <- c("Less than $10,000" = "<$10k",  # named lookup vector: long label = short label
619 | "10 to under $20,000" = "$10-20k",
620 | "20 to under $30,000" = "$20-30k",
621 | "30 to under $40,000" = "$30-40k",
622 | "40 to under $50,000" = "$40-50k",
623 | "50 to under $75,000" = "$50-75k",
624 | "75 to under $100,000" = "$75-100k",
625 | "100 to under $150,000" = "$100-150k",
626 | "$150,000 or more" = ">150k",
627 | "Don't know/Refused (VOL)" = "Don't know/refused")[religion$income]  # NOTE(review): if income is still a factor here this indexes by level code (position), not by label -- confirm the level order matches this vector
628 |
629 | religion$income <- factor(religion$income, levels = c("<$10k", "$10-20k", "$20-30k", "$30-40k", "$40-50k", "$50-75k",  # fix the level order: lowest income first, "Don't know/refused" last
630 | "$75-100k", "$100-150k", ">150k", "Don't know/refused"))
631 | ```
632 |
633 | ## now we can reduce this down to three columns for three variables
634 |
635 | ```{r, eval=FALSE}
636 | religion <- religion %>% count(reltrad, income)
637 | colnames(religion)[1] <- "religion"
638 | ```
639 |
640 | # Acknowledgements
641 |
642 | ## Materials taken from:
643 |
644 | - [Chris Krogslund](https://github.com/ckrogs/r_useful_dlab)
645 | - [Hadley Wickham](http://www.jstatsoft.org/v59/i10/paper)
646 |
--------------------------------------------------------------------------------
/instructor/day_two.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/R-for-Data-Science/798d5b220d66afea5644a245c7327e4bb740039a/instructor/day_two.pdf
--------------------------------------------------------------------------------
/instructor/overflow.R:
--------------------------------------------------------------------------------
1 | ## ---- echo=FALSE---------------------------------------------------------
2 | knitr::opts_knit$set(root.dir = '../')
3 |
4 | ## ------------------------------------------------------------------------
5 | #install.packages('RCurl')
6 | library(RCurl)
7 | #install.packages("XML")
8 | library(XML)
9 |
10 | ## ------------------------------------------------------------------------
11 | RJ <- readLines("http://shakespeare.mit.edu/romeo_juliet/full.html")
12 | head(RJ, 25)  # first 25 lines; unlike RJ[1:25] this never pads a short page with NA
13 |
14 | ## ------------------------------------------------------------------------
15 | RJ[grep("<h3>ACT", RJ, ignore.case = TRUE)]    # act headings -- assumes the page marks headings with <h3>; verify
16 | RJ[grep("<h3>SCENE", RJ, ignore.case = TRUE)]  # scene headings (an empty pattern would match every line)
17 |
18 | ## ---- eval=FALSE---------------------------------------------------------
19 | ## link <- "http://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml"
20 | ## page <- getURL(url = link)
21 | ## xmlParse(file = page)
22 |
23 | ## ------------------------------------------------------------------------
24 | link <- "http://clerk.house.gov/evs/2014/ROLL_000.asp"
25 | readHTMLTable(doc = link, header = TRUE, which = 1, stringsAsFactors = FALSE)[1:10, ]  # TRUE/FALSE spelled out: T and F can be reassigned
26 |
27 | ## ---- eval=FALSE---------------------------------------------------------
28 | ## #are there websites that allow you to connect to test servers?
29 | ## install.packages("RMySQL")
30 | ## library(RMySQL)
31 | ## con <- dbConnect(MySQL(),
32 | ## user="", password="",
33 | ## dbname="", host="localhost")
34 | ## data <- fetch(dbSendQuery(con, "select * from table"), n=10)
35 | ## dbDisconnect(con)
36 |
37 | ## ---- eval=FALSE---------------------------------------------------------
38 | ## install.packages("RPostgreSQL")
39 | ## library(RPostgreSQL)
40 | ## con <- dbConnect(dbDriver("PostgreSQL"),
41 | ## dbname="",
42 | ## host="localhost",
43 | ## port=1234,
44 | ## user="",
45 | ## password="")
46 | ## data <- dbReadTable(con, c("column1","column2"))
47 | ## dbDisconnect(con)
48 |
49 | ## ---- eval=FALSE---------------------------------------------------------
50 | ## install.packages("rmongodb")
51 | ## library(rmongodb)
52 | ## con <- mongo.create(host = "localhost",
53 | ## name = "",
54 | ## username = "",
55 | ## password = "",
56 | ## db = "admin")
57 | ## if(mongo.is.connected(con) == TRUE) {
58 | ## data <- mongo.find.all(con, "collection", list("city" = list( "$exists" = "true")))
59 | ## }
60 | ## mongo.destroy(con)
61 |
62 | ## ---- eval=FALSE---------------------------------------------------------
63 | ## library(plyr)
64 | ## mydata <- read.csv("http://www.ats.ucla.edu/stat/data/binary.csv")
65 | ## # Consider the case where we want to calculate descriptive statistics across admits and not-admits
66 | ## # from the dataset and return them as a data.frame
67 | ## ddata <- ddply(mydata, c("admit"), summarize,
68 | ## gpa.over3 = length(gpa[gpa>=3]),
69 | ## gpa.over3.5 = length(gpa[gpa>=3.5]),
70 | ## gpa.over3per = length(gpa[gpa>=3])/length(gpa),
71 | ## gpa.over3.5per = length(gpa[gpa>=3.5])/length(gpa)
72 | ## )
73 |
74 | ## ---- eval=FALSE---------------------------------------------------------
75 | ##
76 | ## mydata <- ddply(mydata, c("admit"), transform,
77 | ## gre.ave=mean(x=gre, na.rm=T),
78 | ## gre.sd = sd(x=gre, na.rm=T))
79 | ## head(mydata)
80 | ## unique(mydata$gre.ave)
81 | ## unique(mydata$gre.sd)
82 |
83 | ## ---- eval=FALSE---------------------------------------------------------
84 | ## # Another very useful function is arrange, which orders a data frame on the basis of column contents
85 | ## # arrange by "rank"
86 | ## mydata.rank <- plyr::arrange(mydata, rank)
87 | ## # arrange by "rank", descending
88 | ## mydata.rank <- plyr::arrange(mydata, desc(rank))
89 | ## # arrange by "rank", then "gre", then "gpa"
90 | ## mydata.comb <- plyr::arrange(mydata, rank, desc(gre), desc(gpa))
91 | ## head(mydata.comb)
92 |
93 |
--------------------------------------------------------------------------------
/instructor/overflow.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Additional Course Materials"
3 | author: "Dillon Niederhut"
4 | date: "`r format(Sys.time(), '%d %B, %Y')`"
5 | output:
6 | - pdf_document
7 | - slidy_presentation
8 | ---
9 |
10 | ```{r, echo=FALSE}
11 | knitr::opts_knit$set(root.dir = '../')
12 | ```
13 |
14 | ## Introduction
15 |
16 | The following are materials that do not fit into the course as currently taught, but that may be useful for students later on.
17 |
18 | # Data does not need to be in the local filesystem
19 |
20 | ## R has an interface to curl called RCurl
21 |
22 | ```{r}
23 | #install.packages('RCurl')
24 | library(RCurl)
25 | #install.packages("XML")
26 | library(XML)
27 | ```
28 |
29 | ## you can use this to access remote data
30 |
31 | you may just want to read text lines from a webpage
32 |
33 | ```{r}
34 | RJ <- readLines("http://shakespeare.mit.edu/romeo_juliet/full.html")
35 | head(RJ, 25)  # first 25 lines; unlike RJ[1:25] this never pads a short page with NA
36 | ```
37 |
38 | and use the kinds of string manipulation we learned yesterday to retrieve the first lines of an act or a scene
39 |
40 | ```{r}
41 | RJ[grep("<h3>ACT", RJ, ignore.case = TRUE)]    # act headings -- assumes the page marks headings with <h3>; verify
42 | RJ[grep("<h3>SCENE", RJ, ignore.case = TRUE)]  # scene headings (an empty pattern would match every line)
43 | ```
44 |
45 | or maybe pull information out of an RSS feed
46 |
47 | ```{r, eval=FALSE}
48 | link <- "http://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml"
49 | page <- getURL(link)
50 | xmlParse(page)
51 | ```
52 |
53 | ## R also has libraries for pulling and parsing web pages
54 |
55 | ```{r}
56 | link <- "http://clerk.house.gov/evs/2014/ROLL_000.asp"
57 | readHTMLTable(doc = link, header = TRUE, which = 1, stringsAsFactors = FALSE)[1:10, ]  # TRUE/FALSE spelled out: T and F can be reassigned
58 | ```
59 |
60 | # Connecting to a database
61 |
62 | why read from a database? they use less memory, are faster, create their own backups, and offer optimized querying/joining
63 |
64 | databases generally come in two flavors, relational and non-relational, which has to do with how important schemas are (and is a bit beyond the scope of an R intro)
65 |
66 | two popular relational databases are MySQL (one of the many flavors of SQL)
67 |
68 | ```{r, eval=FALSE}
69 | # are there websites that allow you to connect to test servers?
70 | install.packages("RMySQL")
71 | library(RMySQL)
72 | con <- dbConnect(MySQL(),
73 |                  user = "", password = "",
74 |                  dbname = "", host = "localhost")
75 | data <- fetch(dbSendQuery(con, "select * from table"), n = 10)  # first 10 rows of the result
76 | dbDisconnect(con)  # always close the connection when finished
77 | ```
78 |
79 | and postgres
80 |
81 | ```{r, eval=FALSE}
82 | install.packages("RPostgreSQL")
83 | library(RPostgreSQL)
84 | con <- dbConnect(drv = dbDriver("PostgreSQL"),
85 |                  dbname = "",
86 |                  host = "localhost",
87 |                  port = 1234,
88 |                  user = "",
89 |                  password = "")
90 | data <- dbReadTable(con, c("column1", "column2"))  # NOTE(review): dbReadTable reads a whole table -- confirm these placeholders are meant as a table identifier, not column names
91 | dbDisconnect(con)
92 | ```
93 |
94 | a popular non-relational database is MongoDB
95 |
96 | ```{r, eval=FALSE}
97 | install.packages("rmongodb")
98 | library(rmongodb)
99 | con <- mongo.create(host = "localhost",  # quoted: a bare localhost is an undefined variable
100 |                     name = "",
101 |                     username = "",
102 |                     password = "",
103 |                     db = "admin")
104 | if (mongo.is.connected(con)) {
105 |   data <- mongo.find.all(con, "collection", list("city" = list("$exists" = "true")))
106 | }
107 | mongo.destroy(con)
108 | ```
109 |
110 | one quirk about mongo is that your connection always authenticates to the authentication database, not the database you are querying - this db is usually called 'admin'
111 |
112 | # Data tidying with plyr
113 |
114 | ## enter plyr
115 |
116 | - *plyr* is the go-to package for all your splitting-applying-combining needs
117 | - Among its many benefits (above base R capabilities):
118 | a) Don't have to worry about different name, argument, or output consistencies
119 | b) Easily parallelized
120 | c) Input from, and output to, data frames, matrices, and lists
121 | d) Progress bars for lengthy computation
122 | e) Informative error messages
123 |
124 | ## group-wise operations/plyr/selecting functions
125 |
126 | - Two essential questions:
127 | 1. What is the class of your input object?
128 | 2. What is the class of your desired output object?
129 | - If you want to split a **d**ata frame, and return results as a **d**ata frame, you use **dd**ply
130 | - If you want to split a **d**ata frame, and return results as a **l**ist, you use **dl**ply
131 | - If you want to split a **l**ist, and return results as a **d**ata frame, you use **ld**ply
132 |
133 |
134 | ```{r, eval=FALSE}
135 | library(plyr)
136 | mydata <- read.csv("http://www.ats.ucla.edu/stat/data/binary.csv")
137 | # Consider the case where we want to calculate descriptive statistics across admits and not-admits
138 | # from the dataset and return them as a data.frame
139 | ddata <- ddply(mydata, c("admit"), summarize,
140 |   gpa.over3 = length(gpa[gpa >= 3]),
141 |   gpa.over3.5 = length(gpa[gpa >= 3.5]),
142 |   gpa.over3per = length(gpa[gpa >= 3]) / length(gpa),
143 |   gpa.over3.5per = length(gpa[gpa >= 3.5]) / length(gpa)
144 | )
145 | ```
146 |
147 | # Group-wise Operations/plyr/functions
148 |
149 | - plyr can accommodate any user-defined function, but it also comes with some pre-defined functions that assist with the most common split-apply-combine tasks
150 | - We've already seen **summarize**, which creates user-specified vectors and combines them into a data.frame. Here are some other helpful functions:
151 |
152 | **transform**: applies a function to a data.frame and adds new vectors (columns) to it
153 |
154 | for example, we can add columns containing the average and standard deviation of the gre scores within each admit group
155 |
156 | ```{r, eval=FALSE}
157 | # add the group-wise mean and sd of gre as new columns
158 | mydata <- ddply(mydata, c("admit"), transform,
159 |   gre.ave = mean(x = gre, na.rm = TRUE),
160 |   gre.sd = sd(x = gre, na.rm = TRUE))
161 | head(mydata)
162 | unique(mydata$gre.ave)
163 | unique(mydata$gre.sd)
164 | ```
165 |
166 | > side note: note that **transform** can't do transformations that involve the results of *other* transformations from the same call
167 |
168 | Another very useful function is **arrange**, which orders a data frame on the basis of column contents
169 |
170 | ```{r, eval=FALSE}
171 | # every plyr function is namespaced so the chunk works even when plyr is not attached
172 | # arrange by "rank"
173 | mydata.rank <- plyr::arrange(mydata, rank)
174 | # arrange by "rank", descending
175 | mydata.rank <- plyr::arrange(mydata, plyr::desc(rank))
176 | # arrange by "rank", then "gre", then "gpa"
177 | mydata.comb <- plyr::arrange(mydata, rank, plyr::desc(gre), plyr::desc(gpa))
178 | head(mydata.comb)
179 | ```
180 |
--------------------------------------------------------------------------------
/instructor/overflow.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlab-berkeley/R-for-Data-Science/798d5b220d66afea5644a245c7327e4bb740039a/instructor/overflow.pdf
--------------------------------------------------------------------------------
/scripts/feedback_cleaner.R:
--------------------------------------------------------------------------------
1 | # creating day_three dataset: cleans the raw workshop feedback export and
2 | # saves it as data/feedback.Rda (all paths are relative to the working directory)
3 | library(stringr)
4 |
5 | dat <- read.csv('../../feedback-analytics/feedback.csv', stringsAsFactors = FALSE)
6 | dat[dat == ""] <- NA
7 |
8 | # get rid of empty or identifying columns
9 | dat <- subset(dat, select = -c(I.will.use.my.new.powers..., Instructor, Date.of.Training, Training.Title,
10 |   What.department..school..program..or.organization.at.Berkeley.are.you.associated.with..1))
11 | dat <- Filter(function(x) !all(is.na(x)), dat)
12 |
13 | # simplify column names
14 | new.names <- c("timestamp", "course.delivered", "instructor.communicated", "hear",
15 |   "interest", "department", "verbs", "useful", "gender", "ethnicity",
16 |   "outside.barriers", "inside.barriers", "what.barriers", "position")
17 | # stop if the export gained or lost a column instead of silently mislabelling
18 | stopifnot(length(new.names) == ncol(dat))
19 | names(dat) <- new.names
20 |
21 | # fix timestamp: strip the time of day, then parse the remaining date
22 | dat$timestamp <- sub(' [0-9]+:[0-9]+:[0-9]+', '', dat$timestamp)
23 | dat$timestamp <- as.Date(dat$timestamp, "%m/%d/%Y")
24 |
25 | # entity resolution on departments
26 | # canonical name = regular expressions that identify it in the free-text answers;
27 | # entries are tried in order and the first entry that matches an answer wins
28 | # NOTE(review): short patterns such as 'ph', 'ib' and 'ast' match inside longer
29 | # words -- check them against the raw answers before trusting the counts
30 | department.patterns <- list(
31 |   "African American Studies" = c('afric', 'aas'),
32 |   # whole word only: a bare 'are' also matches inside "welfare"
33 |   "Ag & Resource Econ & Pol" = c('\\bare\\b'),
34 |   "Anthropology" = c('anth'),
35 |   "App Sci & Tech Grad Grp" = c('applied', 'ast'),
36 |   "Biostatistics Grad Grp" = c('bio[ ]*stat'),
37 |   "Business" = c('haas', 'business'),
38 |   "City & Regional Planning" = c('crp', 'region', 'planning'),
39 |   "Demography" = c('demo'),
40 |   "Economics" = c('econ'),
41 |   "Education" = c('ed[.]+', 'edu', 'gse', 'g.s.e.', 'pome'),
42 |   "Energy & Resources Group" = c('erg', 'energy'),
43 |   "Env Sci, Policy, & Mgmt" = c('espm', 'epsm'),
44 |   "Ethnic Studies Grad Grp" = c('ethnic'),
45 |   "Geography" = c('geo'),
46 |   "History" = c('hist'),
47 |   "Industrial Eng & Ops Rsch" = c('ieor'),
48 |   "Information" = c('i school', 'info'),
49 |   "Integrative Biology" = c('ib', 'integrative'),
50 |   "JSP Grad Pgm" = c('jsp', 'jurisprudence'),
51 |   "Law" = c('law$', 'law '),
52 |   "Linguistics" = c('ling'),
53 |   "Music" = c('music'),
54 |   "Neuroscience" = c('hwni', 'neuro', 'helen wills'),
55 |   "Political Science" = c('pol[.]+', 'poli ', 'politic'),
56 |   "Psychology" = c('psych'),
57 |   "Public Health" = c('health', 'ph'),
58 |   "Public Policy" = c('gspp', 'policy', 'goldman'),
59 |   "Rhetoric" = c('rhet'),
60 |   "Slavic Languages & Lit" = c('iseees', 'slavic'),
61 |   "South and Southeast Asian Studies" = c('asian'),
62 |   "Social Welfare" = c('welfare'),
63 |   "Sociology" = c('socio', 'soc$', 'soc ')
64 | )
65 |
66 | raw.department <- str_to_lower(str_trim(dat$department))
67 | raw.department <- sub('school of ', '', raw.department)
68 |
69 | # match against the untouched lower-case answers, not a column that is being
70 | # rewritten: otherwise a later pattern can re-match an already assigned name
71 | # (e.g. 'ph' inside "Geography" and "Demography" turned both into "Public Health")
72 | resolved <- rep(NA_character_, length(raw.department))
73 | for (canonical in names(department.patterns)) {
74 |   pattern <- paste(department.patterns[[canonical]], collapse = '|')
75 |   hit <- is.na(resolved) & grepl(pattern, raw.department)
76 |   resolved[hit] <- canonical
77 | }
78 | # answers that matched nothing become NA; levels are the names actually present
79 | dat$department <- factor(resolved, levels = sort(unique(resolved[!is.na(resolved)])))
80 |
81 | # type other columns
82 | dat$hear <- as.factor(dat$hear)
83 | dat$gender <- factor(dat$gender, levels = c("Female/Woman", "Male/Man", "Genderqueer/Gender non-conforming"))
84 | dat$position <- as.factor(dat$position)
85 |
86 | # output
87 | save(dat, file='data/feedback.Rda')
88 |
--------------------------------------------------------------------------------
/scripts/regenerate_files.R:
--------------------------------------------------------------------------------
1 | #' This script regenerates the .pdf and .R files in the instructor
2 | #' directory
3 |
4 | #' function definitions
5 | #' install `package` (a single package name) from CRAN unless it is already installed
6 | install <- function(package) {
7 |   if (!(package %in% rownames(installed.packages()))) {  # rownames are the package names; the other cells hold versions, priorities, ...
8 |     install.packages(package, dependencies = TRUE)
9 |   }
10 | }
11 | #' tangle `document` into an R script, then render it with output_format "all"
12 | write_document <- function(document) {
13 |   knitr::knit(input = document, tangle = TRUE)
14 |   rmarkdown::render(input = document, output_format = "all")
15 | }
16 |
17 | #' main call
18 | #' work from instructor/ whether the script is launched from the repo root or from scripts/
19 | if (basename(getwd()) == 'scripts') {
20 |   setwd('../instructor')
21 | } else {
22 |   setwd('instructor')
23 | }
24 |
25 | for (package in c('knitr', 'rmarkdown')) {
26 |   install(package)
27 |   library(package, character.only = TRUE)
28 | }
29 | #' `pattern` is a regular expression, not a glob, so anchor on the file extension
30 | document_list <- list.files(pattern = '\\.Rmd$')
31 | lapply(document_list, FUN = write_document)
32 |
33 |
--------------------------------------------------------------------------------