├── Assignments
├── .gitignore
├── 17_Maps
│ ├── screen1.png
│ ├── .ipynb_checkpoints
│ │ └── Untitled-checkpoint.ipynb
│ ├── mapping_challenge.md
│ └── maps.md
├── 14_Transparency
│ ├── dog.jpg
│ ├── screen1.png
│ ├── screen2.png
│ ├── screen3.png
│ ├── Transparency.ipynb
│ └── .ipynb_checkpoints
│ │ └── Transparency-checkpoint.ipynb
├── 18_LatticeMaps
│ ├── map.png
│ └── latticemaps.md
├── 11_FinishingTouches
│ ├── unlpd.pdf
│ ├── chartannotated.png
│ └── .ipynb_checkpoints
│ │ └── FinishingTouches-checkpoint.ipynb
├── 1_Installations
│ ├── helloworld.png
│ ├── .ipynb_checkpoints
│ │ └── Hello World in R-checkpoint.ipynb
│ ├── Hello World in R.ipynb
│ └── installing_jupyter_notebook.md
├── 9_StackedAreaCharts
│ └── UNLPD.pdf
├── 2_R_Basics
│ ├── .ipynb_checkpoints
│ │ └── RBasics-checkpoint.ipynb
│ ├── RBasics.ipynb
│ └── RBasicsPart2.ipynb
├── 15_Treemaps
│ └── .ipynb_checkpoints
│ │ └── Treemaps-checkpoint.ipynb
├── 19_WaffleCharts
│ └── .ipynb_checkpoints
│ │ └── Untitled-checkpoint.ipynb
├── 8_LineCharts
│ └── .ipynb_checkpoints
│ │ └── LineCharts-checkpoint.ipynb
├── 10_Scatterplots
│ └── .ipynb_checkpoints
│ │ └── Scatterplots-checkpoint.ipynb
├── 12_BubbleCharts
│ └── .ipynb_checkpoints
│ │ └── BubbleCharts-checkpoint.ipynb
├── 13_LatticeCharts
│ └── .ipynb_checkpoints
│ │ └── FacetCharts-checkpoint.ipynb
├── 3_PercentChange
│ └── .ipynb_checkpoints
│ │ └── PercentChange-checkpoint.ipynb
├── 6_IntroToggplot
│ └── .ipynb_checkpoints
│ │ └── IntroToggplot-checkpoint.ipynb
├── 5_RecastingData
│ └── .ipynb_checkpoints
│ │ └── Recasting Data-checkpoint.ipynb
├── 4_WorkingWithDates
│ └── .ipynb_checkpoints
│ │ └── WorkingWithDates-checkpoint.ipynb
└── 16_LiveFireExercise
│ └── LiveFireExercise.ipynb
├── Lectures
├── Chartjunk.pptx
├── BeautyVsTufte.pptx
├── 1_Introductions.pptx
├── 2_WhatIsDataViz.pptx
├── FormAndFunction.pptx
├── HowYourBrainWorks1.pptx
├── AdvancedDataCleaning.pptx
├── ImprovingVisualizations.pptx
├── Cairo1InformationToWisdom.pptx
└── Class3_GraphicalExcellence.pptx
├── Resources
└── Git Quick Reference.pdf
├── .gitignore_global
├── .gitignore
├── Data
├── colleges.csv
├── enrollment.csv
├── fac2chp.csv
├── registeredvoters.csv
└── mountainlions.csv
├── README.md
├── LICENSE.md
└── syllabus.md
/Assignments/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 |
--------------------------------------------------------------------------------
/Lectures/Chartjunk.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/Chartjunk.pptx
--------------------------------------------------------------------------------
/Lectures/BeautyVsTufte.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/BeautyVsTufte.pptx
--------------------------------------------------------------------------------
/Lectures/1_Introductions.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/1_Introductions.pptx
--------------------------------------------------------------------------------
/Lectures/2_WhatIsDataViz.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/2_WhatIsDataViz.pptx
--------------------------------------------------------------------------------
/Lectures/FormAndFunction.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/FormAndFunction.pptx
--------------------------------------------------------------------------------
/Assignments/17_Maps/screen1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/17_Maps/screen1.png
--------------------------------------------------------------------------------
/Lectures/HowYourBrainWorks1.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/HowYourBrainWorks1.pptx
--------------------------------------------------------------------------------
/Resources/Git Quick Reference.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Resources/Git Quick Reference.pdf
--------------------------------------------------------------------------------
/Assignments/14_Transparency/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/14_Transparency/dog.jpg
--------------------------------------------------------------------------------
/Assignments/18_LatticeMaps/map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/18_LatticeMaps/map.png
--------------------------------------------------------------------------------
/Lectures/AdvancedDataCleaning.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/AdvancedDataCleaning.pptx
--------------------------------------------------------------------------------
/Lectures/ImprovingVisualizations.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/ImprovingVisualizations.pptx
--------------------------------------------------------------------------------
/Assignments/14_Transparency/screen1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/14_Transparency/screen1.png
--------------------------------------------------------------------------------
/Assignments/14_Transparency/screen2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/14_Transparency/screen2.png
--------------------------------------------------------------------------------
/Assignments/14_Transparency/screen3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/14_Transparency/screen3.png
--------------------------------------------------------------------------------
/Lectures/Cairo1InformationToWisdom.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/Cairo1InformationToWisdom.pptx
--------------------------------------------------------------------------------
/Lectures/Class3_GraphicalExcellence.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/Class3_GraphicalExcellence.pptx
--------------------------------------------------------------------------------
/Assignments/11_FinishingTouches/unlpd.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/11_FinishingTouches/unlpd.pdf
--------------------------------------------------------------------------------
/Assignments/1_Installations/helloworld.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/1_Installations/helloworld.png
--------------------------------------------------------------------------------
/Assignments/9_StackedAreaCharts/UNLPD.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/9_StackedAreaCharts/UNLPD.pdf
--------------------------------------------------------------------------------
/Assignments/17_Maps/.ipynb_checkpoints/Untitled-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 1
6 | }
7 |
--------------------------------------------------------------------------------
/Assignments/2_R_Basics/.ipynb_checkpoints/RBasics-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 1
6 | }
7 |
--------------------------------------------------------------------------------
/Assignments/15_Treemaps/.ipynb_checkpoints/Treemaps-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 1
6 | }
7 |
--------------------------------------------------------------------------------
/Assignments/19_WaffleCharts/.ipynb_checkpoints/Untitled-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 1
6 | }
7 |
--------------------------------------------------------------------------------
/Assignments/8_LineCharts/.ipynb_checkpoints/LineCharts-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 1
6 | }
7 |
--------------------------------------------------------------------------------
/Assignments/10_Scatterplots/.ipynb_checkpoints/Scatterplots-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 1
6 | }
7 |
--------------------------------------------------------------------------------
/Assignments/11_FinishingTouches/chartannotated.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/11_FinishingTouches/chartannotated.png
--------------------------------------------------------------------------------
/Assignments/12_BubbleCharts/.ipynb_checkpoints/BubbleCharts-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 1
6 | }
7 |
--------------------------------------------------------------------------------
/Assignments/13_LatticeCharts/.ipynb_checkpoints/FacetCharts-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 1
6 | }
7 |
--------------------------------------------------------------------------------
/Assignments/3_PercentChange/.ipynb_checkpoints/PercentChange-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 1
6 | }
7 |
--------------------------------------------------------------------------------
/Assignments/6_IntroToggplot/.ipynb_checkpoints/IntroToggplot-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 1
6 | }
7 |
--------------------------------------------------------------------------------
/Assignments/1_Installations/.ipynb_checkpoints/Hello World in R-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 1
6 | }
7 |
--------------------------------------------------------------------------------
/Assignments/5_RecastingData/.ipynb_checkpoints/Recasting Data-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 1
6 | }
7 |
--------------------------------------------------------------------------------
/Assignments/11_FinishingTouches/.ipynb_checkpoints/FinishingTouches-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 1
6 | }
7 |
--------------------------------------------------------------------------------
/Assignments/4_WorkingWithDates/.ipynb_checkpoints/WorkingWithDates-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 1
6 | }
7 |
--------------------------------------------------------------------------------
/.gitignore_global:
--------------------------------------------------------------------------------
1 | # Compiled source #
2 | ###################
3 | *.com
4 | *.class
5 | *.dll
6 | *.exe
7 | *.o
8 | *.so
9 |
10 | # Packages #
11 | ############
12 | # it's better to unpack these files and commit the raw source
13 | # git has its own built in compression methods
14 | *.7z
15 | *.dmg
16 | *.gz
17 | *.iso
18 | *.jar
19 | *.rar
20 | *.tar
21 | *.zip
22 |
23 | # Logs and databases #
24 | ######################
25 | *.log
26 | *.sql
27 | *.sqlite
28 |
29 | # OS generated files #
30 | ######################
31 | .DS_Store
32 | .DS_Store?
33 | ._*
34 | .Spotlight-V100
35 | .Trashes
36 | ehthumbs.db
37 | Thumbs.db
38 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled source #
2 | ###################
3 | *.com
4 | *.class
5 | *.dll
6 | *.exe
7 | *.o
8 | *.so
9 |
10 | # Packages #
11 | ############
12 | # it's better to unpack these files and commit the raw source
13 | # git has its own built in compression methods
14 | *.7z
15 | *.dmg
16 | *.gz
17 | *.iso
18 | *.jar
19 | *.rar
20 | *.tar
21 | *.zip
22 |
23 | # Logs and databases #
24 | ######################
25 | *.log
26 | *.sql
27 | *.sqlite
28 |
29 | # OS generated files #
30 | ######################
31 | .DS_Store
32 | .DS_Store?
33 | ._*
34 | .Spotlight-V100
35 | .Trashes
36 | ehthumbs.db
37 | Thumbs.db
38 |
39 | # Microsoft shite #
40 | ~$*
41 |
42 | *__ANSWERS*
43 |
--------------------------------------------------------------------------------
/Data/colleges.csv:
--------------------------------------------------------------------------------
1 | UnitID,Name,InState1213,OutOfState1213,GradRate
151351,Indiana University-Bloomington,23116,44566,75
171100,Michigan State University,24028,43986,79
147767,Northwestern University,60840,60840,93
204796,Ohio State University-Main Campus,24919,40327,82
214777,Pennsylvania State University-Main Campus,31854,44156,86
243780,Purdue University-Main Campus,23468,42270,69
186380,Rutgers University-New Brunswick,28798,42118,79
145637,University of Illinois at Urbana-Champaign,28564,42706,84
153658,University of Iowa,21832,40054,70
163286,University of Maryland-College Park,23346,41725,82
170976,University of Michigan-Ann Arbor,25848,51976,91
174066,University of Minnesota-Twin Cities,25065,30315,73
181464,University of Nebraska-Lincoln,21700,34450,65
240444,University of Wisconsin-Madison,23762,40012,82
--------------------------------------------------------------------------------
/Data/enrollment.csv:
--------------------------------------------------------------------------------
1 | Year,Date,Enrollment
1967,1967-1-1,18067
1968,1968-1-1,19150
1969,1969-1-1,19618
1970,1970-1-1,20810
1971,1971-1-1,21541
1972,1972-1-1,21581
1973,1973-1-1,21160
1974,1974-1-1,20892
1975,1975-1-1,22380
1976,1976-1-1,22179
1977,1977-1-1,22256
1978,1978-1-1,22477
1979,1979-1-1,23661
1980,1980-1-1,24128
1981,1981-1-1,24786
1982,1982-1-1,25075
1983,1983-1-1,24789
1984,1984-1-1,24228
1985,1985-1-1,24020
1986,1986-1-1,23899
1987,1987-1-1,23469
1988,1988-1-1,23985
1989,1989-1-1,23926
1990,1990-1-1,24453
1991,1991-1-1,24620
1992,1992-1-1,24573
1993,1993-1-1,24491
1994,1994-1-1,23854
1995,1995-1-1,24320
1996,1996-1-1,23887
1997,1997-1-1,22827
1998,1998-1-1,22408
1999,1999-1-1,22142
2000,2000-1-1,22268
2001,2001-1-1,22764
2002,2002-1-1,22988
2003,2003-1-1,22559
2004,2004-1-1,21792
2005,2005-1-1,21675
2006,2006-1-1,22106
2007,2007-1-1,22973
2008,2008-1-1,23573
2009,2009-1-1,24100
2010,2010-1-1,24610
2011,2011-1-1,24593
2012,2012-1-1,24207
2013,2013-1-1,24445
2014,2014-1-1,25006
2015,2015-1-1,25260
2016,2016-1-1,25897
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # JOUR407 Data Visualization
2 |
3 | Course materials for a data visualization course taught at the University of Nebraska-Lincoln's College of Journalism and Mass Communications.
4 |
5 | ## About this course
6 |
7 | This course is first and foremost an experiment in student learning at a journalism school. It is not an online course. These materials are here to augment the classroom experience. The materials here are being posted publicly and are open for use, reuse or contributions if you so desire.
8 |
9 | ## Course goals
10 |
11 | * Introduce students to data visualization tools and techniques
12 | * Understand the theories of what makes for good data visualization
13 | * Get hands on with gathering, cleaning, refining and analyzing data before visualization
14 | * Get hands on with programmatic tools for analysis and visualization in R
15 | * Learn to apply best practices for good data visualization to each of the data visualization tools
16 | * Get experience using GitHub for version control
17 | * Get experience with other development environments when working with code.
18 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 |
2 | The MIT License (MIT)
3 |
4 | Copyright (c) 2013 Matthew D. Waite
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 |
8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
9 |
10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Assignments/17_Maps/mapping_challenge.md:
--------------------------------------------------------------------------------
1 | # Mapping Challenge: Is it a map or something else?
2 |
3 | China announced this week that it will impose tariffs on US agricultural products. It is a move designed to [target Trump voters in midwestern states](https://www.politico.com/story/2018/04/04/how-china-will-target-us-agriculture-458530). Media have noted that Trump won most of the states where soybeans are produced.
4 |
5 | In my opinion, state level numbers are too imprecise. We need to go to the county level.
6 |
7 | The questions:
8 |
9 | * Where are soybeans produced in the United States?
10 | * How did Trump perform in those counties?
11 | * What are the population trends in those counties? Could the costs of these tariffs be more than money?
12 |
13 | What, in these questions, is a map?
14 |
15 | **Turn in what you have by the end of class.**
16 |
17 | ### Data
18 |
19 | * [Soybean production in bushels](https://www.dropbox.com/s/hnjx2dazkag62g5/soybeans2012.csv?dl=0), by county in 2012, from the USDA. Most recent data available.
20 | * [Election results by county](https://www.dropbox.com/s/uib1uc6dj0u99wt/2016president__county.csv?dl=0) in 2016, from OpenElex.
21 | * Population data you have from previous assignments.
22 |
--------------------------------------------------------------------------------
/Data/fac2chp.csv:
--------------------------------------------------------------------------------
1 | Department,TotalFaculty,CreditHours
2 | Advertising,24,3198
3 | Agricultural Economics,23,149
4 | "Agricultural Leadership, Education and Co",19,3370
5 | Agronomy and Horticulture,50,4949
6 | Animal Science,29,3331
7 | Anthropology,9,6016
8 | Architecture,28,3980
9 | Art and Art History,28,5378
10 | Biochemistry,22,1849
11 | Biological Systems Engineering,31,3175
12 | Broadcasting,7,454
13 | Business Administration,6,5583
14 | Chemical and Biomolecular Engineering,17,1713
15 | Chemistry,28,12998
16 | "Children, Youth and Family Studies",35,5085
17 | Civil Engineering,31,2195
18 | Classics & Religious Studies,14,3988
19 | Communication Studies,16,5626
20 | Community and Regional Planning,5,584
21 | Computer Science,25,6287
22 | Durham School Arch Engr & Const,36,3475
23 | Earth and Atmospheric Sciences,21,2554
24 | Economics,17,9152
25 | Educational Administration,20,1901
26 | Educational Psychology,27,4790
27 | Electrical & Computer Engineering,38,4495
28 | English,68,14936
29 | Entomology,13,1281
30 | Finance,25,6131
31 | History,32,7358
32 | Johnny Carson School of Theatre and Film,19,5071
33 | Journalism and Mass Communicat,25,5327
34 | Law,48,5753
35 | Management,25,10927
36 | Marketing,16,4838
37 | Mathematics,49,23018
38 | Mechanical & Materials Engineering,43,5737
39 | Modern Language & Literature,37,9486
40 | Nutrition & Health Sciences,34,8142
41 | Philosophy,13,3319
42 | Physics and Astronomy,30,7525
43 | Political Science,19,5011
44 | Psychology,35,12437
45 | Biological Sciences,44,8183
46 | School of Music,58,8962
47 | Sociology,24,5248
48 | Special Ed & Communication,56,5621
49 | Statistics,3,3773
50 | Teach/Learn & Teacher Ed,72,7935
51 | "Textiles, Merchandising and Fashion Desig Professor",12,1834
--------------------------------------------------------------------------------
/Assignments/1_Installations/Hello World in R.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Hello world\n",
8 | "\n",
9 |     "Jupyter Notebook, which is what you're using now, is a browser based interactive code environment. In this case, we're using it for R, a stats language. The first thing you always do in a code environment -- it's a law -- is write code that prints Hello World! on the screen. So let's do that. In the next open box, type this:\n",
10 | "\n",
11 | "```\n",
12 | "words <- \"Hello World!\"\n",
13 | "\n",
14 | "print(words)\n",
15 | "```"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": null,
21 | "metadata": {},
22 | "outputs": [],
23 | "source": []
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {
28 | "collapsed": true
29 | },
30 | "source": [
31 | "The next thing you need to do is add text like this. You can do that by just changing the cell type. You do that in the menu above. It looks like this:\n",
32 | "\n",
33 | "\n",
34 | "\n",
35 | "So, in the next field, change the field type to Markdown, and add this code:\n",
36 | "\n",
37 | "```\n",
38 | "# This is a giant headline\n",
39 | "\n",
40 | "This is text.\n",
41 | "\n",
42 | "[This is a link](http://www.google.com/)\n",
43 | "\n",
44 | "```\n",
45 | "\n",
46 | "You can see more of what you can do with Markdown [on this cheatsheet](https://github.com/adam-p/markdown-here/wiki/Markdown-Cheatsheet). "
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": null,
52 | "metadata": {
53 | "collapsed": true
54 | },
55 | "outputs": [],
56 | "source": []
57 | }
58 | ],
59 | "metadata": {
60 | "anaconda-cloud": {},
61 | "kernelspec": {
62 | "display_name": "R",
63 | "language": "R",
64 | "name": "ir"
65 | },
66 | "language_info": {
67 | "codemirror_mode": "r",
68 | "file_extension": ".r",
69 | "mimetype": "text/x-r-source",
70 | "name": "R",
71 | "pygments_lexer": "r",
72 | "version": "3.4.1"
73 | }
74 | },
75 | "nbformat": 4,
76 | "nbformat_minor": 1
77 | }
78 |
--------------------------------------------------------------------------------
/Assignments/1_Installations/installing_jupyter_notebook.md:
--------------------------------------------------------------------------------
1 | # Installing Jupyter Notebook and R on your computer
2 |
3 | 1. First, download and install Anaconda on your computer. You can download it here: [https://www.continuum.io/downloads](https://www.continuum.io/downloads) WARNING: It will take up 845MB of your hard drive. Install the version for Python 3.5. **IF YOU HAVE ALREADY INSTALLED ANACONDA FOR A PREVIOUS CLASS, YOU DO NOT NEED TO DO THIS AGAIN.**
4 | 2. Open your terminal (Mac) or Command Prompt (Windows) and type: `conda list`
5 | 3. If a big stream of text goes by, you've got Anaconda installed.
6 | 4. If you get an error or nothing happens, come find me. **STOP HERE IF YOU GET AN ERROR MESSAGE**.
7 | 5. Let's update Anaconda by typing in your terminal `conda update conda` and then type `y` when it asks if you want to update the packages.
8 | 6. Now let's create an environment for us to work in. Type `conda create --name dataviz python=2 jupyter` into your terminal and say yes to the installation. What this does is it creates a whole new python environment that won't interfere with any python environments you have set up before (Mac users: Apple installs Python 2.7 in your system and messing with it can have disastrous consequences for your machine. Thus, environments).
9 | 7. Activate your new environment by typing `source activate dataviz` on a Mac/Linux and `activate dataviz` on a Windows machine.
10 | 8. Now let's install R and a ton of packages. To do that, we'll use Anaconda's R Essentials. Type `conda install r-essentials` and wait for it to finish.
11 | 9. Let's check if Jupyter Notebook is installed correctly by typing `jupyter notebook` and watching to see if a browser pops up with stuff in it. It should use your default browser.
12 | 10. On the top right of the browser, you should see a dropdown menu called New. Click that, and under Notebooks you should see R.
13 |
14 | If you see R there, then go download [this notebook](https://www.dropbox.com/s/1mn03dbf18llah1/Hello%20World%20in%20R.ipynb?dl=0), open it with Jupyter Notebooks and follow along with it.
15 |
16 | When you are done, go to File > Close and Halt. Then go to your terminal and hit control C and say yes to shutting down the Jupyter server. Your last step of the day: Type `source deactivate` on a Mac/Linux or `deactivate` on Windows. This exits out of your environment and returns you to your normal computer.
17 |
18 | We will do that every time we use this dataviz environment: We'll activate it when we start and deactivate it when we stop.
19 |
--------------------------------------------------------------------------------
/Assignments/18_LatticeMaps/latticemaps.md:
--------------------------------------------------------------------------------
1 | # Lattice maps
2 | ### aka my computer is on fire
3 |
4 | Lattice maps are a very cool way of doing small multiples with maps. But they are not for the uncommitted -- they're large, they require time and processing, and you are going to hear your laptop fan.
5 |
6 | First we'll get set up with what we'll need.
7 |
8 | ```r
9 | library(dplyr)
10 | library(ggplot2)
11 | library(sf)
12 | library(albersusa)
13 | library(repr)
14 | options(repr.plot.width=11, repr.plot.height=8)
15 | ```
16 | Next we'll get our data, which is a file of local area unemployment rates by county from the Bureau of Labor Statistics from Dec. 2016 to Jan. 2018. I've cleaned out some junk from the BLS. [You can download my data here](https://www.dropbox.com/s/j6qiad39f9l27nl/laucntycur14.csv?dl=0).
17 |
18 | ```r
19 | unemp <- read.csv("~/Dropbox/JOUR407-Data-Visualization/Data/laucntycur14.csv", colClasses=c("State"="character", "County"="character"))
20 | ```
21 |
22 | Now, just like our previous mapping exercise, we'll create a `fips` field so we can join the data to the map.
23 |
24 | ```r
25 | geoid <- unemp %>% mutate(
26 | fips = paste(State, County, sep="")
27 | )
28 | ```
29 |
30 | Now we'll bring in a county map.
31 |
32 | ```
33 | county_geom <- counties_sf("aeqd")
34 | ```
35 |
36 | This is where lattice maps get different. With a single county map, we just wanted to join the map to the data and we had one county for one mapped county. With lattice maps, we need to join the MAP to the DATA. Meaning we're going to have lots of copies of the map around because we've joined them to the data that has lots of repeats of the county, one for each time period the dataset covers.
37 |
38 | ```
39 | nation <- geoid %>% inner_join(county_geom, by="fips")
40 | ```
41 | We'll set up our map theme to get rid of all the cruft.
42 |
43 | ```
44 | theme_map <- theme(
45 | panel.background = element_blank(),
46 | plot.background = element_blank(),
47 | panel.grid.minor = element_blank(),
48 | text = element_text(family = "Helvetica", size=16),
49 | axis.title = element_text(size=12),
50 | axis.ticks = element_blank(),
51 | strip.background = element_blank(),
52 | panel.grid.major = element_line(colour = 'transparent'),
53 | axis.text = element_blank()
54 | )
55 | ```
56 |
57 | This bit is new. Our dates aren't really dates, and we need them in order. So we're going to create a new field called period_f, which is a factor, and we're going to spell out the order we want them to be in.
58 |
59 | ```
60 | nation$period_f = factor(nation$Period, levels=c('Dec-16', 'Jan-17','Feb-17','Mar-17','Apr-17','May-17', 'Jun-17', 'Jul-17', 'Aug-17', 'Sep-17', 'Oct-17', 'Nov-17', 'Dec-17', 'Dec-17 p', 'Jan-18 p'))
61 | ```
62 |
63 | Now, we make a map. If you've followed along until now, get comfortable. This takes a while.
64 |
65 | ```
66 | ggplot(nation) + theme_map + geom_sf(col="transparent", aes(fill=Rate), color = NA) + scale_fill_gradient(low = "#00005C", high = "#F5AD00", guide = FALSE) + facet_wrap(~period_f)
67 | ```
68 |
69 | The facet_wrap works just the same as other facet charts.
70 |
71 | The result:
72 |
73 | 
74 |
--------------------------------------------------------------------------------
/Assignments/17_Maps/maps.md:
--------------------------------------------------------------------------------
1 | # Mapping with R
2 |
3 | First and foremost, I want to be clear on this: Mapping is hard. There is a lot going on, and a lot of external libraries necessary to make it work. So this won't be as easy as working with standard datasets and libraries.
4 |
5 | We're going to borrow heavily from John Burn-Murdoch's [good work](https://twitter.com/jburnmurdoch/status/981074810020204544) that he graciously included the code [here](https://gist.githubusercontent.com/johnburnmurdoch/2dd39f56631ffffe4a99633c76781a1e/raw/d620455ad45b07e15c141318c0a9a437ffeb5096/main.R).
6 |
7 | To do this, we're going to have to get out of Jupyter Notebooks and Anaconda, which has terrible support for various mapping libraries. We're going to use the industry standard tool for using R, which is called R Studio. To install R Studio, we have to first install R by downloading your computer's version [here](https://cran.rstudio.com/).
8 |
9 | Then install RStudio by going [here](https://www.rstudio.com/products/rstudio/download/#download).
10 |
11 | Open R Studio. Here's what it will look like. We're going to do our work in the console on the left.
12 |
13 | 
14 |
15 | Now we have to set up our environment by installing all the packages we're used to plus more.
16 |
17 | ```R
18 | install.packages('devtools')
19 | install.packages('sf')
20 | devtools::install_github("hrbrmstr/albersusa")
21 | devtools::install_github("tidyverse/ggplot2")
22 | install.packages('dplyr')
23 | ```
24 |
25 | Now let's load some libraries up:
26 |
27 | ```R
28 | library('sf')
29 | library('ggplot2')
30 | library('dplyr')
31 | library('utils')
32 | library('magrittr')
33 | library('albersusa')
34 | ```
35 | First things first, let's get some map data using the `albersusa` library's `counties_sf` function. Then let's get our population data we used in the Live Fire Exercise last time. This code adds a couple of directives that tell R to load a column in as text, not numbers, to preserve the leading zeros. This will be important later.
36 |
37 | ```R
38 | county_geom <- counties_sf("aeqd")
39 |
40 | population <- read.csv(url("https://www2.census.gov/programs-surveys/popest/datasets/2010-2017/counties/totals/co-est2017-alldata.csv"), colClasses=c("STATE"="character", "COUNTY"="character"))
41 | ```
42 | This part is like you are accustomed to. Let's create a new dataframe, filter out statewide totals, build a new identifier out of the state and county fips codes, and calculate percent change. Then we'll slim our dataset down to just the fields we need.
43 |
44 | After that, we'll join our new data to the map using the fips code as the join condition.
45 |
46 | ```R
47 | countygeoid <- population %>%
48 | filter(SUMLEV==50) %>%
49 | mutate(
50 | fips = paste(STATE, COUNTY, sep=""),
51 | change = ((POPESTIMATE2017-POPESTIMATE2010)/POPESTIMATE2010)*100
52 | ) %>%
53 | select(c(fips, STNAME, CTYNAME, change))
54 |
55 | nation <- county_geom %>% inner_join(countygeoid, by="fips")
56 | ```
57 |
58 | Now, before we map, let's make a theme for our map, called `theme_map`, that gets rid of lots of the extra cruft that `ggplot2` adds to maps.
59 |
60 | ```R
61 | theme_map <- theme(
62 | panel.background = element_blank(),
63 | plot.background = element_blank(),
64 | panel.grid.minor = element_blank(),
65 | text = element_text(family = "Helvetica", size=16),
66 | axis.title = element_text(size=12),
67 | axis.ticks = element_blank(),
68 | strip.background = element_blank(),
69 | panel.grid.major = element_line(colour = 'transparent'),
70 | axis.text = element_blank()
71 | )
72 | ```
73 |
74 | Now let's make a map. After all this, it's similar to what you are used to: use ggplot, tell it the dataframe, then use a special geom called `geom_sf` to render the map. If it's all gone well, we should see a map.
75 |
76 | ```R
77 | ggplot(nation) +
78 | theme_map +
79 | geom_sf(col="transparent", aes(fill=change)) +
80 | scale_fill_distiller(type="div", direction=-1)
81 | ```
82 |
83 | What's the problem here?
84 |
--------------------------------------------------------------------------------
/Data/registeredvoters.csv:
--------------------------------------------------------------------------------
1 | County,Republican10,Democrat10,Libertarian10,Nonpartisan10,Total10,Republican16,Democrat16,Nonpartisan16,Libertarian16,Total16
Adams,10018,5536,6,2972,18532,10746,5027,3591,163,19527
Antelope,3005,1147,0,538,4690,3088,863,594,12,4557
Arthur,284,52,0,10,346,286,37,15,3,341
Banner,424,53,0,53,530,427,38,73,7,545
Blaine,314,56,0,24,394,310,43,29,2,384
Boone,2390,1156,0,408,3954,2469,901,404,11,3785
Box Butte,4115,2347,3,1286,7751,4278,1852,1395,52,7577
Boyd,1036,338,0,129,1503,1084,250,156,4,1494
Brown,1663,363,0,224,2250,1658,253,214,9,2134
Buffalo,15768,6785,14,4537,27104,16974,6453,5461,305,29193
Burt,2521,1694,0,834,5049,2540,1440,878,35,4893
Butler,3044,2134,0,939,6117,3066,1587,913,19,5585
Cass,8216,5435,6,3755,17412,8472,4633,4025,180,17310
Cedar,3072,2231,0,886,6189,3503,1574,869,14,5960
Chase,1827,526,0,263,2616,1830,371,277,8,2486
Cherry,2945,780,0,433,4158,3154,566,455,10,4185
Cheyenne,4146,1484,4,1266,6900,4250,1221,1336,56,6863
Clay,2889,1081,0,629,4599,2743,848,650,17,4258
Colfax,2364,2394,0,587,5345,2332,2018,701,18,5069
Cuming,3639,1485,1,754,5879,3635,1240,889,23,5787
Custer,5450,1871,1,1044,8366,5427,1440,1180,55,8102
Dakota,3577,4238,3,2270,10088,3674,4255,2691,80,10700
Dawes,3505,1422,1,1082,6010,3469,1316,1193,48,6026
Dawson,7666,3620,3,2289,13578,7707,3660,2434,115,13916
Deuel,1027,202,0,214,1443,998,176,207,10,1391
Dixon,1887,1309,0,618,3814,2272,957,545,16,3790
Dodge,10683,7132,2,4059,21876,11009,6265,4669,170,22113
Douglas,120973,125194,123,66976,313266,129688,132466,80315,3497,345966
Dundy,1013,221,0,134,1368,958,155,158,1,1272
Fillmore,2317,1230,0,590,4137,2378,1024,658,18,4078
Franklin,1356,608,1,339,2304,1428,483,360,10,2281
Frontier,1316,389,0,231,1936,1283,299,263,9,1854
Furnas,2240,784,2,484,3510,2291,649,533,19,3492
Gage,7421,5072,0,2709,15202,7166,4091,2778,101,14136
Garden,1156,263,0,131,1550,1091,221,145,5,1462
Garfield,959,244,1,117,1321,966,194,118,3,1281
Gosper,1003,314,0,175,1492,1048,246,181,5,1480
Grant,381,63,0,41,485,418,45,37,0,500
Greeley,578,1073,0,166,1817,626,814,152,2,1594
Hall,15782,9824,2,6066,31674,16436,9538,6803,249,33026
Hamilton,4107,1512,1,838,6458,4365,1252,944,49,6610
Harlan,1559,675,0,301,2535,1575,528,342,7,2452
Hayes,577,110,0,82,769,551,84,84,1,720
Hitchcock,1210,494,0,316,2020,1278,350,356,12,1996
Holt,4981,1549,2,710,7242,5102,1151,723,23,6999
Hooker,464,95,0,50,609,420,80,40,2,542
Howard,2241,1602,1,547,4391,2448,1347,640,21,4456
Jefferson,2775,1688,2,853,5318,2691,1338,922,30,4981
Johnson,1539,1021,0,387,2947,1532,798,422,15,2767
Kearney,2784,1050,1,596,4431,2818,902,685,29,4434
Keith,4054,1396,0,1073,6523,4024,1078,1062,39,6203
Keya Paha,561,94,0,31,686,516,76,44,3,639
Kimball,1987,587,1,451,3026,1920,506,552,11,2989
Knox,3118,2014,0,822,5954,3312,1621,871,18,5822
Lancaster,74342,67217,46,36208,177813,76898,68127,40130,1895,187050
Lincoln,12419,7120,6,3668,23213,13574,5968,4275,195,24012
Logan,415,90,0,54,559,425,70,68,1,564
Loup,400,83,0,35,518,421,77,26,1,525
Madison,12462,5425,5,3480,21372,12548,4888,3892,134,21462
McPherson,300,72,0,15,387,307,40,24,3,374
Merrick,3194,1345,1,729,5269,3213,998,715,28,4954
Morrill,2157,724,1,512,3394,2137,594,525,27,3283
Nance,1242,980,0,322,2544,1269,770,335,9,2383
Nemaha,2664,1609,1,737,5011,2462,1259,817,25,4563
Nuckolls,1786,1078,0,411,3275,1850,838,456,18,3162
Otoe,5350,3350,3,2068,10771,5324,2783,2264,76,10447
Pawnee,1061,618,0,270,1949,1084,494,270,10,1858
Perkins,1316,460,0,283,2059,1368,327,280,7,1982
Phelps,4179,1341,0,815,6335,4297,1048,857,33,6235
Pierce,3188,1025,0,663,4876,3302,766,785,19,4872
Platte,10998,5657,6,2774,19435,12051,4867,3057,141,20116
Polk,2192,908,1,407,3508,2298,730,473,15,3516
Red Willow,4300,1668,4,1355,7327,4621,1240,1341,37,7239
Richardson,3170,2174,1,842,6187,3004,1754,889,22,5669
Rock,829,161,0,71,1061,825,125,73,1,1024
Saline,2940,3709,4,1352,8005,3062,3303,1416,49,7830
Sarpy,43657,27188,40,21900,92785,50057,29538,27620,1258,108473
Saunders,7229,4851,3,2098,14181,8124,3952,2524,96,14696
Scotts Bluff,12734,6821,11,4305,23871,13033,6067,4854,185,24139
Seward,5841,3281,2,1778,10902,6109,2761,1991,65,10926
Sheridan,2710,843,0,557,4110,2755,678,618,15,4066
Sherman,930,996,0,171,2097,983,820,196,11,2010
Sioux,687,127,0,109,923,696,108,107,8,919
Stanton,2517,832,0,510,3859,2454,752,546,30,3782
Thayer,2298,1011,0,480,3789,2316,864,594,17,3791
Thomas,399,75,0,34,508,409,47,49,1,506
Thurston,1324,2514,0,576,4414,1277,2582,660,10,4529
Valley,1904,933,0,340,3177,1895,745,348,7,2995
Washington,7613,3694,1,2626,13934,8201,3212,2794,115,14322
Wayne,3300,1204,0,804,5308,3255,1101,836,44,5236
Webster,1512,757,1,343,2613,1456,574,371,12,2413
Wheeler,451,155,0,48,654,420,127,57,3,607
York,7168,2188,2,1432,10790,6658,1783,1510,62,10013
--------------------------------------------------------------------------------
/Assignments/14_Transparency/Transparency.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Transparency and publication\n",
8 | "\n",
9 | ">__\"You should have a strong command of at least one toolset that (a) allows for filtering, joining, pivoting, and aggregating tabular data, and (b) enables reproducible workflows.\"__ -- Buzzfeed job posting, 2017\n",
10 | "\n",
11 | ">__\"As a general rule, all assertions in a story based on data analysis should be reproducible. The methodology description in the story or accompanying materials should provide a road map to replicate the analysis.\"__ -- The Associated Press Stylebook\n",
12 | "\n",
13 | "Trust in media is low and declining. The reasons for this are myriad, and we can control only what we can control. One thing we can do -- be more transparent about what we do. And this notion goes beyond just journalism -- why should anyone trust what you have to say if you can't show your work? \n",
14 | "\n",
15 | "Jupyter Notebooks are good at this, being able to mix code and text. But your notebooks are currently only visible to you. So we're going to work on improving your notebooks with Markdown and Github.\n",
16 | "\n",
17 | "### Markdown\n",
18 | "\n",
19 | "Markdown is what you are writing in when you aren't writing code in Jupyter Notebooks. It's very simple, and there's only a finite number of things you can do with it, but you can drastically improve your notebooks with some simple typographic tricks. Here's a partial listing of what you can do in Markdown that might be useful in notebooks.\n",
20 | "\n",
21 | "#### Headers\n",
22 | "\n",
23 | "```\n",
24 | "# h1\n",
25 | "## h2\n",
26 | "### h3\n",
27 | "#### h4\n",
28 | "##### h5\n",
29 | "```\n",
30 | "\n",
31 | "Which looks like:\n",
32 | "\n",
33 | "# h1\n",
34 | "## h2\n",
35 | "### h3\n",
36 | "#### h4\n",
37 | "##### h5\n",
38 | "\n",
39 | "#### Blockquotes\n",
40 | "\n",
41 | "To get a block quote, add `>` at the beginning of the line. \n",
42 | "\n",
43 | "It looks like:\n",
44 | "\n",
45 | "> This is a block quote\n",
46 | "\n",
47 | "#### Horizontal rule\n",
48 | "\n",
49 | "You can use a horizontal rule to separate content -- a thematic break. \n",
50 | "\n",
51 | "In Jupyter notebooks, you create a horizontal rule with three dashes: `---`\n",
52 | "\n",
53 | "Which looks like:\n",
54 | "\n",
55 | "---\n",
56 | "\n",
57 | "#### Text treatments\n",
58 | "\n",
59 | "You can **bold** text, _italicize_ text, even ~~strikethrough~~ text with `**bold**`, `_italicize_` and `~~strikethrough~~`. \n",
60 | "\n",
61 | "#### Lists\n",
62 | "\n",
63 | "You can create bulleted or numbered lists like this:\n",
64 | "\n",
65 | "```\n",
66 | "* Bullet 1\n",
67 | "* Bullet 2\n",
68 | "* Bullet 3\n",
69 | "\n",
70 | "1. Numbered list 1\n",
71 | "2. Numbered list 2\n",
72 | "3. Numbered list 3\n",
73 | "```\n",
74 | "Which looks like:\n",
75 | "\n",
76 | "* Bullet 1\n",
77 | "* Bullet 2\n",
78 | "* Bullet 3\n",
79 | "\n",
80 | "And:\n",
81 | "\n",
82 | "1. Numbered list 1\n",
83 | "2. Numbered list 2\n",
84 | "3. Numbered list 3\n",
85 | "\n",
86 | "#### Links\n",
87 | "\n",
88 | "You can add a link like this: `[text to be linked](http://website.com)` \n",
89 | "It looks like this: [text to be linked](http://www.google.com)\n",
90 | "\n",
91 | "#### Tables\n",
92 | "\n",
93 | "Tables are good at showing tabular data. Sounds basic, but people seem to forget tables when there are so many good data visualization options out there. Tables look like this:\n",
94 | "\n",
95 | "```\n",
96 | "| FieldName1 | FieldName2 |\n",
97 | "| ---------- | ---------- |\n",
98 | "| foo | bar |\n",
99 | "| baz | bing |\n",
100 | "| boo | buzz |\n",
101 | "```\n",
102 | "\n",
103 | "And that looks like:\n",
104 | "\n",
105 | "| FieldName1 | FieldName2 |\n",
106 | "| ---------- | ---------- |\n",
107 | "| foo | bar |\n",
108 | "| baz | bing |\n",
109 | "| boo | buzz |\n",
110 | "\n",
111 | "#### Images\n",
112 | "\n",
113 | "The way to handle images in your post is to put the images in the same folder as your Jupyter Notebook and path to them. \n",
114 | "\n",
115 | "To embed an image, it looks like this: `` \n",
116 | "\n",
117 | "\n",
118 | "\n",
119 | "## GitHub\n",
120 | "\n",
121 | "GitHub is a social code sharing website used by millions of developers around the world. It's a place for people to put their code so others can see it, be inspired by it, even participate in it. Other developers can make a copy of your software, improve it and give that back to you. \n",
122 | "\n",
123 | "It's also an ideal place to store your notebooks to foster transparency. With some simple tools, you can publish your notebooks next to your stories so readers who want to know more can see how you did what you did. \n",
124 | "\n",
125 | "You get transparency and replicability in one swoop.\n",
126 | "\n",
127 | "First things first: [Create an account](https://github.com/).\n",
128 | "\n",
129 | "On GitHub you create **repositories** of code. You will have a local copy on your computer, and you'll have a copy on GitHub. You will keep them in sync using **commits** where you will **push** code to GitHub or **pull** it down from Github, depending on which way you need to move code. \n",
130 | "\n",
131 | "Let's make your first repository, just to test it out. First click on the green **New Repository** button. Now we need to give our repository a name, a description, and initialize it with a README file. \n",
132 | "\n",
133 | "\n",
134 | "\n",
135 | "### GitHub desktop\n",
136 | "\n",
137 | "For most people, the easiest way to work with GitHub is through their desktop application. [You can download it here](https://desktop.github.com/).\n",
138 | "\n",
139 | "Log into your account via the desktop app. What we first need to do is **clone** our repository to our local machine. \n",
140 | "\n",
141 | "Once logged in, click the plus button in the top right corner and then click Clone. \n",
142 | "\n",
143 | "\n",
144 | "\n",
145 | "Click on your repository from the list and then click Clone your project. Tell GitHub where to clone it -- this is up to you, but make it somewhere you can find it again and do not move it. \n",
146 | "\n",
147 | "Now that we have a clone of it, let's edit the README file. Let's add this sentence: \"I am learning about GitHub.\"\n",
148 | "\n",
149 | "Save the file and go back to GitHub Desktop. You should see you have 1 uncommitted change.\n",
150 | "\n",
151 | "\n",
152 | "\n",
153 | "Click that. You are now going to create a **commit message**, which is like a note to yourself as to what this change is. In this case, we edited README, so add that as the summary and click **Commit to Master**, which is what you are doing. You have a master branch of your code. If, later, you wanted to try something new but didn't want to mess with your existing code, you could create a branch off of master, work there, and if it worked you could roll it back into master. But that's a topic for another day. \n",
154 | "\n",
155 | "Once you have committed to master, you haven't actually sent it to GitHub until you hit the Sync button in the top right. This is the **push** and **pull** parts of GitHub. The desktop app does them all at once. On the command line, these are separate commands. \n",
156 | "\n",
157 | "### Adding your files\n",
158 | "\n",
159 | "With a repository set up like this, you can add your Jupyter Notebooks and other files into the folder and commit them. GitHub will render a notebook as HTML in the browser, which is what makes this an ideal way to do this. \n",
160 | "\n",
161 | "### Assignment\n",
162 | "\n",
163 | "This is how you are going to publish your first story. You are going to combine your code, graphics and text into a single notebook to tell your story. Your notebook should ONLY be the code needed to tell the story -- your scratch work or errors should be in a separate file. You will use Markdown to give it a headline, byline and add your story text between your graphics. You will embed your finished graphics -- if you do them in ggplot or fix them up in Illustrator is up to you -- in the notebook. When it's done, it should be ready to publish in a particularly nerdy publication that likes R code mixed in with stories. \n",
164 | "\n",
165 | "You do not need to turn in anything for this assignment, but for your first major assignment, you will turn in the GitHub URL for your project. It will look something like this: [https://github.com/mattwaite/JOUR491-Data-Visualization/blob/master/Assignments/11_FinishingTouches/FinishingTouches.ipynb](https://github.com/mattwaite/JOUR491-Data-Visualization/blob/master/Assignments/11_FinishingTouches/FinishingTouches.ipynb)\n"
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": null,
171 | "metadata": {
172 | "collapsed": true
173 | },
174 | "outputs": [],
175 | "source": []
176 | }
177 | ],
178 | "metadata": {
179 | "anaconda-cloud": {},
180 | "kernelspec": {
181 | "display_name": "R",
182 | "language": "R",
183 | "name": "ir"
184 | },
185 | "language_info": {
186 | "codemirror_mode": "r",
187 | "file_extension": ".r",
188 | "mimetype": "text/x-r-source",
189 | "name": "R",
190 | "pygments_lexer": "r",
191 | "version": "3.4.1"
192 | }
193 | },
194 | "nbformat": 4,
195 | "nbformat_minor": 1
196 | }
197 |
--------------------------------------------------------------------------------
/Assignments/14_Transparency/.ipynb_checkpoints/Transparency-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Transparency and publication\n",
8 | "\n",
9 | ">__\"You should have a strong command of at least one toolset that (a) allows for filtering, joining, pivoting, and aggregating tabular data, and (b) enables reproducible workflows.\"__ -- Buzzfeed job posting, 2017\n",
10 | "\n",
11 | ">__\"As a general rule, all assertions in a story based on data analysis should be reproducible. The methodology description in the story or accompanying materials should provide a road map to replicate the analysis.\"__ -- The Associated Press Stylebook\n",
12 | "\n",
13 | "Trust in media is low and declining. The reasons for this are myriad, and we can control only what we can control. One thing we can do -- be more transparent about what we do. And this notion goes beyond just journalism -- why should anyone trust what you have to say if you can't show your work? \n",
14 | "\n",
15 | "Jupyter Notebooks are good at this, being able to mix code and text. But your notebooks are currently only visible to you. So we're going to work on improving your notebooks with Markdown and Github.\n",
16 | "\n",
17 | "### Markdown\n",
18 | "\n",
19 | "Markdown is what you are writing in when you aren't writing code in Jupyter Notebooks. It's very simple, and there's only a finite number of things you can do with it, but you can drastically improve your notebooks with some simple typographic tricks. Here's a partial listing of what you can do in Markdown that might be useful in notebooks.\n",
20 | "\n",
21 | "#### Headers\n",
22 | "\n",
23 | "```\n",
24 | "# h1\n",
25 | "## h2\n",
26 | "### h3\n",
27 | "#### h4\n",
28 | "##### h5\n",
29 | "```\n",
30 | "\n",
31 | "Which looks like:\n",
32 | "\n",
33 | "# h1\n",
34 | "## h2\n",
35 | "### h3\n",
36 | "#### h4\n",
37 | "##### h5\n",
38 | "\n",
39 | "#### Blockquotes\n",
40 | "\n",
41 | "To get a block quote, add `>` at the beginning of the line. \n",
42 | "\n",
43 | "It looks like:\n",
44 | "\n",
45 | "> This is a block quote\n",
46 | "\n",
47 | "#### Horizontal rule\n",
48 | "\n",
49 | "You can use a horizontal rule to separate content -- a thematic break. \n",
50 | "\n",
51 | "In Jupyter notebooks, you create a horizontal rule with three dashes: `---`\n",
52 | "\n",
53 | "Which looks like:\n",
54 | "\n",
55 | "---\n",
56 | "\n",
57 | "#### Text treatments\n",
58 | "\n",
59 | "You can **bold** text, _italicize_ text, even ~~strikethrough~~ text with `**bold**`, `_italicize_` and `~~strikethrough~~`. \n",
60 | "\n",
61 | "#### Lists\n",
62 | "\n",
63 | "You can create bulleted or numbered lists like this:\n",
64 | "\n",
65 | "```\n",
66 | "* Bullet 1\n",
67 | "* Bullet 2\n",
68 | "* Bullet 3\n",
69 | "\n",
70 | "1. Numbered list 1\n",
71 | "2. Numbered list 2\n",
72 | "3. Numbered list 3\n",
73 | "```\n",
74 | "Which looks like:\n",
75 | "\n",
76 | "* Bullet 1\n",
77 | "* Bullet 2\n",
78 | "* Bullet 3\n",
79 | "\n",
80 | "And:\n",
81 | "\n",
82 | "1. Numbered list 1\n",
83 | "2. Numbered list 2\n",
84 | "3. Numbered list 3\n",
85 | "\n",
86 | "#### Links\n",
87 | "\n",
88 | "You can add a link like this: `[text to be linked](http://website.com)` \n",
89 | "It looks like this: [text to be linked](http://www.google.com)\n",
90 | "\n",
91 | "#### Tables\n",
92 | "\n",
93 | "Tables are good at showing tabular data. Sounds basic, but people seem to forget tables when there are so many good data visualization options out there. Tables look like this:\n",
94 | "\n",
95 | "```\n",
96 | "| FieldName1 | FieldName2 |\n",
97 | "| ---------- | ---------- |\n",
98 | "| foo | bar |\n",
99 | "| baz | bing |\n",
100 | "| boo | buzz |\n",
101 | "```\n",
102 | "\n",
103 | "And that looks like:\n",
104 | "\n",
105 | "| FieldName1 | FieldName2 |\n",
106 | "| ---------- | ---------- |\n",
107 | "| foo | bar |\n",
108 | "| baz | bing |\n",
109 | "| boo | buzz |\n",
110 | "\n",
111 | "#### Images\n",
112 | "\n",
113 | "The way to handle images in your post is to put the images in the same folder as your Jupyter Notebook and path to them. \n",
114 | "\n",
115 | "To embed an image, it looks like this: `` \n",
116 | "\n",
117 | "\n",
118 | "\n",
119 | "## GitHub\n",
120 | "\n",
121 | "GitHub is a social code sharing website used by millions of developers around the world. It's a place for people to put their code so others can see it, be inspired by it, even participate in it. Other developers can make a copy of your software, improve it and give that back to you. \n",
122 | "\n",
123 | "It's also an ideal place to store your notebooks to foster transparency. With some simple tools, you can publish your notebooks next to your stories so readers who want to know more can see how you did what you did. \n",
124 | "\n",
125 | "You get transparency and replicability in one swoop.\n",
126 | "\n",
127 | "First things first: [Create an account](https://github.com/).\n",
128 | "\n",
129 | "On GitHub you create **repositories** of code. You will have a local copy on your computer, and you'll have a copy on GitHub. You will keep them in sync using **commits** where you will **push** code to GitHub or **pull** it down from Github, depending on which way you need to move code. \n",
130 | "\n",
131 | "Let's make your first repository, just to test it out. First click on the green **New Repository** button. Now we need to give our repository a name, a description, and initialize it with a README file. \n",
132 | "\n",
133 | "\n",
134 | "\n",
135 | "### GitHub desktop\n",
136 | "\n",
137 | "For most people, the easiest way to work with GitHub is through their desktop application. [You can download it here](https://desktop.github.com/).\n",
138 | "\n",
139 | "Log into your account via the desktop app. What we first need to do is **clone** our repository to our local machine. \n",
140 | "\n",
141 | "Once logged in, click the plus button in the top right corner and then click Clone. \n",
142 | "\n",
143 | "\n",
144 | "\n",
145 | "Click on your repository from the list and then click Clone your project. Tell GitHub where to clone it -- this is up to you, but make it somewhere you can find it again and do not move it. \n",
146 | "\n",
147 | "Now that we have a clone of it, let's edit the README file. Let's add this sentence: \"I am learning about GitHub.\"\n",
148 | "\n",
149 | "Save the file and go back to GitHub Desktop. You should see you have 1 uncommitted change.\n",
150 | "\n",
151 | "\n",
152 | "\n",
153 | "Click that. You are now going to create a **commit message**, which is like a note to yourself as to what this change is. In this case, we edited README, so add that as the summary and click **Commit to Master**, which is what you are doing. You have a master branch of your code. If, later, you wanted to try something new but didn't want to mess with your existing code, you could create a branch off of master, work there, and if it worked you could roll it back into master. But that's a topic for another day. \n",
154 | "\n",
155 | "Once you have committed to master, you haven't actually sent it to GitHub until you hit the Sync button in the top right. This is the **push** and **pull** parts of GitHub. The desktop app does them all at once. On the command line, these are separate commands. \n",
156 | "\n",
157 | "### Adding your files\n",
158 | "\n",
159 | "With a repository set up like this, you can add your Jupyter Notebooks and other files into the folder and commit them. GitHub will render a notebook as HTML in the browser, which is what makes this an ideal way to do this. \n",
160 | "\n",
161 | "### Assignment\n",
162 | "\n",
163 | "This is how you are going to publish your first story. You are going to combine your code, graphics and text into a single notebook to tell your story. Your notebook should ONLY be the code needed to tell the story -- your scratch work or errors should be in a separate file. You will use Markdown to give it a headline, byline and add your story text between your graphics. You will embed your finished graphics -- if you do them in ggplot or fix them up in Illustrator is up to you -- in the notebook. When it's done, it should be ready to publish in a particularly nerdy publication that likes R code mixed in with stories. \n",
164 | "\n",
165 | "You do not need to turn in anything for this assignment, but for your first major assignment, you will turn in the GitHub URL for your project. It will look something like this: [https://github.com/mattwaite/JOUR491-Data-Visualization/blob/master/Assignments/11_FinishingTouches/FinishingTouches.ipynb](https://github.com/mattwaite/JOUR491-Data-Visualization/blob/master/Assignments/11_FinishingTouches/FinishingTouches.ipynb)\n"
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": null,
171 | "metadata": {
172 | "collapsed": true
173 | },
174 | "outputs": [],
175 | "source": []
176 | }
177 | ],
178 | "metadata": {
179 | "anaconda-cloud": {},
180 | "kernelspec": {
181 | "display_name": "R",
182 | "language": "R",
183 | "name": "ir"
184 | },
185 | "language_info": {
186 | "codemirror_mode": "r",
187 | "file_extension": ".r",
188 | "mimetype": "text/x-r-source",
189 | "name": "R",
190 | "pygments_lexer": "r",
191 | "version": "3.4.1"
192 | }
193 | },
194 | "nbformat": 4,
195 | "nbformat_minor": 1
196 | }
197 |
--------------------------------------------------------------------------------
/Data/mountainlions.csv:
--------------------------------------------------------------------------------
1 | ID,Cofirm Type,COUNTY,Date
1,Track,Dawes,9/14/91
2,Mortality,Sioux,11/10/91
3,Mortality,Scotts Bluff,4/21/96
4,Mortality,Sioux,5/9/99
5,Mortality,Box Butte,9/29/99
6,Track,Scotts Bluff,11/12/99
7,Track,Howard,2/26/00
8,Track,Scotts Bluff,9/15/00
9,Mortality,Howard,11/20/00
10,Photo,Brown,12/1/01
11,Trail Camera Photo,Brown,6/1/02
12,Captured,Douglas,10/1/03
13,Track,Cherry,1/6/04
14,Mortality,Thomas,4/18/04
15,Track,Dawes,5/12/04
16,Trail Camera Photo,Keya Paha,9/10/04
17,Mortality,Dakota,11/23/04
18,Track,Dawes,1/31/05
19,Track,Brown,2/2/05
20,Photo,Dawes,6/7/05
21,Track,Dawes,7/25/05
22,Track,Dawes,8/28/05
23,Mortality,Sarpy,11/6/05
24,Trail Camera Photo,Sioux,11/22/05
25,Track,Dawes,1/5/06
26,Trail Camera Photo,Sioux,1/11/06
27,Trail Camera Photo,Dawes,2/17/06
28,Trail Camera Photo,Dawes,3/20/06
29,Trail Camera Photo,Scotts Bluff,4/4/06
30,Track,Custer,4/22/06
31,Mortality,Cherry,6/27/06
32,Mortality,Scotts Bluff,10/2/06
33,DNA,Dawes,10/23/06
34,Trail Camera Photo,Dawes,12/6/06
35,Track,Sheridan,12/20/06
36,Mortality,Dawes,2/28/07
37,Trail Camera Photo,Dawes,2/27/07
38,Trail Camera Photo,Dawes,2/27/07
39,Trail Camera Photo,Dawes,2/27/07
40,Trail Camera Photo,Sioux,3/27/07
41,Trail Camera Photo,Sioux,4/18/07
42,Mortality,Dawes,7/10/07
43,Trail Camera Photo,Sioux,7/18/07
44,Track,Dawes,10/10/07
45,Trail Camera Photo,Sioux,9/18/07
46,Trail Camera Photo,Banner,10/31/07
47,Mortality,Scotts Bluff,2/7/08
48,Mortality,Dawes,1/1/02
49,Mortality,Dawes,12/1/06
50,Track,Dawes,4/15/08
51,Trail Camera Photo,Knox,5/11/08
52,Track,Dawes,5/28/08
53,Track,Dawes,7/16/08
54,DNA,Dawes,5/22/08
55,Trail Camera Photo,Dawes,10/22/08
56,Trail Camera Photo,Sioux,9/7/08
57,Trail Camera Photo,Sioux,10/29/08
58,Mortality,Dawes,11/22/08
59,DNA,Sheridan,11/4/08
60,Prey,Dawes,10/12/08
61,Track,Sheridan,12/4/08
62,Track,Dawes,12/6/08
63,Track,Sheridan,12/10/08
64,Track,Dawes,12/11/08
65,Track,Dawes,11/16/08
66,Trail Camera Photo,Dawes,12/20/08
67,Trail Camera Photo,Dawes,12/20/08
68,Photo,Nance,1/12/09
69,Mortality,Scotts Bluff,1/25/09
70,Track,Platte,1/29/09
71,Trail Camera Photo,Sheridan,12/10/08
72,Trail Camera Photo,Sioux,3/2/09
73,Track,Dawes,4/27/09
74,Trail Camera Photo,Dawes,5/9/09
75,Trail Camera Photo,Dawes,5/9/09
76,Trail Camera Photo,Sioux,5/5/09
77,Prey,Dawes,6/23/09
78,DNA,Dawes,4/19/09
79,Track,Dawes,7/30/09
80,Trail Camera Photo,Sheridan,8/5/09
81,Photo,Dawes,8/6/09
82,Photo,Dawes,9/4/09
83,Trail Camera Photo,Sheridan,9/2/09
84,Trail Camera Photo,Sheridan,9/2/09
85,Track,Cherry,9/29/09
86,Prey,Dawes,9/30/09
87,Trail Camera Photo,Sheridan,8/15/09
88,Trail Camera Photo,Sioux,11/1/09
89,Trail Camera Photo,Dawes,11/6/09
90,Prey,Custer,12/17/09
91,Track,Dawson,12/18/09
92,Trail Camera Photo,Dawes,11/19/09
93,Prey,Dawes,12/8/09
94,Trail Camera Photo,Sheridan,12/21/09
95,Trail Camera Photo,Sheridan,12/21/09
96,Mortality,Sheridan,2/21/10
97,Track,Thomas,2/6/10
98,Track,Dawes,2/9/10
99,Track,Dawes,2/27/10
100,DNA,Dawes,10/7/09
101,DNA,Sheridan,11/28/09
102,DNA,Dawes,11/26/09
103,Trail Camera Photo,Rock,5/8/10
104,Trail Camera Photo,Sioux,4/2/10
105,Photo,Hooker,4/1/09
106,Trail Camera Photo,Sheridan,5/19/10
107,Trail Camera Photo,Dawes,7/31/10
108,Trail Camera Photo,Dawes,8/13/10
109,Trail Camera Photo,Lincoln,8/8/10
110,DNA,Cherry,4/7/10
111,Trail Camera Photo,Dawes,8/15/10
112,Trail Camera Photo,Custer,8/25/10
113,Trail Camera Photo,Dawson,9/9/10
114,Photo,Dawes,9/11/10
115,Mortality,Box Butte,9/14/10
116,Mortality,Dawes,9/17/10
117,Track,Banner,10/8/10
118,Mortality,Scotts Bluff,10/19/10
119,Trail Camera Photo,Sheridan,10/15/10
120,Trail Camera Photo,Sheridan,9/11/10
121,Trail Camera Photo,Sheridan,9/20/10
122,Mortality,Dawes,10/25/10
123,Trail Camera Photo,Dawson,9/24/10
124,Trail Camera Photo,Custer,10/15/10
125,Trail Camera Photo,Polk,11/8/10
126,Trail Camera Photo,Howard,10/24/10
127,Trail Camera Photo,Valley,11/1/10
128,Mortality,Sioux,11/17/10
129,Trail Camera Photo,Sheridan,9/11/10
130,Trail Camera Photo,Sherman,10/1/10
131,Photo,Sioux,12/21/10
132,Trail Camera Photo,Dawson,12/23/10
133,Trail Camera Photo,Cherry,11/2/10
134,DNA,Dawes,7/28/10
135,Photo,Dawes,2/10/11
136,Photo,Dawes,2/11/11
137,Trail Camera Photo,Dawes,3/4/11
138,Photo,Blaine,4/25/11
139,Mortality,Saunders,4/30/11
140,Mortality,Buffalo,5/9/11
141,Trail Camera Photo,Sheridan,5/10/11
142,Trail Camera Photo,sheridan,5/10/11
143,Track,Lincoln,7/20/11
144,Track,Dawes,6/20/11
145,DNA,sheridan,4/7/11
146,Trail Camera Photo,Dawes,8/8/11
147,Track,Keya Paha,8/24/11
148,Trail Camera Photo,Dawes,8/31/11
149,Trail Camera Photo,Dawes,9/30/11
150,DNA,Sioux,3/17/11
151,Mortality,Knox,11/12/11
152,Trail Camera Photo,Sioux,9/13/11
153,Trail Camera Photo,Sioux,9/27/11
154,Trail Camera Photo,Sioux,11/19/11
155,Trail Camera Photo,Keya Paha,10/30/11
156,Trail Camera Photo,Sioux,10/31/11
157,Trail Camera Photo,Sioux,10/31/11
158,Trail Camera Photo,Brown,11/29/11
159,Trail Camera Photo,Sioux,9/28/11
160,Trail Camera Photo,Sioux,9/28/11
161,Trail Camera Photo,Keya Paha,11/27/11
162,Mortality,Dawes,12/26/11
163,Photo,Dawes,12/26/11
164,Photo,Dawes,12/26/11
165,Trail Camera Photo,Sioux,12/11/11
166,Trail Camera Photo,Sioux,12/11/11
167,Trail Camera Photo,Sioux,12/11/11
168,Trail Camera Photo,Sioux,11/30/11
169,Trail Camera Photo,Dawes,1/5/12
170,Trail Camera Photo,Rock,1/12/12
171,Trail Camera Photo,Brown,12/18/11
172,Trail Camera Photo,Keya Paha,10/23/11
173,Photo,Dawes,1/27/12
174,Trail Camera Photo,Rock,10/16/11
175,Photo,Dawes,2/9/12
176,DNA,Dawes,8/12/11
177,Mortality,Dawes,2/12/12
178,Trail Camera Photo,Sioux,12/11/11
179,Track,Thomas,2/15/12
180,Track,Sheridan,1/18/12
181,Track,Sheridan,1/18/12
182,Track,Thomas,7/15/11
183,Trail Camera Photo,Thurston,11/23/11
184,Trail Camera Photo,Scotts Bluff,3/3/12
185,Track,Thomas,2/12/12
186,DNA,Dawes,2/13/12
187,Track,Sioux,2/24/12
188,Trail Camera Photo,Dawes,3/23/12
189,Trail Camera Photo,Sioux,3/27/12
190,Trail Camera Photo,Sioux,3/27/12
191,Trail Camera Photo,Sioux,3/27/12
192,Trail Camera Photo,Dixon,2/4/12
193,Trail Camera Photo,Sioux,3/15/12
194,Trail Camera Photo,Custer,1/22/12
195,Trail Camera Photo,Custer,2/9/12
196,Trail Camera Photo,Custer,4/15/12
197,Trail Camera Photo,Holt,4/30/12
198,Trail Camera Photo,Dawes,5/11/12
199,Photo,Banner,7/8/12
200,Trail Camera Photo,Sioux,7/19/12
201,Trail Camera Photo,Dawes,8/16/12
202,Mortality,Kimball,8/18/12
203,Trail Camera Photo,Morrill,8/19/12
204,Trail Camera Photo,Sioux,7/31/12
205,Mortality,Scotts Bluff,8/27/12
206,DNA,Box Butte,8/23/12
207,Mortality,Sheridan,9/16/12
208,Photo,Sheridan,8/30/12
209,Photo,Sheridan,8/30/12
210,Photo,Sheridan,8/30/12
211,Trail Camera Photo,Lincoln,8/21/12
212,Trail Camera Photo,Scotts Bluff,9/24/12
213,Mortality,Sheridan,10/15/12
214,Trail Camera Photo,Rock,10/16/12
215,Mortality,Rock,11/10/12
216,Prey,Sheridan,9/3/12
217,Trail Camera Photo,Sioux,10/23/12
218,Trail Camera Photo,Cedar,10/28/12
219,Trail Camera Photo,Knox,11/14/12
220,Trail Camera Photo,Rock,11/25/12
221,Trail Camera Photo,Dawes,11/21/12
222,Trail Camera Photo,Sioux,12/1/12
223,Trail Camera Photo,Lincoln,12/12/12
224,Trail Camera Photo,Sheridan,11/13/12
225,Track,Scotts Bluff,1/5/13
226,Mortality,Scotts Bluff,1/9/13
227,DNA,Sioux,11/8/12
228,Track,Scotts Bluff,1/12/13
229,Track,Scotts Bluff,1/29/13
230,Mortality,Sheridan,2/8/13
231,Trail Camera Photo,Lincoln,11/24/12
232,Trail Camera Photo,Sheridan,7/26/12
233,Track,Cherry,1/23/13
234,Trail Camera Photo,Sioux,1/12/13
235,DNA,Dawes,1/7/13
236,Track,Cherry,2/7/13
237,Track,Cherry,2/25/13
238,Track,Dawes,2/25/13
239,Mortality,Dawes,3/28/13
240,Trail Camera Photo,Cherry,3/19/13
241,Photo,Dawes,5/4/13
242,DNA,Cherry,1/10/13
243,DNA,Scotts Bluff,2/20/13
244,Trail Camera Photo,Dawes,6/11/13
245,Trail Camera Photo,Sheridan,5/30/13
246,Trail Camera Photo,Morrill,7/18/13
247,Mortality,Sheridan,7/30/13
248,Track,Keith,8/4/13
249,DNA,Dawes,8/4/13
251,Prey,Dawes,8/19/13
250,Trail Camera Photo,Knox,10/10/12
252,Trail Camera Photo,Scotts Bluff,5/29/13
253,Trail Camera Photo,Scotts Bluff,7/13/13
254,Trail Camera Photo,Keya Paha,7/19/13
255,Trail Camera Photo,Keya Paha,8/26/13
256,Trail Camera Photo,Saunders,9/19/13
257,Trail Camera Photo,Dawes,10/24/13
258,Trail Camera Photo,Merrick,9/7/13
259,Photo,Dawes,8/15/13
260,Trail Camera Photo,Cherry,9/25/13
261,Trail Camera Photo,Cherry,11/8/13
262,Trail Camera Photo,Lincoln,10/24/13
263,Mortality,Sioux,12/20/13
264,Trail Camera Photo,Dawes,11/28/13
265,Mortality,Dawes,1/2/14
266,Mortality,Sioux,1/2/14
267,Trail Camera Photo,Keya Paha,10/24/13
268,Trail Camera Photo,Dawes,1/9/14
269,Track,Lincoln,2/3/14
270,Mortality,Sioux,2/1/14
271,Mortality,Custer,2/16/14
272,Track,Cherry,2/20/14
273,Trail Camera Photo,Brown,12/17/13
274,Trail Camera Photo,Brown,1/2/14
275,Mortality,Sheridan,2/26/14
276,Trail Camera Photo,Brown,2/24/14
277,Trail Camera Photo,Brown,3/12/14
278,Mortality,Sheridan,3/21/14
279,Trail Camera Photo,Dawes,12/1/13
280,Trail Camera Photo,Dawes,12/1/13
281,Trail Camera Photo,Dawes,4/5/14
282,Trail Camera Photo,Knox,3/22/14
283,Trail Camera Photo,Scotts Bluff,5/15/14
284,Trail Camera Photo,Scotts Bluff,5/15/14
285,Trail Camera Photo,Scotts Bluff,5/15/14
286,Mortality,Dawes,5/2/14
287,Trail Camera Photo,Dawes,4/28/14
288,DNA,Blaine,4/2/14
289,Trail Camera Photo,Keya Paha,1/18/14
290,Trail Camera Photo,Keya Paha,4/16/14
291,DNA,Cherry,3/3/14
292,DNA,Dawes,3/29/13
293,Mortality,Sioux,6/22/14
294,Trail Camera Photo,Keya Paha,5/21/14
295,Trail Camera Photo,Keya Paha,5/4/14
296,Trail Camera Photo,Keya Paha,6/18/13
297,Trail Camera Photo,Dawes,4/23/13
298,Mortality,Dawes,7/19/14
299,Mortality,Dawes,8/2/14
300,Photo,Box Butte,8/6/14
301,Trail Camera Photo,Knox,8/5/14
302,Trail Camera Photo,Knox,8/11/14
303,Mortality,Dawes,9/6/14
304,Trail Camera Photo,Hall,9/21/14
305,Trail Camera Photo,Holt,9/17/14
306,Mortality,Knox,10/5/14
307,Track,Dixon,8/31/14
308,Mortality,Wheeler,10/9/14
309,Trail Camera Photo,Dawes,10/14/14
310,Trail Camera Photo,Dawes,10/14/14
311,Trail Camera Photo,Dawes,10/14/14
312,Trail Camera Photo,Scotts Bluff,8/20/14
313,Photo,Cherry,10/24/14
314,Mortality,Dawes,10/23/14
315,Trail Camera Photo,Dakota,10/25/14
316,Trail Camera Photo,Keya Paha,7/8/14
317,Mortality,Dakota,11/17/14
318,Trail Camera Photo,Brown,11/4/14
319,Trail Camera Photo,Cherry,9/7/14
320,Trail Camera Photo,Banner,11/9/14
321,Track,Cherry,11/18/14
322,Trail Camera Photo,Keya Paha,8/27/14
323,Trail Camera Photo,Keya Paha,12/3/14
324,Trail Camera Photo,Dixon,11/10/14
325,Trail Camera Photo,Cherry,10/3/14
326,Trail Camera Photo,Cherry,10/30/14
327,Trail Camera Photo,Cherry,11/23/14
328,Trail Camera Photo,Cherry,12/2/14
329,Trail Camera Photo,Cherry,11/3/14
330,Trail Camera Photo,Cherry,10/26/14
331,Photo,Dawes,12/8/14
332,Mortality,Dawes,12/10/14
333,Track,Rock,12/21/14
334,Trail Camera Photo,Banner,12/14/14
335,Trail Camera Photo,Dawes,12/17/14
336,Track,Dawson,12/27/14
337,Track,Brown,12/28/14
338,Trail Camera Photo,Lincoln,12/26/14
339,Trail Camera Photo,Brown,12/24/14
340,Trail Camera Photo,Rock,12/22/14
341,Trail Camera Photo,Rock,11/30/14
342,Trail Camera Photo,Rock,4/4/14
343,Track,Rock,11/19/14
344,Track,Brown,12/25/14
345,Track,Buffalo,1/24/15
346,Trail Camera Photo,Scotts Bluff,6/13/14
347,Trail Camera Photo,Scotts Bluff,6/29/14
348,Trail Camera Photo,Richardson,1/22/15
349,Track,Buffalo,1/17/15
350,Trail Camera Photo,Dawes,1/19/15
351,Trail Camera Photo,Lincoln,12/6/14
352,Photo,Brown,3/10/15
353,Trail Camera Photo,Banner,1/22/15
354,Mortality,Sheridan,3/8/15
355,Mortality,Douglas,5/6/15
356,Mortality,Sioux,4/25/15
357,Research,Scotts Bluff,2/2/15
358,Research,Scotts Bluff,2/3/15
359,Research,Dawes,2/10/15
360,Research,Dawes,3/8/15
361,Research,Dawes,3/19/15
362,Research,Dawes,5/16/15
363,Trail Camera Photo,Cherry,1/28/15
364,Trail Camera Photo,Cherry,2/19/15
365,Trail Camera Photo,Cherry,3/31/15
366,Trail Camera Photo,Cherry,4/13/15
367,Trail Camera Photo,Cherry,5/8/15
368,Trail Camera Photo,Cherry,5/26/15
369,Track,Blaine,2/8/15
370,Trail Camera Photo,Keya Paha,4/4/15
371,Track,Cherry,2/20/15
372,Trail Camera Photo,Dawes,2/15/15
373,Photo,Lincoln,6/2/15
374,Trail Camera Photo,Dawes,6/29/15
375,Trail Camera Photo,Sioux,4/25/15
376,Trail Camera Photo,Sioux,11/15/14
377,Trail Camera Photo,Sioux,11/7/14
378,Trail Camera Photo,Sioux,2/5/15
379,Trail Camera Photo,Sioux,3/14/15
380,Trail Camera Photo,Sioux,3/15/15
381,Trail Camera Photo,Sioux,3/18/15
382,DNA,Keya Paha,2/15/15
383,Trail Camera Photo,Nemaha,7/18/15
384,Trail Camera Photo,Frontier,7/10/15
385,Trail Camera Photo,Nemaha,7/2/15
386,Trail Camera Photo,Nemaha,7/28/15
387,Trail Camera Photo,Nemaha,9/3/15
389,Trail Camera Photo,Richardson,8/6/15
388,Trail Camera Photo,Nemaha,9/15/15
390,Trail Camera Photo,Keya Paha,8/11/15
391,Trail Camera Photo,Brown,8/28/15
392,Trail Camera Photo,Keya Paha,4/24/15
393,Research,Dawes,10/11/15
--------------------------------------------------------------------------------
/syllabus.md:
--------------------------------------------------------------------------------
1 | # JOUR 407/807 Data Visualization
2 | __Spring 2018__
3 | __T-Th 3:30 - 4:45 p.m.__
4 | __ANDN 27__
5 |
6 | Instructor: Matt Waite
7 | Email: mwaite3@unl.edu
8 | Twitter: @mattwaite
9 | Phones: (402) 802-5202 cell, (402) 472-5840 office
10 | Office: 244 Andersen Hall
11 |
12 |
__Course description:__
13 |
14 |
Welcome to data visualization, where you'll learn to use storytelling, analysis and visualization techniques to inform people with data. In this class, you'll learn what makes for good visualizations and how you can develop a deeper understanding of a topic through a combination of words and graphics, forming a new kind of narrative on the web.
15 |
16 | __Course goals:__
17 |
18 | * Understand the basics of data and data visualization
19 | * Understand the history of data visualization techniques
20 | * Master basic data analysis and visualization tools
21 | * Get hands on experience with more advanced tools
22 | * Publish visual stories using learned techniques to the web via single page web applications
23 |
24 |
__Required texts:__
25 |
26 |
* The Visual Display of Quantitative Information by Edward Tufte
27 | * The Functional Art: An introduction to information graphics and visualization by Alberto Cairo
28 |
29 |
__Other requirements:__
30 |
31 | * Administrative privileges on a computer so you can install software
32 | * A GitHub account (free)
33 | * A Google account (free)
34 | * A sense of humor (also free)
35 |
36 | __News Nerd Slack:__
37 |
38 | I use [Slack](https://slack.com/) as a way for students to get help from each other and me when we're not in class or not even in the building. I have one News Nerd Slack that includes my current code class, students of past classes and alumni who have gone on to do data journalism professionally. You'll be added to the slack during the semester, and at the end, I'll remove you unless you tell me otherwise. If you like it, stay. If not, no worries and no judgements.
39 |
40 |
__Class opportunities:__
41 |
42 |
The opportunities in this class are to experiment with storytelling in a way you have never done before. Creativity and ambition will be rewarded. Copy and pasting will not. As such, this class will require you to try a lot of things on your own. If you've never done this before, it can be a little daunting starting out. To help you, I've set up a time called [Maker Hours](http://www.makerhours.org) where you can come, hang out, learn something new and get help with it. The information is in the link, but the short version is: Most Friday afternoons, room 27, Andersen Hall. This time is open to any student wanting to learn programming, data viz, hardware hacking -- anything in the digital journalism space that can help them tell stories on the web. It's not required for you to show up at Maker Hours, but if you're struggling, stuck, frustrated or just really interested in learning more, come hang out. I promise you it's worth the time.
43 |
44 |
__Grading:__
45 |
46 | I use the standard grading scale.
47 |
48 | |Grade|Percentage|
49 | |-----|----------|
50 | |A+|97-100|
51 | |A|93-96|
52 | |A-|90-92|
53 | |B+|87-89|
54 | |B|83-86|
55 | |B-|80-82|
56 | |C+|77-79|
57 | |C|73-76|
58 | |C-|70-72|
59 | |D+|67-69|
60 | |D|63-66|
61 | |D-|60-62|
62 | |F|59 or below|
63 |
64 | However, your final letter grade will be weighted:
65 |
66 | Assignment|Weight
67 | ----------|------
68 | Reading quizzes|10%
69 | Assignments|60%
70 | Final assignment|30%
71 |
72 |
You will be graded on effort, creativity and clarity of your work. Work hard, try new things, let your effort show through in the work and you'll do fine. Mail it in and your grade will reflect it.
73 |
74 |
A note: Not reflected in the grade weights is how I handle things like attendance and class participation. I do not take attendance, but I know if you are there or not. I do not record class participation, but I do know if you take part in class regularly and contribute. So when it comes time to submit grades, and you're right on the line between a B+ and A-, it's your attendance and participation that will sway me to round up or down. Show up, take part, be present and I round up, sometimes pretty generously. Sit on your phone, skip class, never talk and your grade is your grade, I don't care how close you are.
75 |
76 |
__Final Project:__
77 |
The skills and the ideas we're going to discuss in this class are all building toward a final project where you will tell a story with data. The absolute minimum requirements of this final project are:
78 |
79 | * Tell a story with data using visual and narrative techniques discussed in the class.
80 | * This story must use three different types of visualization.
81 | * This story must have multiple data sources.
82 | * The story must use Tarbell for publication.
83 |
84 | You will be graded on:
85 |
86 | * The story
87 | * The techniques you use
88 | * The sophistication of the visualizations
89 | * The creativity you show
90 | * The effort you put in, evidenced by scrum participation, GitHub code check-ins, the questions you ask, etc.
91 |
92 |
__Notes on attendance__
93 |
94 | Yes, we all get sick. Yes, things happen. I don’t want you to be sick in my class any more than you want to be sick. You’ve got no fewer than five ways to get ahold of me. If you are going to miss class, tell me before class. We’ll work it out. But you have to tell me before class for me to help you.
This said: this class builds each class onto the next one. Miss a class and you are behind. We’re going to be covering a lot of new material in this class. Miss one at your own peril. Assume that you missed something important. And know it is entirely on you to find out what you missed and how to catch up.
95 |
96 |
__Policies__
97 |
98 |
Here's the short version.
You cheat, you fail, no exceptions.
If I’m doing something that’s keeping you from learning, tell me. Tell the Dean. Tell someone, because that’s not cool. I won’t tolerate it from myself and you shouldn’t either.
99 |
Now the longer versions.
100 |
101 |
__ACEJMC Competencies__
102 |
103 |
After this class, you should be able to:
104 |
* Understand concepts and apply theories in the use and presentation of images and information;
105 | * Demonstrate an understanding of professional ethical principles and work ethically in pursuit of truth, accuracy, fairness and diversity;
106 | * Think critically, creatively and independently;
107 | * Conduct research and evaluate information by methods appropriate to the communications professions in which they work;
108 | * Critically evaluate their own work and that of others for accuracy and fairness, clarity, appropriate style and grammatical correctness;
109 | * Apply basic numerical and statistical concepts;
110 | * Apply tools and technologies appropriate for the communications professions in which they work.
111 |
112 |
__Academic integrity:__
113 |
114 | Every student must adhere to the policy on academic integrity set forth in the UNL Student Code of Conduct as outlined in the UNL Bulletin. Students who plagiarize may receive a failing grade on an assignment or for an entire course and may be reported to the Student Judicial Review Board. The work a student submits in a class must be the student's own work and must be work completed for that particular class and assignment. Students wishing to build on an old project or work on a similar project in two classes must discuss this with both professors. Academic dishonesty includes:
115 |
116 | * Handing in another's work or part of another's work as your own.
* Turning in one of your old papers (including something you wrote in high school) for a current class.
117 | * Turning in the same or similar paper for two different classes.
* Using notes or other study aids or otherwise obtaining another's answers for a quiz or an examination.
118 |
Anything and everything you include in your work that comes from another source must be attributed with proper citation. That includes ideas and opinions.
Plagiarism consists of using phrases, sentences or paragraphs from any source and republishing them without alteration or attribution. The sources include, but are not limited to, books, magazines, newspapers, television or radio reports, Web sites and other students’ papers.
119 |
120 |
__Students with disabilities__
121 |
122 |
Students with disabilities are encouraged to contact the instructor for a confidential discussion of their individual needs for academic accommodation. It is the policy of the University of Nebraska-Lincoln to provide flexible and individualized accommodation to students with documented disabilities that may affect their ability to fully participate in course activities or meet course requirements. To receive accommodation services, students must be registered with the Services for Students with Disabilities (SSD) office, 132 Canfield Administration, 472-3787 voice or TTY.
123 |
124 |
__Diversity__
125 |
126 |
The College of Journalism and Mass Communications values diversity, in the broadest sense of the word – gender, age, race, ethnicity, nationality, income, religion, education, geographic, physical and mental ability or disability, sexual orientation. We recognize that understanding and incorporating diversity in the curriculum enables us to prepare our students for careers as professional communicators in a global society. As communicators, we understand that journalism, advertising and other forms of strategic communication must reflect society in order to be effective and reliable. We fail as journalists if we are not accurate in our written, spoken and visual reports; including diverse voices and perspectives improves our accuracy and truthfulness. In advertising, we cannot succeed if we do not understand the value of or know how to create advertising that reflects a diverse society and, thus, appeals to broader audiences.
127 |
128 | ## Course schedule
129 |
130 | This is __very tentative__ and __will change__.
131 |
132 | ### Jan. 9, 2018: Intro, syllabus
133 |
134 | **In class:** Introductions, syllabus, requirements, what is data visualization?
135 |
136 | **Assignment:**
137 |
138 | * Go buy the books. Now now now now. There will be a quiz on Thursday on the readings.
139 | * Read Tufte Chapter 1, Graphical Excellence
140 | * Read Cairo Chapter 1: Why Visualize.
141 | * Install [Slack](https://slack.com/get) on your computer and your phone.
142 |
143 | ### Jan. 11, 2018: Graphical Excellence
144 |
145 | **In class:** Quiz. What are the specific elements of graphical excellence that make for a good data visualization? These are the foundational principles of the semester.
146 |
147 | **Assignment:**
148 |
149 | * Read [the layered grammar of graphics](http://byrneslab.net/classes/biol607/readings/wickham_layered-grammar.pdf) by Hadley Wickham
150 | * Do assignment 1 -- [installing Jupyter Notebook](https://github.com/mattwaite/JOUR491-Data-Visualization/blob/master/Assignments/1_Installations/installing_jupyter_notebook.md).
151 | * [Download this notebook](https://www.dropbox.com/s/1mn03dbf18llah1/Hello%20World%20in%20R.ipynb?dl=0), open it in Jupyter Notebook and walk through it.
152 |
153 | ### Jan. 16, 2018: R basics
154 |
155 | **In class:** Basic data analysis in R
156 |
157 | **Assignment:** Do the first R basics assignment
158 |
159 | ### Jan. 18, 2018: R basics 2
160 |
161 | **In class:** Basic data analysis in R, part 2
162 |
163 | **Assignment:** Do the second R basics assignment
164 |
165 | ### Jan. 23, 2018: Calculating new values
166 |
167 | **In class:** Mutate in R
168 |
169 | **Assignment:** Do the calculating percent change assignment
170 |
171 | ### Jan. 25, 2018: Working with dates
172 |
173 | **In class:** Why are dates such a problem?
174 |
175 | **Assignment:** Do the working with dates assignment
176 |
177 | ### Jan. 30, 2018: Recasting data
178 |
179 | **In class:** Narrow vs wide data
180 |
181 | **Assignment:** Do the reshape2 assignment
182 |
183 | ### Feb. 1, 2018: Intro to ggplot2
184 |
185 | **In class:** The Grammar of Graphics in R
186 |
187 | **Assignment:** Do the intro to ggplot2 assignment
188 |
189 | ### Feb. 6, 2018: More ggplot2
190 |
191 | **In class:** The Grammar of Graphics in R
192 |
193 | **Assignment:** Do the second ggplot2 assignment
194 |
195 | ### Feb. 8, 2018: Layers in ggplot2
196 |
197 | **In class:** Layering data in ggplot2
198 |
199 | **Assignment:** Do the ggplot2 layers assignment
200 |
201 | ### Feb. 13, 2018: Styling ggplot2 graphics in R
202 |
203 | **In class:** The Grammar of Graphics in R
204 |
205 | **Assignment:** Do the ggplot2 styling assignment
206 |
207 | ### Feb. 15, 2018: Styling ggplot2 graphics in Illustrator
208 |
209 | **In class:** Graphical finishing school
210 |
211 | **Assignment:** Do the styling in Illustrator assignment
212 |
213 | ### Feb. 20, 2018: Visual storytelling I
214 |
215 | **In class:** Guest speaker: Theo Francis, Wall Street Journal.
216 |
217 | **Assignment:** Reaction paper on Francis talk, prepare a story pitch for your first visual story.
218 |
219 | ### Feb. 22, 2018: Visual storytelling II
220 |
221 | **In class:** Story pitches, guest speaker TBA
222 |
223 | **Assignment:** Reaction paper, start working on your visual story
224 |
225 | ### Feb. 27, 2018: Information to wisdom
226 |
227 | **In class:** Discussion of Cairo. Tufte.
228 |
229 | **Assignment:**
230 |
231 | * Read Cairo Chapter 2
232 | * Read Tufte Chapter 4
233 | * Quiz in next class covering readings.
234 |
235 | ### March 1, 2018: Art and complexity
236 |
237 | **In class:** Discussion of Cairo readings
238 |
239 | **Assignment:** Read Cairo Chapter 6.
240 |
241 | ### March 6, 2018: Data visualization and cognition
242 |
243 | **In class:** Quiz on readings. Discussion of your brain on visualizations.
244 |
245 | **Assignment:**
246 |
247 | ### March 8, 2018: Tarbell and online publication
248 |
249 | **In class:** How we're going to publish visual stories
250 |
251 | **Assignment:** Do the tarbell assignment
252 |
253 | ### March 13, 2018: Github and transparency
254 |
255 | **In class:** Showing your work
256 |
257 | **Assignment:** Do the github assignment
258 |
259 | ### March 15, 2018: Story edits and production
260 |
261 | Sign up for your meeting time. Prepare a pitch for your second visual story.
262 |
263 | ### March 20, 2018:
264 |
265 | **No class: Spring Break**
266 |
267 | ### March 22, 2018:
268 |
269 | **No class: Spring Break**
270 |
271 | ### March 27, 2018: Critique
272 |
273 | **In class:** In class critique of published work. Story pitches
274 |
275 | **Assignment:**
276 |
277 | Read Tufte Chapter 2, 3, 5, 6
278 |
279 | ### March 29, 2018: Chart Junk
280 |
281 | **In class:** Discussion of Tufte readings.
282 |
283 | **Assignment:** TBA
284 |
285 | ### April 3, 2018: Lying with charts
286 |
287 | **In class:** Quiz on readings. Discussion of Tufte readings.
288 |
289 | **Assignment:** Look for three data visualizations -- in print, online, wherever -- and bring to class on Thursday. Read the [Junk Chart Trifecta](http://junkcharts.typepad.com/junk_charts/junk-charts-trifecta-checkup-the-definitive-guide.html) and be prepared to discuss charts based on this rubric. Read Shazna Nessa's [Visual Literacy In the Age of Data](https://source.opennews.org/en-US/learning/visual-literacy-age-data/)
290 |
291 | ### April 5, 2018: Working with maps 1
292 |
293 | ### April 10, 2018: Working with maps 2
294 |
295 | ### April 12, 2018: Working with maps 3
296 |
297 | ### April 17, 2018: Working with maps 4
298 |
299 | ### April 19, 2018: Other visualizations
300 |
301 | ### April 24, 2018: Work time
302 |
303 | In class work time and question answering.
304 |
305 | ### April 26, 2018: Story edits and production
306 |
307 | Sign up for your meeting time.
308 |
--------------------------------------------------------------------------------
/Assignments/2_R_Basics/RBasics.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Basic data analysis in R\n",
8 | "\n",
9 | "R is a statistical programming language that is purpose built for data analysis. \n",
10 | "\n",
11 | "Base R does a lot, but there are a mountain of external libraries that do things to make R better/easier/more fully featured. One of the best libraries, in your professor's opinion, is `dplyr`, a library for working with data. To use dplyr, you need to import it. "
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 1,
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "name": "stderr",
21 | "output_type": "stream",
22 | "text": [
23 | "\n",
24 | "Attaching package: ‘dplyr’\n",
25 | "\n",
26 | "The following objects are masked from ‘package:stats’:\n",
27 | "\n",
28 | " filter, lag\n",
29 | "\n",
30 | "The following objects are masked from ‘package:base’:\n",
31 | "\n",
32 | " intersect, setdiff, setequal, union\n",
33 | "\n"
34 | ]
35 | }
36 | ],
37 | "source": [
38 | "library(dplyr)"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "The first thing we need to do is get some data to work with. We do that by reading it in. In our case, we're going to read data from a csv file -- a comma-separated values file. \n",
46 | "\n",
47 | "The code looks like this: \n",
48 | "\n",
49 | "`mountainlions <- read.csv(\"../../Data/mountainlions.csv\")`\n",
50 | "\n",
51 | "Let's unpack that. \n",
52 | "\n",
53 | "The first part -- `mountainlions` -- is the name of your variable. A variable is just a name of a thing. In this case, our variable is a data frame, which is R's way of storing data. We can call this whatever we want. I always want to name data frames after what is in it. In this case, we're going to import a dataset of mountain lion sightings from the Nebraska Game and Parks Commission.\n",
54 | "\n",
55 | "The `<-` bit is the variable assignment operator. It's how we know we're assigning something to a word. \n",
56 | "\n",
 57 |     "The `read.csv` bits are pretty obvious. What happens in the quote marks is the path to the data. In there, I have to tell R where to find the data. The easiest thing to do, if you are confused about how to find your data, is to put your data in the same folder as your notebook. In my case, I've got a folder called Data that's two levels up from my work folder. So the `../` means move up one level. So move up one level, move up one level, find Data, then in there is a file called mountainlions.csv. \n",
58 | "\n",
59 | "What you put in there will be different from mine. So your first task is to import the data. "
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 2,
65 | "metadata": {
66 | "collapsed": true
67 | },
68 | "outputs": [],
69 | "source": [
70 | "mountainlions <- read.csv(\"../../Data/mountainlions.csv\")"
71 | ]
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "Now we can inspect the data we imported. What does it look like? To do that, we use `head(mountainlions)` to show the headers and the first six rows of data. If we wanted to see them all, we could just simply enter `mountainlions` and run it. \n",
78 | "\n",
79 | "To get the number of records in our dataset, we run `nrow(mountainlions)`"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 10,
85 | "metadata": {},
86 | "outputs": [
87 | {
88 | "data": {
89 | "text/html": [
90 | "
\n",
91 | "| ID | Cofirm.Type | COUNTY | Date |
\n",
92 | "\n",
93 | "\t| 1 | Track | Dawes | 9/14/91 |
\n",
94 | "\t| 2 | Mortality | Sioux | 11/10/91 |
\n",
95 | "\t| 3 | Mortality | Scotts Bluff | 4/21/96 |
\n",
96 | "\t| 4 | Mortality | Sioux | 5/9/99 |
\n",
97 | "\t| 5 | Mortality | Box Butte | 9/29/99 |
\n",
98 | "\t| 6 | Track | Scotts Bluff | 11/12/99 |
\n",
99 | "\n",
100 | "
\n"
101 | ],
102 | "text/latex": [
103 | "\\begin{tabular}{r|llll}\n",
104 | " ID & Cofirm.Type & COUNTY & Date\\\\\n",
105 | "\\hline\n",
106 | "\t 1 & Track & Dawes & 9/14/91 \\\\\n",
107 | "\t 2 & Mortality & Sioux & 11/10/91 \\\\\n",
108 | "\t 3 & Mortality & Scotts Bluff & 4/21/96 \\\\\n",
109 | "\t 4 & Mortality & Sioux & 5/9/99 \\\\\n",
110 | "\t 5 & Mortality & Box Butte & 9/29/99 \\\\\n",
111 | "\t 6 & Track & Scotts Bluff & 11/12/99 \\\\\n",
112 | "\\end{tabular}\n"
113 | ],
114 | "text/markdown": [
115 | "\n",
116 | "ID | Cofirm.Type | COUNTY | Date | \n",
117 | "|---|---|---|---|---|---|\n",
118 | "| 1 | Track | Dawes | 9/14/91 | \n",
119 | "| 2 | Mortality | Sioux | 11/10/91 | \n",
120 | "| 3 | Mortality | Scotts Bluff | 4/21/96 | \n",
121 | "| 4 | Mortality | Sioux | 5/9/99 | \n",
122 | "| 5 | Mortality | Box Butte | 9/29/99 | \n",
123 | "| 6 | Track | Scotts Bluff | 11/12/99 | \n",
124 | "\n",
125 | "\n"
126 | ],
127 | "text/plain": [
128 | " ID Cofirm.Type COUNTY Date \n",
129 | "1 1 Track Dawes 9/14/91 \n",
130 | "2 2 Mortality Sioux 11/10/91\n",
131 | "3 3 Mortality Scotts Bluff 4/21/96 \n",
132 | "4 4 Mortality Sioux 5/9/99 \n",
133 | "5 5 Mortality Box Butte 9/29/99 \n",
134 | "6 6 Track Scotts Bluff 11/12/99"
135 | ]
136 | },
137 | "metadata": {},
138 | "output_type": "display_data"
139 | },
140 | {
141 | "data": {
142 | "text/html": [
143 | "393"
144 | ],
145 | "text/latex": [
146 | "393"
147 | ],
148 | "text/markdown": [
149 | "393"
150 | ],
151 | "text/plain": [
152 | "[1] 393"
153 | ]
154 | },
155 | "metadata": {},
156 | "output_type": "display_data"
157 | }
158 | ],
159 | "source": [
160 | "head(mountainlions)\n",
161 | "nrow(mountainlions)"
162 | ]
163 | },
164 | {
165 | "cell_type": "markdown",
166 | "metadata": {},
167 | "source": [
168 | "So what if we wanted to know how many mountain lion sightings there were in each county? To do that by hand, we'd have to take each of the 393 records and sort them into a pile. We'd put them in groups and then count them. \n",
169 | "\n",
170 | "`dplyr` has a group by function in it that does just this. A massive amount of data analysis involves grouping like things together at some point. So it's a good place to start. \n",
171 | "\n",
172 | "So to do this, we'll take our dataset and we'll introduce a new operator: `%>%`. The best way to read that operator, in my opinion, is to interpret that as \"and then do this.\" Here's the code: "
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": 11,
178 | "metadata": {},
179 | "outputs": [
180 | {
181 | "data": {
182 | "text/html": [
183 | "\n",
184 | "| COUNTY | count |
\n",
185 | "\n",
186 | "\t| Banner | 6 |
\n",
187 | "\t| Blaine | 3 |
\n",
188 | "\t| Box Butte | 4 |
\n",
189 | "\t| Brown | 15 |
\n",
190 | "\t| Buffalo | 3 |
\n",
191 | "\t| Cedar | 1 |
\n",
192 | "\t| Cherry | 30 |
\n",
193 | "\t| Custer | 8 |
\n",
194 | "\t| Dakota | 3 |
\n",
195 | "\t| Dawes | 111 |
\n",
196 | "\t| Dawson | 5 |
\n",
197 | "\t| Dixon | 3 |
\n",
198 | "\t| Douglas | 2 |
\n",
199 | "\t| Frontier | 1 |
\n",
200 | "\t| Hall | 1 |
\n",
201 | "\t| Holt | 2 |
\n",
202 | "\t| Hooker | 1 |
\n",
203 | "\t| Howard | 3 |
\n",
204 | "\t| Keith | 1 |
\n",
205 | "\t| Keya Paha | 20 |
\n",
206 | "\t| Kimball | 1 |
\n",
207 | "\t| Knox | 8 |
\n",
208 | "\t| Lincoln | 10 |
\n",
209 | "\t| Merrick | 1 |
\n",
210 | "\t| Morrill | 2 |
\n",
211 | "\t| Nance | 1 |
\n",
212 | "\t| Nemaha | 5 |
\n",
213 | "\t| Platte | 1 |
\n",
214 | "\t| Polk | 1 |
\n",
215 | "\t| Richardson | 2 |
\n",
216 | "\t| Rock | 11 |
\n",
217 | "\t| Sarpy | 1 |
\n",
218 | "\t| Saunders | 2 |
\n",
219 | "\t| Scotts Bluff | 26 |
\n",
220 | "\t| sheridan | 2 |
\n",
221 | "\t| Sheridan | 35 |
\n",
222 | "\t| Sherman | 1 |
\n",
223 | "\t| Sioux | 52 |
\n",
224 | "\t| Thomas | 5 |
\n",
225 | "\t| Thurston | 1 |
\n",
226 | "\t| Valley | 1 |
\n",
227 | "\t| Wheeler | 1 |
\n",
228 | "\n",
229 | "
\n"
230 | ],
231 | "text/latex": [
232 | "\\begin{tabular}{r|ll}\n",
233 | " COUNTY & count\\\\\n",
234 | "\\hline\n",
235 | "\t Banner & 6 \\\\\n",
236 | "\t Blaine & 3 \\\\\n",
237 | "\t Box Butte & 4 \\\\\n",
238 | "\t Brown & 15 \\\\\n",
239 | "\t Buffalo & 3 \\\\\n",
240 | "\t Cedar & 1 \\\\\n",
241 | "\t Cherry & 30 \\\\\n",
242 | "\t Custer & 8 \\\\\n",
243 | "\t Dakota & 3 \\\\\n",
244 | "\t Dawes & 111 \\\\\n",
245 | "\t Dawson & 5 \\\\\n",
246 | "\t Dixon & 3 \\\\\n",
247 | "\t Douglas & 2 \\\\\n",
248 | "\t Frontier & 1 \\\\\n",
249 | "\t Hall & 1 \\\\\n",
250 | "\t Holt & 2 \\\\\n",
251 | "\t Hooker & 1 \\\\\n",
252 | "\t Howard & 3 \\\\\n",
253 | "\t Keith & 1 \\\\\n",
254 | "\t Keya Paha & 20 \\\\\n",
255 | "\t Kimball & 1 \\\\\n",
256 | "\t Knox & 8 \\\\\n",
257 | "\t Lincoln & 10 \\\\\n",
258 | "\t Merrick & 1 \\\\\n",
259 | "\t Morrill & 2 \\\\\n",
260 | "\t Nance & 1 \\\\\n",
261 | "\t Nemaha & 5 \\\\\n",
262 | "\t Platte & 1 \\\\\n",
263 | "\t Polk & 1 \\\\\n",
264 | "\t Richardson & 2 \\\\\n",
265 | "\t Rock & 11 \\\\\n",
266 | "\t Sarpy & 1 \\\\\n",
267 | "\t Saunders & 2 \\\\\n",
268 | "\t Scotts Bluff & 26 \\\\\n",
269 | "\t sheridan & 2 \\\\\n",
270 | "\t Sheridan & 35 \\\\\n",
271 | "\t Sherman & 1 \\\\\n",
272 | "\t Sioux & 52 \\\\\n",
273 | "\t Thomas & 5 \\\\\n",
274 | "\t Thurston & 1 \\\\\n",
275 | "\t Valley & 1 \\\\\n",
276 | "\t Wheeler & 1 \\\\\n",
277 | "\\end{tabular}\n"
278 | ],
279 | "text/markdown": [
280 | "\n",
281 | "COUNTY | count | \n",
282 | "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
283 | "| Banner | 6 | \n",
284 | "| Blaine | 3 | \n",
285 | "| Box Butte | 4 | \n",
286 | "| Brown | 15 | \n",
287 | "| Buffalo | 3 | \n",
288 | "| Cedar | 1 | \n",
289 | "| Cherry | 30 | \n",
290 | "| Custer | 8 | \n",
291 | "| Dakota | 3 | \n",
292 | "| Dawes | 111 | \n",
293 | "| Dawson | 5 | \n",
294 | "| Dixon | 3 | \n",
295 | "| Douglas | 2 | \n",
296 | "| Frontier | 1 | \n",
297 | "| Hall | 1 | \n",
298 | "| Holt | 2 | \n",
299 | "| Hooker | 1 | \n",
300 | "| Howard | 3 | \n",
301 | "| Keith | 1 | \n",
302 | "| Keya Paha | 20 | \n",
303 | "| Kimball | 1 | \n",
304 | "| Knox | 8 | \n",
305 | "| Lincoln | 10 | \n",
306 | "| Merrick | 1 | \n",
307 | "| Morrill | 2 | \n",
308 | "| Nance | 1 | \n",
309 | "| Nemaha | 5 | \n",
310 | "| Platte | 1 | \n",
311 | "| Polk | 1 | \n",
312 | "| Richardson | 2 | \n",
313 | "| Rock | 11 | \n",
314 | "| Sarpy | 1 | \n",
315 | "| Saunders | 2 | \n",
316 | "| Scotts Bluff | 26 | \n",
317 | "| sheridan | 2 | \n",
318 | "| Sheridan | 35 | \n",
319 | "| Sherman | 1 | \n",
320 | "| Sioux | 52 | \n",
321 | "| Thomas | 5 | \n",
322 | "| Thurston | 1 | \n",
323 | "| Valley | 1 | \n",
324 | "| Wheeler | 1 | \n",
325 | "\n",
326 | "\n"
327 | ],
328 | "text/plain": [
329 | " COUNTY count\n",
330 | "1 Banner 6 \n",
331 | "2 Blaine 3 \n",
332 | "3 Box Butte 4 \n",
333 | "4 Brown 15 \n",
334 | "5 Buffalo 3 \n",
335 | "6 Cedar 1 \n",
336 | "7 Cherry 30 \n",
337 | "8 Custer 8 \n",
338 | "9 Dakota 3 \n",
339 | "10 Dawes 111 \n",
340 | "11 Dawson 5 \n",
341 | "12 Dixon 3 \n",
342 | "13 Douglas 2 \n",
343 | "14 Frontier 1 \n",
344 | "15 Hall 1 \n",
345 | "16 Holt 2 \n",
346 | "17 Hooker 1 \n",
347 | "18 Howard 3 \n",
348 | "19 Keith 1 \n",
349 | "20 Keya Paha 20 \n",
350 | "21 Kimball 1 \n",
351 | "22 Knox 8 \n",
352 | "23 Lincoln 10 \n",
353 | "24 Merrick 1 \n",
354 | "25 Morrill 2 \n",
355 | "26 Nance 1 \n",
356 | "27 Nemaha 5 \n",
357 | "28 Platte 1 \n",
358 | "29 Polk 1 \n",
359 | "30 Richardson 2 \n",
360 | "31 Rock 11 \n",
361 | "32 Sarpy 1 \n",
362 | "33 Saunders 2 \n",
363 | "34 Scotts Bluff 26 \n",
364 | "35 sheridan 2 \n",
365 | "36 Sheridan 35 \n",
366 | "37 Sherman 1 \n",
367 | "38 Sioux 52 \n",
368 | "39 Thomas 5 \n",
369 | "40 Thurston 1 \n",
370 | "41 Valley 1 \n",
371 | "42 Wheeler 1 "
372 | ]
373 | },
374 | "metadata": {},
375 | "output_type": "display_data"
376 | }
377 | ],
378 | "source": [
379 | "mountainlions %>%\n",
380 | " group_by(COUNTY) %>%\n",
381 | " summarise(\n",
382 | " count = n(),\n",
383 | " ) "
384 | ]
385 | },
386 | {
387 | "cell_type": "markdown",
388 | "metadata": {},
389 | "source": [
390 | "So let's walk through that. We start with our dataset -- `mountainlions` -- and then we tell it to group the data by a given field in the data. In this case, we wanted to group together all the counties, signified by the field name COUNTY, which you could get from looking at `head(mountainlions)`. So after we group the data, we need to count them up. In dplyr, we use `summarize` [which can do more than just count things](http://dplyr.tidyverse.org/reference/summarise.html). So inside the parentheses in summarize, we set up the summaries we want. In this case, we just want a count of the counties. So `count = n(),` says create a new field, called `count` and set it equal to `n()`, which might look weird, but it's common in stats. The number of things in a dataset? Statisticians call it n. There are n incidents in this dataset. So `n()` is a function that counts the number of things there are. \n",
391 | "\n",
392 | "And when we run that, we get a list of counties with a count next to them. But it's not in any order. So we'll add another And Then Do This %>% and use `arrange`. Arrange does what you think it does -- it arranges data in order. By default, it's in ascending order -- smallest to largest. But if we want to know the county with the most mountain lion sightings, we need to sort it in descending order. That looks like this:"
393 | ]
394 | },
395 | {
396 | "cell_type": "code",
397 | "execution_count": 12,
398 | "metadata": {},
399 | "outputs": [
400 | {
401 | "data": {
402 | "text/html": [
403 | "\n",
404 | "| COUNTY | count |
\n",
405 | "\n",
406 | "\t| Dawes | 111 |
\n",
407 | "\t| Sioux | 52 |
\n",
408 | "\t| Sheridan | 35 |
\n",
409 | "\t| Cherry | 30 |
\n",
410 | "\t| Scotts Bluff | 26 |
\n",
411 | "\t| Keya Paha | 20 |
\n",
412 | "\t| Brown | 15 |
\n",
413 | "\t| Rock | 11 |
\n",
414 | "\t| Lincoln | 10 |
\n",
415 | "\t| Custer | 8 |
\n",
416 | "\t| Knox | 8 |
\n",
417 | "\t| Banner | 6 |
\n",
418 | "\t| Dawson | 5 |
\n",
419 | "\t| Nemaha | 5 |
\n",
420 | "\t| Thomas | 5 |
\n",
421 | "\t| Box Butte | 4 |
\n",
422 | "\t| Blaine | 3 |
\n",
423 | "\t| Buffalo | 3 |
\n",
424 | "\t| Dakota | 3 |
\n",
425 | "\t| Dixon | 3 |
\n",
426 | "\t| Howard | 3 |
\n",
427 | "\t| Douglas | 2 |
\n",
428 | "\t| Holt | 2 |
\n",
429 | "\t| Morrill | 2 |
\n",
430 | "\t| Richardson | 2 |
\n",
431 | "\t| Saunders | 2 |
\n",
432 | "\t| sheridan | 2 |
\n",
433 | "\t| Cedar | 1 |
\n",
434 | "\t| Frontier | 1 |
\n",
435 | "\t| Hall | 1 |
\n",
436 | "\t| Hooker | 1 |
\n",
437 | "\t| Keith | 1 |
\n",
438 | "\t| Kimball | 1 |
\n",
439 | "\t| Merrick | 1 |
\n",
440 | "\t| Nance | 1 |
\n",
441 | "\t| Platte | 1 |
\n",
442 | "\t| Polk | 1 |
\n",
443 | "\t| Sarpy | 1 |
\n",
444 | "\t| Sherman | 1 |
\n",
445 | "\t| Thurston | 1 |
\n",
446 | "\t| Valley | 1 |
\n",
447 | "\t| Wheeler | 1 |
\n",
448 | "\n",
449 | "
\n"
450 | ],
451 | "text/latex": [
452 | "\\begin{tabular}{r|ll}\n",
453 | " COUNTY & count\\\\\n",
454 | "\\hline\n",
455 | "\t Dawes & 111 \\\\\n",
456 | "\t Sioux & 52 \\\\\n",
457 | "\t Sheridan & 35 \\\\\n",
458 | "\t Cherry & 30 \\\\\n",
459 | "\t Scotts Bluff & 26 \\\\\n",
460 | "\t Keya Paha & 20 \\\\\n",
461 | "\t Brown & 15 \\\\\n",
462 | "\t Rock & 11 \\\\\n",
463 | "\t Lincoln & 10 \\\\\n",
464 | "\t Custer & 8 \\\\\n",
465 | "\t Knox & 8 \\\\\n",
466 | "\t Banner & 6 \\\\\n",
467 | "\t Dawson & 5 \\\\\n",
468 | "\t Nemaha & 5 \\\\\n",
469 | "\t Thomas & 5 \\\\\n",
470 | "\t Box Butte & 4 \\\\\n",
471 | "\t Blaine & 3 \\\\\n",
472 | "\t Buffalo & 3 \\\\\n",
473 | "\t Dakota & 3 \\\\\n",
474 | "\t Dixon & 3 \\\\\n",
475 | "\t Howard & 3 \\\\\n",
476 | "\t Douglas & 2 \\\\\n",
477 | "\t Holt & 2 \\\\\n",
478 | "\t Morrill & 2 \\\\\n",
479 | "\t Richardson & 2 \\\\\n",
480 | "\t Saunders & 2 \\\\\n",
481 | "\t sheridan & 2 \\\\\n",
482 | "\t Cedar & 1 \\\\\n",
483 | "\t Frontier & 1 \\\\\n",
484 | "\t Hall & 1 \\\\\n",
485 | "\t Hooker & 1 \\\\\n",
486 | "\t Keith & 1 \\\\\n",
487 | "\t Kimball & 1 \\\\\n",
488 | "\t Merrick & 1 \\\\\n",
489 | "\t Nance & 1 \\\\\n",
490 | "\t Platte & 1 \\\\\n",
491 | "\t Polk & 1 \\\\\n",
492 | "\t Sarpy & 1 \\\\\n",
493 | "\t Sherman & 1 \\\\\n",
494 | "\t Thurston & 1 \\\\\n",
495 | "\t Valley & 1 \\\\\n",
496 | "\t Wheeler & 1 \\\\\n",
497 | "\\end{tabular}\n"
498 | ],
499 | "text/markdown": [
500 | "\n",
501 | "COUNTY | count | \n",
502 | "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
503 | "| Dawes | 111 | \n",
504 | "| Sioux | 52 | \n",
505 | "| Sheridan | 35 | \n",
506 | "| Cherry | 30 | \n",
507 | "| Scotts Bluff | 26 | \n",
508 | "| Keya Paha | 20 | \n",
509 | "| Brown | 15 | \n",
510 | "| Rock | 11 | \n",
511 | "| Lincoln | 10 | \n",
512 | "| Custer | 8 | \n",
513 | "| Knox | 8 | \n",
514 | "| Banner | 6 | \n",
515 | "| Dawson | 5 | \n",
516 | "| Nemaha | 5 | \n",
517 | "| Thomas | 5 | \n",
518 | "| Box Butte | 4 | \n",
519 | "| Blaine | 3 | \n",
520 | "| Buffalo | 3 | \n",
521 | "| Dakota | 3 | \n",
522 | "| Dixon | 3 | \n",
523 | "| Howard | 3 | \n",
524 | "| Douglas | 2 | \n",
525 | "| Holt | 2 | \n",
526 | "| Morrill | 2 | \n",
527 | "| Richardson | 2 | \n",
528 | "| Saunders | 2 | \n",
529 | "| sheridan | 2 | \n",
530 | "| Cedar | 1 | \n",
531 | "| Frontier | 1 | \n",
532 | "| Hall | 1 | \n",
533 | "| Hooker | 1 | \n",
534 | "| Keith | 1 | \n",
535 | "| Kimball | 1 | \n",
536 | "| Merrick | 1 | \n",
537 | "| Nance | 1 | \n",
538 | "| Platte | 1 | \n",
539 | "| Polk | 1 | \n",
540 | "| Sarpy | 1 | \n",
541 | "| Sherman | 1 | \n",
542 | "| Thurston | 1 | \n",
543 | "| Valley | 1 | \n",
544 | "| Wheeler | 1 | \n",
545 | "\n",
546 | "\n"
547 | ],
548 | "text/plain": [
549 | " COUNTY count\n",
550 | "1 Dawes 111 \n",
551 | "2 Sioux 52 \n",
552 | "3 Sheridan 35 \n",
553 | "4 Cherry 30 \n",
554 | "5 Scotts Bluff 26 \n",
555 | "6 Keya Paha 20 \n",
556 | "7 Brown 15 \n",
557 | "8 Rock 11 \n",
558 | "9 Lincoln 10 \n",
559 | "10 Custer 8 \n",
560 | "11 Knox 8 \n",
561 | "12 Banner 6 \n",
562 | "13 Dawson 5 \n",
563 | "14 Nemaha 5 \n",
564 | "15 Thomas 5 \n",
565 | "16 Box Butte 4 \n",
566 | "17 Blaine 3 \n",
567 | "18 Buffalo 3 \n",
568 | "19 Dakota 3 \n",
569 | "20 Dixon 3 \n",
570 | "21 Howard 3 \n",
571 | "22 Douglas 2 \n",
572 | "23 Holt 2 \n",
573 | "24 Morrill 2 \n",
574 | "25 Richardson 2 \n",
575 | "26 Saunders 2 \n",
576 | "27 sheridan 2 \n",
577 | "28 Cedar 1 \n",
578 | "29 Frontier 1 \n",
579 | "30 Hall 1 \n",
580 | "31 Hooker 1 \n",
581 | "32 Keith 1 \n",
582 | "33 Kimball 1 \n",
583 | "34 Merrick 1 \n",
584 | "35 Nance 1 \n",
585 | "36 Platte 1 \n",
586 | "37 Polk 1 \n",
587 | "38 Sarpy 1 \n",
588 | "39 Sherman 1 \n",
589 | "40 Thurston 1 \n",
590 | "41 Valley 1 \n",
591 | "42 Wheeler 1 "
592 | ]
593 | },
594 | "metadata": {},
595 | "output_type": "display_data"
596 | }
597 | ],
598 | "source": [
599 | "mountainlions %>%\n",
600 | " group_by(COUNTY) %>%\n",
601 | " summarise(\n",
602 | " count = n(),\n",
603 | " ) %>% arrange(desc(count))"
604 | ]
605 | },
606 | {
607 | "cell_type": "markdown",
608 | "metadata": {},
609 | "source": [
610 | "## Assignment\n",
611 | "\n",
612 | "Answer this question using what you have learned in this walkthrough. \n",
613 | "\n",
614 | "**What are the most common incidents UNL police reported from 2013 to 2016?**\n",
615 | "\n",
616 | "To do this, you'll need to [download this data](https://www.dropbox.com/s/47zogziohseavh7/unlcrime.csv?dl=0). \n",
617 | "\n",
618 | "#### Rubric\n",
619 | "\n",
620 | "1. Did you read the data into a dataframe? \n",
621 | "2. Did you use group by syntax correctly? \n",
622 | "3. Did you use summarize syntax correctly?\n",
623 | "4. Did you use arrange syntax correctly?\n",
624 | "5. Did you use Markdown comments to explain your steps? "
625 | ]
626 | },
627 | {
628 | "cell_type": "code",
629 | "execution_count": null,
630 | "metadata": {
631 | "collapsed": true
632 | },
633 | "outputs": [],
634 | "source": []
635 | }
636 | ],
637 | "metadata": {
638 | "anaconda-cloud": {},
639 | "kernelspec": {
640 | "display_name": "R",
641 | "language": "R",
642 | "name": "ir"
643 | },
644 | "language_info": {
645 | "codemirror_mode": "r",
646 | "file_extension": ".r",
647 | "mimetype": "text/x-r-source",
648 | "name": "R",
649 | "pygments_lexer": "r",
650 | "version": "3.4.1"
651 | }
652 | },
653 | "nbformat": 4,
654 | "nbformat_minor": 1
655 | }
656 |
--------------------------------------------------------------------------------
/Assignments/16_LiveFireExercise/LiveFireExercise.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Live fire: Census estimates release day\n",
8 | "\n",
9 | "Every year, the US Census Bureau releases new estimates of the population of every metropolitan area, county, city and town in the US. They are estimates because they only do the headcount census every 10 years. In the years in between, they use data and modeling to estimate what the population is. Every 10 years, they recalibrate their models based on how close they came to getting it right, given the headcount census. \n",
10 | "\n",
11 | "Today, we're going to simulate being in a newsroom on the day these new data are released. We're going to look at how a local news organization handled it, and we're going to show how a little bit of R and ggplot knowhow can make this better, easier and pushbutton quick next year. \n",
12 | "\n",
13 | "First, let's talk about how [a local newspaper covered it](http://journalstar.com/business/local/census-nebraska-s-big-counties-growing-rest-of-state-not/article_4317e30c-2a4b-5184-a888-ccebd4a22a04.html). What did they choose to focus on? What numerical measures did they use? Were they the right ones? Were they useful? Did they use any visuals? What could they have done differently?\n",
14 | "\n",
15 | "Now let's take our own crack at this. You are now on deadline. You have until the end of class to create a visual story out of this data, looking at the state of Nebraska. You will need to:\n",
16 | "\n",
17 | "* Create some tables of data to show trends.\n",
18 | "* Create at least two visualizations of the data.\n",
19 | "\n",
20 | "Some suggestions: Fastest growing? Fastest shrinking? Gainers to losers? One-year change vs since 2010? Every county in a lattice chart? Urban vs rural? Counties that have lost population every year this decade? Gained?\n",
21 | "\n",
22 | "Pair up, plan what you are going to do, and get started. To help you, here's some boilerplate code to get you going. **NOTE THE `read.csv` BITS. IT'S PULLING THE DATA STRAIGHT FROM THE URL.**"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 1,
28 | "metadata": {},
29 | "outputs": [
30 | {
31 | "name": "stderr",
32 | "output_type": "stream",
33 | "text": [
34 | "\n",
35 | "Attaching package: ‘dplyr’\n",
36 | "\n",
37 | "The following objects are masked from ‘package:stats’:\n",
38 | "\n",
39 | " filter, lag\n",
40 | "\n",
41 | "The following objects are masked from ‘package:base’:\n",
42 | "\n",
43 | " intersect, setdiff, setequal, union\n",
44 | "\n"
45 | ]
46 | }
47 | ],
48 | "source": [
49 | "library(dplyr)\n",
50 | "library(ggplot2)"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": 2,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": [
59 | "counties <- read.csv(url(\"https://www2.census.gov/programs-surveys/popest/datasets/2010-2017/counties/totals/co-est2017-alldata.csv\"))"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 3,
65 | "metadata": {},
66 | "outputs": [
67 | {
68 | "data": {
69 | "text/html": [
70 | "\n",
71 | "| SUMLEV | REGION | DIVISION | STATE | COUNTY | STNAME | CTYNAME | CENSUS2010POP | ESTIMATESBASE2010 | POPESTIMATE2010 | ⋯ | RDOMESTICMIG2015 | RDOMESTICMIG2016 | RDOMESTICMIG2017 | RNETMIG2011 | RNETMIG2012 | RNETMIG2013 | RNETMIG2014 | RNETMIG2015 | RNETMIG2016 | RNETMIG2017 |
\n",
72 | "\n",
73 | "\t| 40 | 3 | 6 | 1 | 0 | Alabama | Alabama | 4779736 | 4780135 | 4785579 | ⋯ | -0.3172050 | -0.404473 | 0.7888823 | 0.4507405 | 0.9393925 | 1.3642955 | 0.6942708 | 0.6785751 | 0.5589306 | 1.708218 |
\n",
74 | "\t| 50 | 3 | 6 | 1 | 1 | Alabama | Autauga County | 54571 | 54571 | 54750 | ⋯ | -1.9507393 | 4.831269 | 1.0471015 | 5.9118318 | -6.1021012 | -4.0502819 | 2.0993255 | -1.6590399 | 5.1037088 | 1.317904 |
\n",
75 | "\t| 50 | 3 | 6 | 1 | 3 | Alabama | Baldwin County | 182265 | 182265 | 183110 | ⋯ | 17.0478719 | 20.493601 | 22.3831750 | 16.2859400 | 17.1967858 | 22.6152855 | 20.3809040 | 17.9037487 | 21.3172439 | 23.163873 |
\n",
76 | "\t| 50 | 3 | 6 | 1 | 5 | Alabama | Barbour County | 27457 | 27457 | 27332 | ⋯ | -16.2224360 | -18.755525 | -19.0423948 | 0.2560211 | -6.8224333 | -8.0189202 | -5.5497616 | -16.4110690 | -18.9476921 | -19.159940 |
\n",
77 | "\t| 50 | 3 | 6 | 1 | 7 | Alabama | Bibb County | 22915 | 22919 | 22872 | ⋯ | 0.9313878 | -1.416117 | -0.8829827 | -5.0419800 | -4.0966456 | -5.8900379 | 1.2434497 | 1.8184237 | -0.5310439 | 0.000000 |
\n",
78 | "\t| 50 | 3 | 6 | 1 | 9 | Alabama | Blount County | 57322 | 57324 | 57381 | ⋯ | -1.5633685 | -1.736835 | 6.2124162 | 0.2435990 | -1.3546723 | -0.4860352 | -1.7713100 | -0.5384936 | -0.6599972 | 7.285313 |
\n",
79 | "\n",
80 | "
\n"
81 | ],
82 | "text/latex": [
83 | "\\begin{tabular}{r|llllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll}\n",
84 | " SUMLEV & REGION & DIVISION & STATE & COUNTY & STNAME & CTYNAME & CENSUS2010POP & ESTIMATESBASE2010 & POPESTIMATE2010 & ⋯ & RDOMESTICMIG2015 & RDOMESTICMIG2016 & RDOMESTICMIG2017 & RNETMIG2011 & RNETMIG2012 & RNETMIG2013 & RNETMIG2014 & RNETMIG2015 & RNETMIG2016 & RNETMIG2017\\\\\n",
85 | "\\hline\n",
86 | "\t 40 & 3 & 6 & 1 & 0 & Alabama & Alabama & 4779736 & 4780135 & 4785579 & ⋯ & -0.3172050 & -0.404473 & 0.7888823 & 0.4507405 & 0.9393925 & 1.3642955 & 0.6942708 & 0.6785751 & 0.5589306 & 1.708218 \\\\\n",
87 | "\t 50 & 3 & 6 & 1 & 1 & Alabama & Autauga County & 54571 & 54571 & 54750 & ⋯ & -1.9507393 & 4.831269 & 1.0471015 & 5.9118318 & -6.1021012 & -4.0502819 & 2.0993255 & -1.6590399 & 5.1037088 & 1.317904 \\\\\n",
88 | "\t 50 & 3 & 6 & 1 & 3 & Alabama & Baldwin County & 182265 & 182265 & 183110 & ⋯ & 17.0478719 & 20.493601 & 22.3831750 & 16.2859400 & 17.1967858 & 22.6152855 & 20.3809040 & 17.9037487 & 21.3172439 & 23.163873 \\\\\n",
89 | "\t 50 & 3 & 6 & 1 & 5 & Alabama & Barbour County & 27457 & 27457 & 27332 & ⋯ & -16.2224360 & -18.755525 & -19.0423948 & 0.2560211 & -6.8224333 & -8.0189202 & -5.5497616 & -16.4110690 & -18.9476921 & -19.159940 \\\\\n",
90 | "\t 50 & 3 & 6 & 1 & 7 & Alabama & Bibb County & 22915 & 22919 & 22872 & ⋯ & 0.9313878 & -1.416117 & -0.8829827 & -5.0419800 & -4.0966456 & -5.8900379 & 1.2434497 & 1.8184237 & -0.5310439 & 0.000000 \\\\\n",
91 | "\t 50 & 3 & 6 & 1 & 9 & Alabama & Blount County & 57322 & 57324 & 57381 & ⋯ & -1.5633685 & -1.736835 & 6.2124162 & 0.2435990 & -1.3546723 & -0.4860352 & -1.7713100 & -0.5384936 & -0.6599972 & 7.285313 \\\\\n",
92 | "\\end{tabular}\n"
93 | ],
94 | "text/markdown": [
95 | "\n",
96 | "SUMLEV | REGION | DIVISION | STATE | COUNTY | STNAME | CTYNAME | CENSUS2010POP | ESTIMATESBASE2010 | POPESTIMATE2010 | ⋯ | RDOMESTICMIG2015 | RDOMESTICMIG2016 | RDOMESTICMIG2017 | RNETMIG2011 | RNETMIG2012 | RNETMIG2013 | RNETMIG2014 | RNETMIG2015 | RNETMIG2016 | RNETMIG2017 | \n",
97 | "|---|---|---|---|---|---|\n",
98 | "| 40 | 3 | 6 | 1 | 0 | Alabama | Alabama | 4779736 | 4780135 | 4785579 | ⋯ | -0.3172050 | -0.404473 | 0.7888823 | 0.4507405 | 0.9393925 | 1.3642955 | 0.6942708 | 0.6785751 | 0.5589306 | 1.708218 | \n",
99 | "| 50 | 3 | 6 | 1 | 1 | Alabama | Autauga County | 54571 | 54571 | 54750 | ⋯ | -1.9507393 | 4.831269 | 1.0471015 | 5.9118318 | -6.1021012 | -4.0502819 | 2.0993255 | -1.6590399 | 5.1037088 | 1.317904 | \n",
100 | "| 50 | 3 | 6 | 1 | 3 | Alabama | Baldwin County | 182265 | 182265 | 183110 | ⋯ | 17.0478719 | 20.493601 | 22.3831750 | 16.2859400 | 17.1967858 | 22.6152855 | 20.3809040 | 17.9037487 | 21.3172439 | 23.163873 | \n",
101 | "| 50 | 3 | 6 | 1 | 5 | Alabama | Barbour County | 27457 | 27457 | 27332 | ⋯ | -16.2224360 | -18.755525 | -19.0423948 | 0.2560211 | -6.8224333 | -8.0189202 | -5.5497616 | -16.4110690 | -18.9476921 | -19.159940 | \n",
102 | "| 50 | 3 | 6 | 1 | 7 | Alabama | Bibb County | 22915 | 22919 | 22872 | ⋯ | 0.9313878 | -1.416117 | -0.8829827 | -5.0419800 | -4.0966456 | -5.8900379 | 1.2434497 | 1.8184237 | -0.5310439 | 0.000000 | \n",
103 | "| 50 | 3 | 6 | 1 | 9 | Alabama | Blount County | 57322 | 57324 | 57381 | ⋯ | -1.5633685 | -1.736835 | 6.2124162 | 0.2435990 | -1.3546723 | -0.4860352 | -1.7713100 | -0.5384936 | -0.6599972 | 7.285313 | \n",
104 | "\n",
105 | "\n"
106 | ],
107 | "text/plain": [
108 | " SUMLEV REGION DIVISION STATE COUNTY STNAME CTYNAME CENSUS2010POP\n",
109 | "1 40 3 6 1 0 Alabama Alabama 4779736 \n",
110 | "2 50 3 6 1 1 Alabama Autauga County 54571 \n",
111 | "3 50 3 6 1 3 Alabama Baldwin County 182265 \n",
112 | "4 50 3 6 1 5 Alabama Barbour County 27457 \n",
113 | "5 50 3 6 1 7 Alabama Bibb County 22915 \n",
114 | "6 50 3 6 1 9 Alabama Blount County 57322 \n",
115 | " ESTIMATESBASE2010 POPESTIMATE2010 ⋯ RDOMESTICMIG2015 RDOMESTICMIG2016\n",
116 | "1 4780135 4785579 ⋯ -0.3172050 -0.404473 \n",
117 | "2 54571 54750 ⋯ -1.9507393 4.831269 \n",
118 | "3 182265 183110 ⋯ 17.0478719 20.493601 \n",
119 | "4 27457 27332 ⋯ -16.2224360 -18.755525 \n",
120 | "5 22919 22872 ⋯ 0.9313878 -1.416117 \n",
121 | "6 57324 57381 ⋯ -1.5633685 -1.736835 \n",
122 | " RDOMESTICMIG2017 RNETMIG2011 RNETMIG2012 RNETMIG2013 RNETMIG2014 RNETMIG2015\n",
123 | "1 0.7888823 0.4507405 0.9393925 1.3642955 0.6942708 0.6785751\n",
124 | "2 1.0471015 5.9118318 -6.1021012 -4.0502819 2.0993255 -1.6590399\n",
125 | "3 22.3831750 16.2859400 17.1967858 22.6152855 20.3809040 17.9037487\n",
126 | "4 -19.0423948 0.2560211 -6.8224333 -8.0189202 -5.5497616 -16.4110690\n",
127 | "5 -0.8829827 -5.0419800 -4.0966456 -5.8900379 1.2434497 1.8184237\n",
128 | "6 6.2124162 0.2435990 -1.3546723 -0.4860352 -1.7713100 -0.5384936\n",
129 | " RNETMIG2016 RNETMIG2017\n",
130 | "1 0.5589306 1.708218 \n",
131 | "2 5.1037088 1.317904 \n",
132 | "3 21.3172439 23.163873 \n",
133 | "4 -18.9476921 -19.159940 \n",
134 | "5 -0.5310439 0.000000 \n",
135 | "6 -0.6599972 7.285313 "
136 | ]
137 | },
138 | "metadata": {},
139 | "output_type": "display_data"
140 | }
141 | ],
142 | "source": [
143 | "head(counties)"
144 | ]
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": 4,
149 | "metadata": {},
150 | "outputs": [
151 | {
152 | "data": {
153 | "text/html": [
154 | "\n",
155 | "\t- 'SUMLEV'
\n",
156 | "\t- 'REGION'
\n",
157 | "\t- 'DIVISION'
\n",
158 | "\t- 'STATE'
\n",
159 | "\t- 'COUNTY'
\n",
160 | "\t- 'STNAME'
\n",
161 | "\t- 'CTYNAME'
\n",
162 | "\t- 'CENSUS2010POP'
\n",
163 | "\t- 'ESTIMATESBASE2010'
\n",
164 | "\t- 'POPESTIMATE2010'
\n",
165 | "\t- 'POPESTIMATE2011'
\n",
166 | "\t- 'POPESTIMATE2012'
\n",
167 | "\t- 'POPESTIMATE2013'
\n",
168 | "\t- 'POPESTIMATE2014'
\n",
169 | "\t- 'POPESTIMATE2015'
\n",
170 | "\t- 'POPESTIMATE2016'
\n",
171 | "\t- 'POPESTIMATE2017'
\n",
172 | "\t- 'NPOPCHG_2010'
\n",
173 | "\t- 'NPOPCHG_2011'
\n",
174 | "\t- 'NPOPCHG_2012'
\n",
175 | "\t- 'NPOPCHG_2013'
\n",
176 | "\t- 'NPOPCHG_2014'
\n",
177 | "\t- 'NPOPCHG_2015'
\n",
178 | "\t- 'NPOPCHG_2016'
\n",
179 | "\t- 'NPOPCHG_2017'
\n",
180 | "\t- 'BIRTHS2010'
\n",
181 | "\t- 'BIRTHS2011'
\n",
182 | "\t- 'BIRTHS2012'
\n",
183 | "\t- 'BIRTHS2013'
\n",
184 | "\t- 'BIRTHS2014'
\n",
185 | "\t- 'BIRTHS2015'
\n",
186 | "\t- 'BIRTHS2016'
\n",
187 | "\t- 'BIRTHS2017'
\n",
188 | "\t- 'DEATHS2010'
\n",
189 | "\t- 'DEATHS2011'
\n",
190 | "\t- 'DEATHS2012'
\n",
191 | "\t- 'DEATHS2013'
\n",
192 | "\t- 'DEATHS2014'
\n",
193 | "\t- 'DEATHS2015'
\n",
194 | "\t- 'DEATHS2016'
\n",
195 | "\t- 'DEATHS2017'
\n",
196 | "\t- 'NATURALINC2010'
\n",
197 | "\t- 'NATURALINC2011'
\n",
198 | "\t- 'NATURALINC2012'
\n",
199 | "\t- 'NATURALINC2013'
\n",
200 | "\t- 'NATURALINC2014'
\n",
201 | "\t- 'NATURALINC2015'
\n",
202 | "\t- 'NATURALINC2016'
\n",
203 | "\t- 'NATURALINC2017'
\n",
204 | "\t- 'INTERNATIONALMIG2010'
\n",
205 | "\t- 'INTERNATIONALMIG2011'
\n",
206 | "\t- 'INTERNATIONALMIG2012'
\n",
207 | "\t- 'INTERNATIONALMIG2013'
\n",
208 | "\t- 'INTERNATIONALMIG2014'
\n",
209 | "\t- 'INTERNATIONALMIG2015'
\n",
210 | "\t- 'INTERNATIONALMIG2016'
\n",
211 | "\t- 'INTERNATIONALMIG2017'
\n",
212 | "\t- 'DOMESTICMIG2010'
\n",
213 | "\t- 'DOMESTICMIG2011'
\n",
214 | "\t- 'DOMESTICMIG2012'
\n",
215 | "\t- 'DOMESTICMIG2013'
\n",
216 | "\t- 'DOMESTICMIG2014'
\n",
217 | "\t- 'DOMESTICMIG2015'
\n",
218 | "\t- 'DOMESTICMIG2016'
\n",
219 | "\t- 'DOMESTICMIG2017'
\n",
220 | "\t- 'NETMIG2010'
\n",
221 | "\t- 'NETMIG2011'
\n",
222 | "\t- 'NETMIG2012'
\n",
223 | "\t- 'NETMIG2013'
\n",
224 | "\t- 'NETMIG2014'
\n",
225 | "\t- 'NETMIG2015'
\n",
226 | "\t- 'NETMIG2016'
\n",
227 | "\t- 'NETMIG2017'
\n",
228 | "\t- 'RESIDUAL2010'
\n",
229 | "\t- 'RESIDUAL2011'
\n",
230 | "\t- 'RESIDUAL2012'
\n",
231 | "\t- 'RESIDUAL2013'
\n",
232 | "\t- 'RESIDUAL2014'
\n",
233 | "\t- 'RESIDUAL2015'
\n",
234 | "\t- 'RESIDUAL2016'
\n",
235 | "\t- 'RESIDUAL2017'
\n",
236 | "\t- 'GQESTIMATESBASE2010'
\n",
237 | "\t- 'GQESTIMATES2010'
\n",
238 | "\t- 'GQESTIMATES2011'
\n",
239 | "\t- 'GQESTIMATES2012'
\n",
240 | "\t- 'GQESTIMATES2013'
\n",
241 | "\t- 'GQESTIMATES2014'
\n",
242 | "\t- 'GQESTIMATES2015'
\n",
243 | "\t- 'GQESTIMATES2016'
\n",
244 | "\t- 'GQESTIMATES2017'
\n",
245 | "\t- 'RBIRTH2011'
\n",
246 | "\t- 'RBIRTH2012'
\n",
247 | "\t- 'RBIRTH2013'
\n",
248 | "\t- 'RBIRTH2014'
\n",
249 | "\t- 'RBIRTH2015'
\n",
250 | "\t- 'RBIRTH2016'
\n",
251 | "\t- 'RBIRTH2017'
\n",
252 | "\t- 'RDEATH2011'
\n",
253 | "\t- 'RDEATH2012'
\n",
254 | "\t- 'RDEATH2013'
\n",
255 | "\t- 'RDEATH2014'
\n",
256 | "\t- 'RDEATH2015'
\n",
257 | "\t- 'RDEATH2016'
\n",
258 | "\t- 'RDEATH2017'
\n",
259 | "\t- 'RNATURALINC2011'
\n",
260 | "\t- 'RNATURALINC2012'
\n",
261 | "\t- 'RNATURALINC2013'
\n",
262 | "\t- 'RNATURALINC2014'
\n",
263 | "\t- 'RNATURALINC2015'
\n",
264 | "\t- 'RNATURALINC2016'
\n",
265 | "\t- 'RNATURALINC2017'
\n",
266 | "\t- 'RINTERNATIONALMIG2011'
\n",
267 | "\t- 'RINTERNATIONALMIG2012'
\n",
268 | "\t- 'RINTERNATIONALMIG2013'
\n",
269 | "\t- 'RINTERNATIONALMIG2014'
\n",
270 | "\t- 'RINTERNATIONALMIG2015'
\n",
271 | "\t- 'RINTERNATIONALMIG2016'
\n",
272 | "\t- 'RINTERNATIONALMIG2017'
\n",
273 | "\t- 'RDOMESTICMIG2011'
\n",
274 | "\t- 'RDOMESTICMIG2012'
\n",
275 | "\t- 'RDOMESTICMIG2013'
\n",
276 | "\t- 'RDOMESTICMIG2014'
\n",
277 | "\t- 'RDOMESTICMIG2015'
\n",
278 | "\t- 'RDOMESTICMIG2016'
\n",
279 | "\t- 'RDOMESTICMIG2017'
\n",
280 | "\t- 'RNETMIG2011'
\n",
281 | "\t- 'RNETMIG2012'
\n",
282 | "\t- 'RNETMIG2013'
\n",
283 | "\t- 'RNETMIG2014'
\n",
284 | "\t- 'RNETMIG2015'
\n",
285 | "\t- 'RNETMIG2016'
\n",
286 | "\t- 'RNETMIG2017'
\n",
287 | "
\n"
288 | ],
289 | "text/latex": [
290 | "\\begin{enumerate*}\n",
291 | "\\item 'SUMLEV'\n",
292 | "\\item 'REGION'\n",
293 | "\\item 'DIVISION'\n",
294 | "\\item 'STATE'\n",
295 | "\\item 'COUNTY'\n",
296 | "\\item 'STNAME'\n",
297 | "\\item 'CTYNAME'\n",
298 | "\\item 'CENSUS2010POP'\n",
299 | "\\item 'ESTIMATESBASE2010'\n",
300 | "\\item 'POPESTIMATE2010'\n",
301 | "\\item 'POPESTIMATE2011'\n",
302 | "\\item 'POPESTIMATE2012'\n",
303 | "\\item 'POPESTIMATE2013'\n",
304 | "\\item 'POPESTIMATE2014'\n",
305 | "\\item 'POPESTIMATE2015'\n",
306 | "\\item 'POPESTIMATE2016'\n",
307 | "\\item 'POPESTIMATE2017'\n",
308 | "\\item 'NPOPCHG\\_2010'\n",
309 | "\\item 'NPOPCHG\\_2011'\n",
310 | "\\item 'NPOPCHG\\_2012'\n",
311 | "\\item 'NPOPCHG\\_2013'\n",
312 | "\\item 'NPOPCHG\\_2014'\n",
313 | "\\item 'NPOPCHG\\_2015'\n",
314 | "\\item 'NPOPCHG\\_2016'\n",
315 | "\\item 'NPOPCHG\\_2017'\n",
316 | "\\item 'BIRTHS2010'\n",
317 | "\\item 'BIRTHS2011'\n",
318 | "\\item 'BIRTHS2012'\n",
319 | "\\item 'BIRTHS2013'\n",
320 | "\\item 'BIRTHS2014'\n",
321 | "\\item 'BIRTHS2015'\n",
322 | "\\item 'BIRTHS2016'\n",
323 | "\\item 'BIRTHS2017'\n",
324 | "\\item 'DEATHS2010'\n",
325 | "\\item 'DEATHS2011'\n",
326 | "\\item 'DEATHS2012'\n",
327 | "\\item 'DEATHS2013'\n",
328 | "\\item 'DEATHS2014'\n",
329 | "\\item 'DEATHS2015'\n",
330 | "\\item 'DEATHS2016'\n",
331 | "\\item 'DEATHS2017'\n",
332 | "\\item 'NATURALINC2010'\n",
333 | "\\item 'NATURALINC2011'\n",
334 | "\\item 'NATURALINC2012'\n",
335 | "\\item 'NATURALINC2013'\n",
336 | "\\item 'NATURALINC2014'\n",
337 | "\\item 'NATURALINC2015'\n",
338 | "\\item 'NATURALINC2016'\n",
339 | "\\item 'NATURALINC2017'\n",
340 | "\\item 'INTERNATIONALMIG2010'\n",
341 | "\\item 'INTERNATIONALMIG2011'\n",
342 | "\\item 'INTERNATIONALMIG2012'\n",
343 | "\\item 'INTERNATIONALMIG2013'\n",
344 | "\\item 'INTERNATIONALMIG2014'\n",
345 | "\\item 'INTERNATIONALMIG2015'\n",
346 | "\\item 'INTERNATIONALMIG2016'\n",
347 | "\\item 'INTERNATIONALMIG2017'\n",
348 | "\\item 'DOMESTICMIG2010'\n",
349 | "\\item 'DOMESTICMIG2011'\n",
350 | "\\item 'DOMESTICMIG2012'\n",
351 | "\\item 'DOMESTICMIG2013'\n",
352 | "\\item 'DOMESTICMIG2014'\n",
353 | "\\item 'DOMESTICMIG2015'\n",
354 | "\\item 'DOMESTICMIG2016'\n",
355 | "\\item 'DOMESTICMIG2017'\n",
356 | "\\item 'NETMIG2010'\n",
357 | "\\item 'NETMIG2011'\n",
358 | "\\item 'NETMIG2012'\n",
359 | "\\item 'NETMIG2013'\n",
360 | "\\item 'NETMIG2014'\n",
361 | "\\item 'NETMIG2015'\n",
362 | "\\item 'NETMIG2016'\n",
363 | "\\item 'NETMIG2017'\n",
364 | "\\item 'RESIDUAL2010'\n",
365 | "\\item 'RESIDUAL2011'\n",
366 | "\\item 'RESIDUAL2012'\n",
367 | "\\item 'RESIDUAL2013'\n",
368 | "\\item 'RESIDUAL2014'\n",
369 | "\\item 'RESIDUAL2015'\n",
370 | "\\item 'RESIDUAL2016'\n",
371 | "\\item 'RESIDUAL2017'\n",
372 | "\\item 'GQESTIMATESBASE2010'\n",
373 | "\\item 'GQESTIMATES2010'\n",
374 | "\\item 'GQESTIMATES2011'\n",
375 | "\\item 'GQESTIMATES2012'\n",
376 | "\\item 'GQESTIMATES2013'\n",
377 | "\\item 'GQESTIMATES2014'\n",
378 | "\\item 'GQESTIMATES2015'\n",
379 | "\\item 'GQESTIMATES2016'\n",
380 | "\\item 'GQESTIMATES2017'\n",
381 | "\\item 'RBIRTH2011'\n",
382 | "\\item 'RBIRTH2012'\n",
383 | "\\item 'RBIRTH2013'\n",
384 | "\\item 'RBIRTH2014'\n",
385 | "\\item 'RBIRTH2015'\n",
386 | "\\item 'RBIRTH2016'\n",
387 | "\\item 'RBIRTH2017'\n",
388 | "\\item 'RDEATH2011'\n",
389 | "\\item 'RDEATH2012'\n",
390 | "\\item 'RDEATH2013'\n",
391 | "\\item 'RDEATH2014'\n",
392 | "\\item 'RDEATH2015'\n",
393 | "\\item 'RDEATH2016'\n",
394 | "\\item 'RDEATH2017'\n",
395 | "\\item 'RNATURALINC2011'\n",
396 | "\\item 'RNATURALINC2012'\n",
397 | "\\item 'RNATURALINC2013'\n",
398 | "\\item 'RNATURALINC2014'\n",
399 | "\\item 'RNATURALINC2015'\n",
400 | "\\item 'RNATURALINC2016'\n",
401 | "\\item 'RNATURALINC2017'\n",
402 | "\\item 'RINTERNATIONALMIG2011'\n",
403 | "\\item 'RINTERNATIONALMIG2012'\n",
404 | "\\item 'RINTERNATIONALMIG2013'\n",
405 | "\\item 'RINTERNATIONALMIG2014'\n",
406 | "\\item 'RINTERNATIONALMIG2015'\n",
407 | "\\item 'RINTERNATIONALMIG2016'\n",
408 | "\\item 'RINTERNATIONALMIG2017'\n",
409 | "\\item 'RDOMESTICMIG2011'\n",
410 | "\\item 'RDOMESTICMIG2012'\n",
411 | "\\item 'RDOMESTICMIG2013'\n",
412 | "\\item 'RDOMESTICMIG2014'\n",
413 | "\\item 'RDOMESTICMIG2015'\n",
414 | "\\item 'RDOMESTICMIG2016'\n",
415 | "\\item 'RDOMESTICMIG2017'\n",
416 | "\\item 'RNETMIG2011'\n",
417 | "\\item 'RNETMIG2012'\n",
418 | "\\item 'RNETMIG2013'\n",
419 | "\\item 'RNETMIG2014'\n",
420 | "\\item 'RNETMIG2015'\n",
421 | "\\item 'RNETMIG2016'\n",
422 | "\\item 'RNETMIG2017'\n",
423 | "\\end{enumerate*}\n"
424 | ],
425 | "text/markdown": [
426 | "1. 'SUMLEV'\n",
427 | "2. 'REGION'\n",
428 | "3. 'DIVISION'\n",
429 | "4. 'STATE'\n",
430 | "5. 'COUNTY'\n",
431 | "6. 'STNAME'\n",
432 | "7. 'CTYNAME'\n",
433 | "8. 'CENSUS2010POP'\n",
434 | "9. 'ESTIMATESBASE2010'\n",
435 | "10. 'POPESTIMATE2010'\n",
436 | "11. 'POPESTIMATE2011'\n",
437 | "12. 'POPESTIMATE2012'\n",
438 | "13. 'POPESTIMATE2013'\n",
439 | "14. 'POPESTIMATE2014'\n",
440 | "15. 'POPESTIMATE2015'\n",
441 | "16. 'POPESTIMATE2016'\n",
442 | "17. 'POPESTIMATE2017'\n",
443 | "18. 'NPOPCHG_2010'\n",
444 | "19. 'NPOPCHG_2011'\n",
445 | "20. 'NPOPCHG_2012'\n",
446 | "21. 'NPOPCHG_2013'\n",
447 | "22. 'NPOPCHG_2014'\n",
448 | "23. 'NPOPCHG_2015'\n",
449 | "24. 'NPOPCHG_2016'\n",
450 | "25. 'NPOPCHG_2017'\n",
451 | "26. 'BIRTHS2010'\n",
452 | "27. 'BIRTHS2011'\n",
453 | "28. 'BIRTHS2012'\n",
454 | "29. 'BIRTHS2013'\n",
455 | "30. 'BIRTHS2014'\n",
456 | "31. 'BIRTHS2015'\n",
457 | "32. 'BIRTHS2016'\n",
458 | "33. 'BIRTHS2017'\n",
459 | "34. 'DEATHS2010'\n",
460 | "35. 'DEATHS2011'\n",
461 | "36. 'DEATHS2012'\n",
462 | "37. 'DEATHS2013'\n",
463 | "38. 'DEATHS2014'\n",
464 | "39. 'DEATHS2015'\n",
465 | "40. 'DEATHS2016'\n",
466 | "41. 'DEATHS2017'\n",
467 | "42. 'NATURALINC2010'\n",
468 | "43. 'NATURALINC2011'\n",
469 | "44. 'NATURALINC2012'\n",
470 | "45. 'NATURALINC2013'\n",
471 | "46. 'NATURALINC2014'\n",
472 | "47. 'NATURALINC2015'\n",
473 | "48. 'NATURALINC2016'\n",
474 | "49. 'NATURALINC2017'\n",
475 | "50. 'INTERNATIONALMIG2010'\n",
476 | "51. 'INTERNATIONALMIG2011'\n",
477 | "52. 'INTERNATIONALMIG2012'\n",
478 | "53. 'INTERNATIONALMIG2013'\n",
479 | "54. 'INTERNATIONALMIG2014'\n",
480 | "55. 'INTERNATIONALMIG2015'\n",
481 | "56. 'INTERNATIONALMIG2016'\n",
482 | "57. 'INTERNATIONALMIG2017'\n",
483 | "58. 'DOMESTICMIG2010'\n",
484 | "59. 'DOMESTICMIG2011'\n",
485 | "60. 'DOMESTICMIG2012'\n",
486 | "61. 'DOMESTICMIG2013'\n",
487 | "62. 'DOMESTICMIG2014'\n",
488 | "63. 'DOMESTICMIG2015'\n",
489 | "64. 'DOMESTICMIG2016'\n",
490 | "65. 'DOMESTICMIG2017'\n",
491 | "66. 'NETMIG2010'\n",
492 | "67. 'NETMIG2011'\n",
493 | "68. 'NETMIG2012'\n",
494 | "69. 'NETMIG2013'\n",
495 | "70. 'NETMIG2014'\n",
496 | "71. 'NETMIG2015'\n",
497 | "72. 'NETMIG2016'\n",
498 | "73. 'NETMIG2017'\n",
499 | "74. 'RESIDUAL2010'\n",
500 | "75. 'RESIDUAL2011'\n",
501 | "76. 'RESIDUAL2012'\n",
502 | "77. 'RESIDUAL2013'\n",
503 | "78. 'RESIDUAL2014'\n",
504 | "79. 'RESIDUAL2015'\n",
505 | "80. 'RESIDUAL2016'\n",
506 | "81. 'RESIDUAL2017'\n",
507 | "82. 'GQESTIMATESBASE2010'\n",
508 | "83. 'GQESTIMATES2010'\n",
509 | "84. 'GQESTIMATES2011'\n",
510 | "85. 'GQESTIMATES2012'\n",
511 | "86. 'GQESTIMATES2013'\n",
512 | "87. 'GQESTIMATES2014'\n",
513 | "88. 'GQESTIMATES2015'\n",
514 | "89. 'GQESTIMATES2016'\n",
515 | "90. 'GQESTIMATES2017'\n",
516 | "91. 'RBIRTH2011'\n",
517 | "92. 'RBIRTH2012'\n",
518 | "93. 'RBIRTH2013'\n",
519 | "94. 'RBIRTH2014'\n",
520 | "95. 'RBIRTH2015'\n",
521 | "96. 'RBIRTH2016'\n",
522 | "97. 'RBIRTH2017'\n",
523 | "98. 'RDEATH2011'\n",
524 | "99. 'RDEATH2012'\n",
525 | "100. 'RDEATH2013'\n",
526 | "101. 'RDEATH2014'\n",
527 | "102. 'RDEATH2015'\n",
528 | "103. 'RDEATH2016'\n",
529 | "104. 'RDEATH2017'\n",
530 | "105. 'RNATURALINC2011'\n",
531 | "106. 'RNATURALINC2012'\n",
532 | "107. 'RNATURALINC2013'\n",
533 | "108. 'RNATURALINC2014'\n",
534 | "109. 'RNATURALINC2015'\n",
535 | "110. 'RNATURALINC2016'\n",
536 | "111. 'RNATURALINC2017'\n",
537 | "112. 'RINTERNATIONALMIG2011'\n",
538 | "113. 'RINTERNATIONALMIG2012'\n",
539 | "114. 'RINTERNATIONALMIG2013'\n",
540 | "115. 'RINTERNATIONALMIG2014'\n",
541 | "116. 'RINTERNATIONALMIG2015'\n",
542 | "117. 'RINTERNATIONALMIG2016'\n",
543 | "118. 'RINTERNATIONALMIG2017'\n",
544 | "119. 'RDOMESTICMIG2011'\n",
545 | "120. 'RDOMESTICMIG2012'\n",
546 | "121. 'RDOMESTICMIG2013'\n",
547 | "122. 'RDOMESTICMIG2014'\n",
548 | "123. 'RDOMESTICMIG2015'\n",
549 | "124. 'RDOMESTICMIG2016'\n",
550 | "125. 'RDOMESTICMIG2017'\n",
551 | "126. 'RNETMIG2011'\n",
552 | "127. 'RNETMIG2012'\n",
553 | "128. 'RNETMIG2013'\n",
554 | "129. 'RNETMIG2014'\n",
555 | "130. 'RNETMIG2015'\n",
556 | "131. 'RNETMIG2016'\n",
557 | "132. 'RNETMIG2017'\n",
558 | "\n",
559 | "\n"
560 | ],
561 | "text/plain": [
562 | " [1] \"SUMLEV\" \"REGION\" \"DIVISION\" \n",
563 | " [4] \"STATE\" \"COUNTY\" \"STNAME\" \n",
564 | " [7] \"CTYNAME\" \"CENSUS2010POP\" \"ESTIMATESBASE2010\" \n",
565 | " [10] \"POPESTIMATE2010\" \"POPESTIMATE2011\" \"POPESTIMATE2012\" \n",
566 | " [13] \"POPESTIMATE2013\" \"POPESTIMATE2014\" \"POPESTIMATE2015\" \n",
567 | " [16] \"POPESTIMATE2016\" \"POPESTIMATE2017\" \"NPOPCHG_2010\" \n",
568 | " [19] \"NPOPCHG_2011\" \"NPOPCHG_2012\" \"NPOPCHG_2013\" \n",
569 | " [22] \"NPOPCHG_2014\" \"NPOPCHG_2015\" \"NPOPCHG_2016\" \n",
570 | " [25] \"NPOPCHG_2017\" \"BIRTHS2010\" \"BIRTHS2011\" \n",
571 | " [28] \"BIRTHS2012\" \"BIRTHS2013\" \"BIRTHS2014\" \n",
572 | " [31] \"BIRTHS2015\" \"BIRTHS2016\" \"BIRTHS2017\" \n",
573 | " [34] \"DEATHS2010\" \"DEATHS2011\" \"DEATHS2012\" \n",
574 | " [37] \"DEATHS2013\" \"DEATHS2014\" \"DEATHS2015\" \n",
575 | " [40] \"DEATHS2016\" \"DEATHS2017\" \"NATURALINC2010\" \n",
576 | " [43] \"NATURALINC2011\" \"NATURALINC2012\" \"NATURALINC2013\" \n",
577 | " [46] \"NATURALINC2014\" \"NATURALINC2015\" \"NATURALINC2016\" \n",
578 | " [49] \"NATURALINC2017\" \"INTERNATIONALMIG2010\" \"INTERNATIONALMIG2011\" \n",
579 | " [52] \"INTERNATIONALMIG2012\" \"INTERNATIONALMIG2013\" \"INTERNATIONALMIG2014\" \n",
580 | " [55] \"INTERNATIONALMIG2015\" \"INTERNATIONALMIG2016\" \"INTERNATIONALMIG2017\" \n",
581 | " [58] \"DOMESTICMIG2010\" \"DOMESTICMIG2011\" \"DOMESTICMIG2012\" \n",
582 | " [61] \"DOMESTICMIG2013\" \"DOMESTICMIG2014\" \"DOMESTICMIG2015\" \n",
583 | " [64] \"DOMESTICMIG2016\" \"DOMESTICMIG2017\" \"NETMIG2010\" \n",
584 | " [67] \"NETMIG2011\" \"NETMIG2012\" \"NETMIG2013\" \n",
585 | " [70] \"NETMIG2014\" \"NETMIG2015\" \"NETMIG2016\" \n",
586 | " [73] \"NETMIG2017\" \"RESIDUAL2010\" \"RESIDUAL2011\" \n",
587 | " [76] \"RESIDUAL2012\" \"RESIDUAL2013\" \"RESIDUAL2014\" \n",
588 | " [79] \"RESIDUAL2015\" \"RESIDUAL2016\" \"RESIDUAL2017\" \n",
589 | " [82] \"GQESTIMATESBASE2010\" \"GQESTIMATES2010\" \"GQESTIMATES2011\" \n",
590 | " [85] \"GQESTIMATES2012\" \"GQESTIMATES2013\" \"GQESTIMATES2014\" \n",
591 | " [88] \"GQESTIMATES2015\" \"GQESTIMATES2016\" \"GQESTIMATES2017\" \n",
592 | " [91] \"RBIRTH2011\" \"RBIRTH2012\" \"RBIRTH2013\" \n",
593 | " [94] \"RBIRTH2014\" \"RBIRTH2015\" \"RBIRTH2016\" \n",
594 | " [97] \"RBIRTH2017\" \"RDEATH2011\" \"RDEATH2012\" \n",
595 | "[100] \"RDEATH2013\" \"RDEATH2014\" \"RDEATH2015\" \n",
596 | "[103] \"RDEATH2016\" \"RDEATH2017\" \"RNATURALINC2011\" \n",
597 | "[106] \"RNATURALINC2012\" \"RNATURALINC2013\" \"RNATURALINC2014\" \n",
598 | "[109] \"RNATURALINC2015\" \"RNATURALINC2016\" \"RNATURALINC2017\" \n",
599 | "[112] \"RINTERNATIONALMIG2011\" \"RINTERNATIONALMIG2012\" \"RINTERNATIONALMIG2013\"\n",
600 | "[115] \"RINTERNATIONALMIG2014\" \"RINTERNATIONALMIG2015\" \"RINTERNATIONALMIG2016\"\n",
601 | "[118] \"RINTERNATIONALMIG2017\" \"RDOMESTICMIG2011\" \"RDOMESTICMIG2012\" \n",
602 | "[121] \"RDOMESTICMIG2013\" \"RDOMESTICMIG2014\" \"RDOMESTICMIG2015\" \n",
603 | "[124] \"RDOMESTICMIG2016\" \"RDOMESTICMIG2017\" \"RNETMIG2011\" \n",
604 | "[127] \"RNETMIG2012\" \"RNETMIG2013\" \"RNETMIG2014\" \n",
605 | "[130] \"RNETMIG2015\" \"RNETMIG2016\" \"RNETMIG2017\" "
606 | ]
607 | },
608 | "metadata": {},
609 | "output_type": "display_data"
610 | }
611 | ],
612 | "source": [
613 | "colnames(counties)"
614 | ]
615 | },
616 | {
617 | "cell_type": "markdown",
618 | "metadata": {},
619 | "source": [
620 | "Here's some code to filter out just Nebraska counties, remove the statewide total number and calculate the percent change, storing it in a field called change. "
621 | ]
622 | },
623 | {
624 | "cell_type": "code",
625 | "execution_count": 5,
626 | "metadata": {},
627 | "outputs": [],
628 | "source": [
629 | "nebraska <- counties %>% \n",
630 | "filter(STNAME == \"Nebraska\") %>% \n",
631 | "filter(SUMLEV == 50) %>% \n",
632 | "mutate(change = ((POPESTIMATE2017-POPESTIMATE2016)/POPESTIMATE2016)*100)"
633 | ]
634 | },
635 | {
636 | "cell_type": "markdown",
637 | "metadata": {},
638 | "source": [
639 | "### Homework:\n",
640 | "\n",
641 | "Read Tufte 2, 3 and 5 and be prepared for a discussion of lying with charts. Also, prepare a pitch for your next visual story, which is due Thursday of Dead Week. "
642 | ]
643 | },
644 | {
645 | "cell_type": "code",
646 | "execution_count": null,
647 | "metadata": {},
648 | "outputs": [],
649 | "source": []
650 | }
651 | ],
652 | "metadata": {
653 | "anaconda-cloud": {},
654 | "kernelspec": {
655 | "display_name": "R",
656 | "language": "R",
657 | "name": "ir"
658 | },
659 | "language_info": {
660 | "codemirror_mode": "r",
661 | "file_extension": ".r",
662 | "mimetype": "text/x-r-source",
663 | "name": "R",
664 | "pygments_lexer": "r",
665 | "version": "3.4.3"
666 | }
667 | },
668 | "nbformat": 4,
669 | "nbformat_minor": 1
670 | }
671 |
--------------------------------------------------------------------------------
/Assignments/2_R_Basics/RBasicsPart2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Basic data analysis in R\n",
8 | "\n",
9 | "R is a statistical programming language that is purpose built for data analysis. \n",
10 | "\n",
11 | "Base R does a lot, but there is a mountain of external libraries that do things to make R better/easier/more fully featured. One of the best libraries, in your professor's opinion, is `dplyr`, a library for working with data. To use dplyr, you need to import it. "
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 1,
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "name": "stderr",
21 | "output_type": "stream",
22 | "text": [
23 | "\n",
24 | "Attaching package: ‘dplyr’\n",
25 | "\n",
26 | "The following objects are masked from ‘package:stats’:\n",
27 | "\n",
28 | " filter, lag\n",
29 | "\n",
30 | "The following objects are masked from ‘package:base’:\n",
31 | "\n",
32 | " intersect, setdiff, setequal, union\n",
33 | "\n"
34 | ]
35 | }
36 | ],
37 | "source": [
38 | "library(dplyr)"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "The first thing we need to do is get some data to work with. We do that by reading it in. In our case, we're going to read data from a csv file -- a comma-separated values file. \n",
46 | "\n",
47 | "The code looks like this: \n",
48 | "\n",
49 | "`mountainlions <- read.csv(\"../../Data/mountainlions.csv\")`\n",
50 | "\n",
51 | "Let's unpack that. \n",
52 | "\n",
53 | "The first part -- `mountainlions` -- is the name of your variable. A variable is just a name of a thing. In this case, our variable is a data frame, which is R's way of storing data. We can call this whatever we want. I always want to name data frames after what is in it. In this case, we're going to import a dataset of mountain lion sightings from the Nebraska Game and Parks Commission.\n",
54 | "\n",
55 | "The `<-` bit is the variable assignment operator. It's how we know we're assigning something to a word. \n",
56 | "\n",
57 | "The `read.csv` bits are pretty obvious. What happens in the quote marks is the path to the data. In there, I have to tell R where to find the data. The easiest thing to do, if you are confused about how to find your data, is to put your data in the same folder as your notebook. In my case, I've got a folder called Data that's two levels up from my work folder. So the `../` means move up one level. So move up one level, move up one level, find Data, then in there is a file called mountainlions.csv. \n",
58 | "\n",
59 | "What you put in there will be different from mine. So your first task is to import the data. "
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 2,
65 | "metadata": {
66 | "collapsed": true
67 | },
68 | "outputs": [],
69 | "source": [
70 | "mountainlions <- read.csv(\"../../Data/mountainlions.csv\")"
71 | ]
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "Now we can inspect the data we imported. What does it look like? To do that, we use `head(mountainlions)` to show the headers and the first six rows of data. If we wanted to see them all, we could simply enter `mountainlions` and run it. \n",
78 | "\n",
79 | "To get the number of records in our dataset, we run `nrow(mountainlions)`"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 3,
85 | "metadata": {},
86 | "outputs": [
87 | {
88 | "data": {
89 | "text/html": [
90 | "\n",
91 | "| ID | Cofirm.Type | COUNTY | Date |
\n",
92 | "\n",
93 | "\t| 1 | Track | Dawes | 9/14/91 |
\n",
94 | "\t| 2 | Mortality | Sioux | 11/10/91 |
\n",
95 | "\t| 3 | Mortality | Scotts Bluff | 4/21/96 |
\n",
96 | "\t| 4 | Mortality | Sioux | 5/9/99 |
\n",
97 | "\t| 5 | Mortality | Box Butte | 9/29/99 |
\n",
98 | "\t| 6 | Track | Scotts Bluff | 11/12/99 |
\n",
99 | "\n",
100 | "
\n"
101 | ],
102 | "text/latex": [
103 | "\\begin{tabular}{r|llll}\n",
104 | " ID & Cofirm.Type & COUNTY & Date\\\\\n",
105 | "\\hline\n",
106 | "\t 1 & Track & Dawes & 9/14/91 \\\\\n",
107 | "\t 2 & Mortality & Sioux & 11/10/91 \\\\\n",
108 | "\t 3 & Mortality & Scotts Bluff & 4/21/96 \\\\\n",
109 | "\t 4 & Mortality & Sioux & 5/9/99 \\\\\n",
110 | "\t 5 & Mortality & Box Butte & 9/29/99 \\\\\n",
111 | "\t 6 & Track & Scotts Bluff & 11/12/99 \\\\\n",
112 | "\\end{tabular}\n"
113 | ],
114 | "text/markdown": [
115 | "\n",
116 | "ID | Cofirm.Type | COUNTY | Date | \n",
117 | "|---|---|---|---|---|---|\n",
118 | "| 1 | Track | Dawes | 9/14/91 | \n",
119 | "| 2 | Mortality | Sioux | 11/10/91 | \n",
120 | "| 3 | Mortality | Scotts Bluff | 4/21/96 | \n",
121 | "| 4 | Mortality | Sioux | 5/9/99 | \n",
122 | "| 5 | Mortality | Box Butte | 9/29/99 | \n",
123 | "| 6 | Track | Scotts Bluff | 11/12/99 | \n",
124 | "\n",
125 | "\n"
126 | ],
127 | "text/plain": [
128 | " ID Cofirm.Type COUNTY Date \n",
129 | "1 1 Track Dawes 9/14/91 \n",
130 | "2 2 Mortality Sioux 11/10/91\n",
131 | "3 3 Mortality Scotts Bluff 4/21/96 \n",
132 | "4 4 Mortality Sioux 5/9/99 \n",
133 | "5 5 Mortality Box Butte 9/29/99 \n",
134 | "6 6 Track Scotts Bluff 11/12/99"
135 | ]
136 | },
137 | "metadata": {},
138 | "output_type": "display_data"
139 | },
140 | {
141 | "data": {
142 | "text/html": [
143 | "393"
144 | ],
145 | "text/latex": [
146 | "393"
147 | ],
148 | "text/markdown": [
149 | "393"
150 | ],
151 | "text/plain": [
152 | "[1] 393"
153 | ]
154 | },
155 | "metadata": {},
156 | "output_type": "display_data"
157 | }
158 | ],
159 | "source": [
160 | "head(mountainlions)\n",
161 | "nrow(mountainlions)"
162 | ]
163 | },
164 | {
165 | "cell_type": "markdown",
166 | "metadata": {},
167 | "source": [
168 | "So what if we wanted to know how many mountain lion sightings there were in each county? To do that by hand, we'd have to take each of the 393 records and sort them into piles. We'd put them in groups and then count them. \n",
169 | "\n",
170 | "`dplyr` has a group by function in it that does just this. A massive amount of data analysis involves grouping like things together at some point. So it's a good place to start. \n",
171 | "\n",
172 | "So to do this, we'll take our dataset and we'll introduce a new operator: `%>%`. The best way to read that operator, in my opinion, is to interpret that as \"and then do this.\" Here's the code: "
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": 4,
178 | "metadata": {},
179 | "outputs": [
180 | {
181 | "data": {
182 | "text/html": [
183 | "\n",
184 | "| COUNTY | count |
\n",
185 | "\n",
186 | "\t| Banner | 6 |
\n",
187 | "\t| Blaine | 3 |
\n",
188 | "\t| Box Butte | 4 |
\n",
189 | "\t| Brown | 15 |
\n",
190 | "\t| Buffalo | 3 |
\n",
191 | "\t| Cedar | 1 |
\n",
192 | "\t| Cherry | 30 |
\n",
193 | "\t| Custer | 8 |
\n",
194 | "\t| Dakota | 3 |
\n",
195 | "\t| Dawes | 111 |
\n",
196 | "\t| Dawson | 5 |
\n",
197 | "\t| Dixon | 3 |
\n",
198 | "\t| Douglas | 2 |
\n",
199 | "\t| Frontier | 1 |
\n",
200 | "\t| Hall | 1 |
\n",
201 | "\t| Holt | 2 |
\n",
202 | "\t| Hooker | 1 |
\n",
203 | "\t| Howard | 3 |
\n",
204 | "\t| Keith | 1 |
\n",
205 | "\t| Keya Paha | 20 |
\n",
206 | "\t| Kimball | 1 |
\n",
207 | "\t| Knox | 8 |
\n",
208 | "\t| Lincoln | 10 |
\n",
209 | "\t| Merrick | 1 |
\n",
210 | "\t| Morrill | 2 |
\n",
211 | "\t| Nance | 1 |
\n",
212 | "\t| Nemaha | 5 |
\n",
213 | "\t| Platte | 1 |
\n",
214 | "\t| Polk | 1 |
\n",
215 | "\t| Richardson | 2 |
\n",
216 | "\t| Rock | 11 |
\n",
217 | "\t| Sarpy | 1 |
\n",
218 | "\t| Saunders | 2 |
\n",
219 | "\t| Scotts Bluff | 26 |
\n",
220 | "\t| sheridan | 2 |
\n",
221 | "\t| Sheridan | 35 |
\n",
222 | "\t| Sherman | 1 |
\n",
223 | "\t| Sioux | 52 |
\n",
224 | "\t| Thomas | 5 |
\n",
225 | "\t| Thurston | 1 |
\n",
226 | "\t| Valley | 1 |
\n",
227 | "\t| Wheeler | 1 |
\n",
228 | "\n",
229 | "
\n"
230 | ],
231 | "text/latex": [
232 | "\\begin{tabular}{r|ll}\n",
233 | " COUNTY & count\\\\\n",
234 | "\\hline\n",
235 | "\t Banner & 6 \\\\\n",
236 | "\t Blaine & 3 \\\\\n",
237 | "\t Box Butte & 4 \\\\\n",
238 | "\t Brown & 15 \\\\\n",
239 | "\t Buffalo & 3 \\\\\n",
240 | "\t Cedar & 1 \\\\\n",
241 | "\t Cherry & 30 \\\\\n",
242 | "\t Custer & 8 \\\\\n",
243 | "\t Dakota & 3 \\\\\n",
244 | "\t Dawes & 111 \\\\\n",
245 | "\t Dawson & 5 \\\\\n",
246 | "\t Dixon & 3 \\\\\n",
247 | "\t Douglas & 2 \\\\\n",
248 | "\t Frontier & 1 \\\\\n",
249 | "\t Hall & 1 \\\\\n",
250 | "\t Holt & 2 \\\\\n",
251 | "\t Hooker & 1 \\\\\n",
252 | "\t Howard & 3 \\\\\n",
253 | "\t Keith & 1 \\\\\n",
254 | "\t Keya Paha & 20 \\\\\n",
255 | "\t Kimball & 1 \\\\\n",
256 | "\t Knox & 8 \\\\\n",
257 | "\t Lincoln & 10 \\\\\n",
258 | "\t Merrick & 1 \\\\\n",
259 | "\t Morrill & 2 \\\\\n",
260 | "\t Nance & 1 \\\\\n",
261 | "\t Nemaha & 5 \\\\\n",
262 | "\t Platte & 1 \\\\\n",
263 | "\t Polk & 1 \\\\\n",
264 | "\t Richardson & 2 \\\\\n",
265 | "\t Rock & 11 \\\\\n",
266 | "\t Sarpy & 1 \\\\\n",
267 | "\t Saunders & 2 \\\\\n",
268 | "\t Scotts Bluff & 26 \\\\\n",
269 | "\t sheridan & 2 \\\\\n",
270 | "\t Sheridan & 35 \\\\\n",
271 | "\t Sherman & 1 \\\\\n",
272 | "\t Sioux & 52 \\\\\n",
273 | "\t Thomas & 5 \\\\\n",
274 | "\t Thurston & 1 \\\\\n",
275 | "\t Valley & 1 \\\\\n",
276 | "\t Wheeler & 1 \\\\\n",
277 | "\\end{tabular}\n"
278 | ],
279 | "text/markdown": [
280 | "\n",
281 | "COUNTY | count | \n",
282 | "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
283 | "| Banner | 6 | \n",
284 | "| Blaine | 3 | \n",
285 | "| Box Butte | 4 | \n",
286 | "| Brown | 15 | \n",
287 | "| Buffalo | 3 | \n",
288 | "| Cedar | 1 | \n",
289 | "| Cherry | 30 | \n",
290 | "| Custer | 8 | \n",
291 | "| Dakota | 3 | \n",
292 | "| Dawes | 111 | \n",
293 | "| Dawson | 5 | \n",
294 | "| Dixon | 3 | \n",
295 | "| Douglas | 2 | \n",
296 | "| Frontier | 1 | \n",
297 | "| Hall | 1 | \n",
298 | "| Holt | 2 | \n",
299 | "| Hooker | 1 | \n",
300 | "| Howard | 3 | \n",
301 | "| Keith | 1 | \n",
302 | "| Keya Paha | 20 | \n",
303 | "| Kimball | 1 | \n",
304 | "| Knox | 8 | \n",
305 | "| Lincoln | 10 | \n",
306 | "| Merrick | 1 | \n",
307 | "| Morrill | 2 | \n",
308 | "| Nance | 1 | \n",
309 | "| Nemaha | 5 | \n",
310 | "| Platte | 1 | \n",
311 | "| Polk | 1 | \n",
312 | "| Richardson | 2 | \n",
313 | "| Rock | 11 | \n",
314 | "| Sarpy | 1 | \n",
315 | "| Saunders | 2 | \n",
316 | "| Scotts Bluff | 26 | \n",
317 | "| sheridan | 2 | \n",
318 | "| Sheridan | 35 | \n",
319 | "| Sherman | 1 | \n",
320 | "| Sioux | 52 | \n",
321 | "| Thomas | 5 | \n",
322 | "| Thurston | 1 | \n",
323 | "| Valley | 1 | \n",
324 | "| Wheeler | 1 | \n",
325 | "\n",
326 | "\n"
327 | ],
328 | "text/plain": [
329 | " COUNTY count\n",
330 | "1 Banner 6 \n",
331 | "2 Blaine 3 \n",
332 | "3 Box Butte 4 \n",
333 | "4 Brown 15 \n",
334 | "5 Buffalo 3 \n",
335 | "6 Cedar 1 \n",
336 | "7 Cherry 30 \n",
337 | "8 Custer 8 \n",
338 | "9 Dakota 3 \n",
339 | "10 Dawes 111 \n",
340 | "11 Dawson 5 \n",
341 | "12 Dixon 3 \n",
342 | "13 Douglas 2 \n",
343 | "14 Frontier 1 \n",
344 | "15 Hall 1 \n",
345 | "16 Holt 2 \n",
346 | "17 Hooker 1 \n",
347 | "18 Howard 3 \n",
348 | "19 Keith 1 \n",
349 | "20 Keya Paha 20 \n",
350 | "21 Kimball 1 \n",
351 | "22 Knox 8 \n",
352 | "23 Lincoln 10 \n",
353 | "24 Merrick 1 \n",
354 | "25 Morrill 2 \n",
355 | "26 Nance 1 \n",
356 | "27 Nemaha 5 \n",
357 | "28 Platte 1 \n",
358 | "29 Polk 1 \n",
359 | "30 Richardson 2 \n",
360 | "31 Rock 11 \n",
361 | "32 Sarpy 1 \n",
362 | "33 Saunders 2 \n",
363 | "34 Scotts Bluff 26 \n",
364 | "35 sheridan 2 \n",
365 | "36 Sheridan 35 \n",
366 | "37 Sherman 1 \n",
367 | "38 Sioux 52 \n",
368 | "39 Thomas 5 \n",
369 | "40 Thurston 1 \n",
370 | "41 Valley 1 \n",
371 | "42 Wheeler 1 "
372 | ]
373 | },
374 | "metadata": {},
375 | "output_type": "display_data"
376 | }
377 | ],
378 | "source": [
379 | "mountainlions %>%\n",
380 | " group_by(COUNTY) %>%\n",
381 | " summarise(\n",
382 | " count = n(),\n",
383 | " ) "
384 | ]
385 | },
386 | {
387 | "cell_type": "markdown",
388 | "metadata": {},
389 | "source": [
390 | "So let's walk through that. We start with our dataset -- `mountainlions` -- and then we tell it to group the data by a given field in the data. In this case, we wanted to group together all the counties, signified by the field name COUNTY, which you could get from looking at `head(mountainlions)`. So after we group the data, we need to count them up. In dplyr, we use `summarise` [which can do more than just count things](http://dplyr.tidyverse.org/reference/summarise.html). So inside the parentheses in summarise, we set up the summaries we want. In this case, we just want a count of the records in each county. So `count = n(),` says create a new field, called `count` and set it equal to `n()`, which might look weird, but it's common in stats. The number of things in a dataset? Statisticians call it n. There are n incidents in this dataset. So `n()` is a function that counts the number of things there are. \n",
391 | "\n",
392 | "And when we run that, we get a list of counties with a count next to them. But it's not in any order. So we'll add another And Then Do This %>% and use `arrange`. Arrange does what you think it does -- it arranges data in order. By default, it's in ascending order -- smallest to largest. But if we want to know the county with the most mountain lion sightings, we need to sort it in descending order. That looks like this:"
393 | ]
394 | },
395 | {
396 | "cell_type": "code",
397 | "execution_count": 5,
398 | "metadata": {},
399 | "outputs": [
400 | {
401 | "data": {
402 | "text/html": [
403 | "\n",
404 | "| COUNTY | count |
\n",
405 | "\n",
406 | "\t| Dawes | 111 |
\n",
407 | "\t| Sioux | 52 |
\n",
408 | "\t| Sheridan | 35 |
\n",
409 | "\t| Cherry | 30 |
\n",
410 | "\t| Scotts Bluff | 26 |
\n",
411 | "\t| Keya Paha | 20 |
\n",
412 | "\t| Brown | 15 |
\n",
413 | "\t| Rock | 11 |
\n",
414 | "\t| Lincoln | 10 |
\n",
415 | "\t| Custer | 8 |
\n",
416 | "\t| Knox | 8 |
\n",
417 | "\t| Banner | 6 |
\n",
418 | "\t| Dawson | 5 |
\n",
419 | "\t| Nemaha | 5 |
\n",
420 | "\t| Thomas | 5 |
\n",
421 | "\t| Box Butte | 4 |
\n",
422 | "\t| Blaine | 3 |
\n",
423 | "\t| Buffalo | 3 |
\n",
424 | "\t| Dakota | 3 |
\n",
425 | "\t| Dixon | 3 |
\n",
426 | "\t| Howard | 3 |
\n",
427 | "\t| Douglas | 2 |
\n",
428 | "\t| Holt | 2 |
\n",
429 | "\t| Morrill | 2 |
\n",
430 | "\t| Richardson | 2 |
\n",
431 | "\t| Saunders | 2 |
\n",
432 | "\t| sheridan | 2 |
\n",
433 | "\t| Cedar | 1 |
\n",
434 | "\t| Frontier | 1 |
\n",
435 | "\t| Hall | 1 |
\n",
436 | "\t| Hooker | 1 |
\n",
437 | "\t| Keith | 1 |
\n",
438 | "\t| Kimball | 1 |
\n",
439 | "\t| Merrick | 1 |
\n",
440 | "\t| Nance | 1 |
\n",
441 | "\t| Platte | 1 |
\n",
442 | "\t| Polk | 1 |
\n",
443 | "\t| Sarpy | 1 |
\n",
444 | "\t| Sherman | 1 |
\n",
445 | "\t| Thurston | 1 |
\n",
446 | "\t| Valley | 1 |
\n",
447 | "\t| Wheeler | 1 |
\n",
448 | "\n",
449 | "
\n"
450 | ],
451 | "text/latex": [
452 | "\\begin{tabular}{r|ll}\n",
453 | " COUNTY & count\\\\\n",
454 | "\\hline\n",
455 | "\t Dawes & 111 \\\\\n",
456 | "\t Sioux & 52 \\\\\n",
457 | "\t Sheridan & 35 \\\\\n",
458 | "\t Cherry & 30 \\\\\n",
459 | "\t Scotts Bluff & 26 \\\\\n",
460 | "\t Keya Paha & 20 \\\\\n",
461 | "\t Brown & 15 \\\\\n",
462 | "\t Rock & 11 \\\\\n",
463 | "\t Lincoln & 10 \\\\\n",
464 | "\t Custer & 8 \\\\\n",
465 | "\t Knox & 8 \\\\\n",
466 | "\t Banner & 6 \\\\\n",
467 | "\t Dawson & 5 \\\\\n",
468 | "\t Nemaha & 5 \\\\\n",
469 | "\t Thomas & 5 \\\\\n",
470 | "\t Box Butte & 4 \\\\\n",
471 | "\t Blaine & 3 \\\\\n",
472 | "\t Buffalo & 3 \\\\\n",
473 | "\t Dakota & 3 \\\\\n",
474 | "\t Dixon & 3 \\\\\n",
475 | "\t Howard & 3 \\\\\n",
476 | "\t Douglas & 2 \\\\\n",
477 | "\t Holt & 2 \\\\\n",
478 | "\t Morrill & 2 \\\\\n",
479 | "\t Richardson & 2 \\\\\n",
480 | "\t Saunders & 2 \\\\\n",
481 | "\t sheridan & 2 \\\\\n",
482 | "\t Cedar & 1 \\\\\n",
483 | "\t Frontier & 1 \\\\\n",
484 | "\t Hall & 1 \\\\\n",
485 | "\t Hooker & 1 \\\\\n",
486 | "\t Keith & 1 \\\\\n",
487 | "\t Kimball & 1 \\\\\n",
488 | "\t Merrick & 1 \\\\\n",
489 | "\t Nance & 1 \\\\\n",
490 | "\t Platte & 1 \\\\\n",
491 | "\t Polk & 1 \\\\\n",
492 | "\t Sarpy & 1 \\\\\n",
493 | "\t Sherman & 1 \\\\\n",
494 | "\t Thurston & 1 \\\\\n",
495 | "\t Valley & 1 \\\\\n",
496 | "\t Wheeler & 1 \\\\\n",
497 | "\\end{tabular}\n"
498 | ],
499 | "text/markdown": [
500 | "\n",
501 | "COUNTY | count | \n",
502 | "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
503 | "| Dawes | 111 | \n",
504 | "| Sioux | 52 | \n",
505 | "| Sheridan | 35 | \n",
506 | "| Cherry | 30 | \n",
507 | "| Scotts Bluff | 26 | \n",
508 | "| Keya Paha | 20 | \n",
509 | "| Brown | 15 | \n",
510 | "| Rock | 11 | \n",
511 | "| Lincoln | 10 | \n",
512 | "| Custer | 8 | \n",
513 | "| Knox | 8 | \n",
514 | "| Banner | 6 | \n",
515 | "| Dawson | 5 | \n",
516 | "| Nemaha | 5 | \n",
517 | "| Thomas | 5 | \n",
518 | "| Box Butte | 4 | \n",
519 | "| Blaine | 3 | \n",
520 | "| Buffalo | 3 | \n",
521 | "| Dakota | 3 | \n",
522 | "| Dixon | 3 | \n",
523 | "| Howard | 3 | \n",
524 | "| Douglas | 2 | \n",
525 | "| Holt | 2 | \n",
526 | "| Morrill | 2 | \n",
527 | "| Richardson | 2 | \n",
528 | "| Saunders | 2 | \n",
529 | "| sheridan | 2 | \n",
530 | "| Cedar | 1 | \n",
531 | "| Frontier | 1 | \n",
532 | "| Hall | 1 | \n",
533 | "| Hooker | 1 | \n",
534 | "| Keith | 1 | \n",
535 | "| Kimball | 1 | \n",
536 | "| Merrick | 1 | \n",
537 | "| Nance | 1 | \n",
538 | "| Platte | 1 | \n",
539 | "| Polk | 1 | \n",
540 | "| Sarpy | 1 | \n",
541 | "| Sherman | 1 | \n",
542 | "| Thurston | 1 | \n",
543 | "| Valley | 1 | \n",
544 | "| Wheeler | 1 | \n",
545 | "\n",
546 | "\n"
547 | ],
548 | "text/plain": [
549 | " COUNTY count\n",
550 | "1 Dawes 111 \n",
551 | "2 Sioux 52 \n",
552 | "3 Sheridan 35 \n",
553 | "4 Cherry 30 \n",
554 | "5 Scotts Bluff 26 \n",
555 | "6 Keya Paha 20 \n",
556 | "7 Brown 15 \n",
557 | "8 Rock 11 \n",
558 | "9 Lincoln 10 \n",
559 | "10 Custer 8 \n",
560 | "11 Knox 8 \n",
561 | "12 Banner 6 \n",
562 | "13 Dawson 5 \n",
563 | "14 Nemaha 5 \n",
564 | "15 Thomas 5 \n",
565 | "16 Box Butte 4 \n",
566 | "17 Blaine 3 \n",
567 | "18 Buffalo 3 \n",
568 | "19 Dakota 3 \n",
569 | "20 Dixon 3 \n",
570 | "21 Howard 3 \n",
571 | "22 Douglas 2 \n",
572 | "23 Holt 2 \n",
573 | "24 Morrill 2 \n",
574 | "25 Richardson 2 \n",
575 | "26 Saunders 2 \n",
576 | "27 sheridan 2 \n",
577 | "28 Cedar 1 \n",
578 | "29 Frontier 1 \n",
579 | "30 Hall 1 \n",
580 | "31 Hooker 1 \n",
581 | "32 Keith 1 \n",
582 | "33 Kimball 1 \n",
583 | "34 Merrick 1 \n",
584 | "35 Nance 1 \n",
585 | "36 Platte 1 \n",
586 | "37 Polk 1 \n",
587 | "38 Sarpy 1 \n",
588 | "39 Sherman 1 \n",
589 | "40 Thurston 1 \n",
590 | "41 Valley 1 \n",
591 | "42 Wheeler 1 "
592 | ]
593 | },
594 | "metadata": {},
595 | "output_type": "display_data"
596 | }
597 | ],
598 | "source": [
599 | "mountainlions %>%\n",
600 | " group_by(COUNTY) %>%\n",
601 | " summarise(\n",
602 | " count = n(),\n",
603 | " ) %>% arrange(desc(count))"
604 | ]
605 | },
606 | {
607 | "cell_type": "markdown",
608 | "metadata": {},
609 | "source": [
610 | "## More basics\n",
611 | "\n",
612 | "In the last example, we grouped some data together and counted it up, but there's so much more you can do. You can do multiple measures in a single step as well. \n",
613 | "\n",
614 | "Let's look at some simple college data. "
615 | ]
616 | },
617 | {
618 | "cell_type": "code",
619 | "execution_count": 6,
620 | "metadata": {
621 | "collapsed": true
622 | },
623 | "outputs": [],
624 | "source": [
625 | "colleges <- read.csv(\"../../Data/colleges.csv\")"
626 | ]
627 | },
628 | {
629 | "cell_type": "code",
630 | "execution_count": 7,
631 | "metadata": {},
632 | "outputs": [
633 | {
634 | "data": {
635 | "text/html": [
636 | "\n",
637 | "| UnitID | Name | InState1213 | OutOfState1213 | GradRate |
\n",
638 | "\n",
639 | "\t| 151351 | Indiana University-Bloomington | 23116 | 44566 | 75 |
\n",
640 | "\t| 171100 | Michigan State University | 24028 | 43986 | 79 |
\n",
641 | "\t| 147767 | Northwestern University | 60840 | 60840 | 93 |
\n",
642 | "\t| 204796 | Ohio State University-Main Campus | 24919 | 40327 | 82 |
\n",
643 | "\t| 214777 | Pennsylvania State University-Main Campus | 31854 | 44156 | 86 |
\n",
644 | "\t| 243780 | Purdue University-Main Campus | 23468 | 42270 | 69 |
\n",
645 | "\n",
646 | "
\n"
647 | ],
648 | "text/latex": [
649 | "\\begin{tabular}{r|lllll}\n",
650 | " UnitID & Name & InState1213 & OutOfState1213 & GradRate\\\\\n",
651 | "\\hline\n",
652 | "\t 151351 & Indiana University-Bloomington & 23116 & 44566 & 75 \\\\\n",
653 | "\t 171100 & Michigan State University & 24028 & 43986 & 79 \\\\\n",
654 | "\t 147767 & Northwestern University & 60840 & 60840 & 93 \\\\\n",
655 | "\t 204796 & Ohio State University-Main Campus & 24919 & 40327 & 82 \\\\\n",
656 | "\t 214777 & Pennsylvania State University-Main Campus & 31854 & 44156 & 86 \\\\\n",
657 | "\t 243780 & Purdue University-Main Campus & 23468 & 42270 & 69 \\\\\n",
658 | "\\end{tabular}\n"
659 | ],
660 | "text/markdown": [
661 | "\n",
662 | "UnitID | Name | InState1213 | OutOfState1213 | GradRate | \n",
663 | "|---|---|---|---|---|---|\n",
664 | "| 151351 | Indiana University-Bloomington | 23116 | 44566 | 75 | \n",
665 | "| 171100 | Michigan State University | 24028 | 43986 | 79 | \n",
666 | "| 147767 | Northwestern University | 60840 | 60840 | 93 | \n",
667 | "| 204796 | Ohio State University-Main Campus | 24919 | 40327 | 82 | \n",
668 | "| 214777 | Pennsylvania State University-Main Campus | 31854 | 44156 | 86 | \n",
669 | "| 243780 | Purdue University-Main Campus | 23468 | 42270 | 69 | \n",
670 | "\n",
671 | "\n"
672 | ],
673 | "text/plain": [
674 | " UnitID Name InState1213 OutOfState1213\n",
675 | "1 151351 Indiana University-Bloomington 23116 44566 \n",
676 | "2 171100 Michigan State University 24028 43986 \n",
677 | "3 147767 Northwestern University 60840 60840 \n",
678 | "4 204796 Ohio State University-Main Campus 24919 40327 \n",
679 | "5 214777 Pennsylvania State University-Main Campus 31854 44156 \n",
680 | "6 243780 Purdue University-Main Campus 23468 42270 \n",
681 | " GradRate\n",
682 | "1 75 \n",
683 | "2 79 \n",
684 | "3 93 \n",
685 | "4 82 \n",
686 | "5 86 \n",
687 | "6 69 "
688 | ]
689 | },
690 | "metadata": {},
691 | "output_type": "display_data"
692 | }
693 | ],
694 | "source": [
695 | "head(colleges)"
696 | ]
697 | },
698 | {
699 | "cell_type": "markdown",
700 | "metadata": {},
701 | "source": [
702 | "Using `summarise`, we can calculate any number of measures. Here, we'll use R's built-in `mean` and `median` functions to calculate ... well, you get the idea. "
703 | ]
704 | },
705 | {
706 | "cell_type": "code",
707 | "execution_count": 8,
708 | "metadata": {},
709 | "outputs": [
710 | {
711 | "data": {
712 | "text/html": [
713 | "\n",
714 | "| count | instatemean | outstatemean | instatemedian | outstatemedian |
\n",
715 | "\n",
716 | "\t| 14 | 27652.86 | 42821.5 | 24473.5 | 42194 |
\n",
717 | "\n",
718 | "
\n"
719 | ],
720 | "text/latex": [
721 | "\\begin{tabular}{r|lllll}\n",
722 | " count & instatemean & outstatemean & instatemedian & outstatemedian\\\\\n",
723 | "\\hline\n",
724 | "\t 14 & 27652.86 & 42821.5 & 24473.5 & 42194 \\\\\n",
725 | "\\end{tabular}\n"
726 | ],
727 | "text/markdown": [
728 | "\n",
729 | "count | instatemean | outstatemean | instatemedian | outstatemedian | \n",
730 | "|---|---|---|---|---|\n",
731 | "| 14 | 27652.86 | 42821.5 | 24473.5 | 42194 | \n",
732 | "\n",
733 | "\n"
734 | ],
735 | "text/plain": [
736 | " count instatemean outstatemean instatemedian outstatemedian\n",
737 | "1 14 27652.86 42821.5 24473.5 42194 "
738 | ]
739 | },
740 | "metadata": {},
741 | "output_type": "display_data"
742 | }
743 | ],
744 | "source": [
745 | "colleges %>%\n",
746 | " summarise(\n",
747 | " count = n(),\n",
748 | " instatemean = mean(InState1213),\n",
749 | " outstatemean = mean(OutOfState1213),\n",
750 | " instatemedian = median(InState1213),\n",
751 | "    outstatemedian = median(OutOfState1213)\n",
752 | " )"
753 | ]
754 | },
755 | {
756 | "cell_type": "markdown",
757 | "metadata": {},
758 | "source": [
759 | "Now, what if we just wanted to see the University of Nebraska-Lincoln, so we can compare it to the mean and median? To do that, we use `filter`, which does what it says on the tin: you simply filter for the records you want (or don't want) so your numbers reflect only what you're looking at. So in this case, we're going to get all the records where the Name equals \"University of Nebraska-Lincoln\"."
760 | ]
761 | },
762 | {
763 | "cell_type": "code",
764 | "execution_count": 14,
765 | "metadata": {},
766 | "outputs": [
767 | {
768 | "data": {
769 | "text/html": [
770 | "\n",
771 | "| UnitID | Name | InState1213 | OutOfState1213 | GradRate |
\n",
772 | "\n",
773 | "\t| 181464 | University of Nebraska-Lincoln | 21700 | 34450 | 65 |
\n",
774 | "\n",
775 | "
\n"
776 | ],
777 | "text/latex": [
778 | "\\begin{tabular}{r|lllll}\n",
779 | " UnitID & Name & InState1213 & OutOfState1213 & GradRate\\\\\n",
780 | "\\hline\n",
781 | "\t 181464 & University of Nebraska-Lincoln & 21700 & 34450 & 65 \\\\\n",
782 | "\\end{tabular}\n"
783 | ],
784 | "text/markdown": [
785 | "\n",
786 | "UnitID | Name | InState1213 | OutOfState1213 | GradRate | \n",
787 | "|---|---|---|---|---|\n",
788 | "| 181464 | University of Nebraska-Lincoln | 21700 | 34450 | 65 | \n",
789 | "\n",
790 | "\n"
791 | ],
792 | "text/plain": [
793 | " UnitID Name InState1213 OutOfState1213 GradRate\n",
794 | "1 181464 University of Nebraska-Lincoln 21700 34450 65 "
795 | ]
796 | },
797 | "metadata": {},
798 | "output_type": "display_data"
799 | }
800 | ],
801 | "source": [
802 | "colleges %>% filter(Name == \"University of Nebraska-Lincoln\")"
803 | ]
804 | },
805 | {
806 | "cell_type": "markdown",
807 | "metadata": {},
808 | "source": [
809 | "## Assignment\n",
810 | "\n",
811 | "We're going to put it all together now. We're going to calculate the mean and median salaries of job titles at the University of Nebraska-Lincoln. \n",
812 | "\n",
813 | "Answer this question:\n",
814 | "\n",
815 | "**What are the top median salaries by job title at UNL? And how does that compare to the average salary for that position?**\n",
816 | "\n",
817 | "To do this, you'll need to [download this data](https://www.dropbox.com/s/yqweytz0sb6cd5f/nusalaries1718.csv?dl=0).\n",
818 | "\n",
819 | "#### Rubric\n",
820 | "\n",
821 | "1. Did you read the data into a dataframe? \n",
822 | "2. Did you use group by syntax correctly? \n",
823 | "3. Did you use summarize syntax correctly?\n",
824 | "4. Did you use filter syntax correctly?\n",
825 | "5. Did you use Markdown comments to explain your steps? "
826 | ]
827 | },
828 | {
829 | "cell_type": "code",
830 | "execution_count": null,
831 | "metadata": {
832 | "collapsed": true
833 | },
834 | "outputs": [],
835 | "source": []
836 | }
837 | ],
838 | "metadata": {
839 | "anaconda-cloud": {},
840 | "kernelspec": {
841 | "display_name": "R",
842 | "language": "R",
843 | "name": "ir"
844 | },
845 | "language_info": {
846 | "codemirror_mode": "r",
847 | "file_extension": ".r",
848 | "mimetype": "text/x-r-source",
849 | "name": "R",
850 | "pygments_lexer": "r",
851 | "version": "3.4.1"
852 | }
853 | },
854 | "nbformat": 4,
855 | "nbformat_minor": 1
856 | }
857 |
--------------------------------------------------------------------------------