├── Assignments ├── .gitignore ├── 17_Maps │ ├── screen1.png │ ├── .ipynb_checkpoints │ │ └── Untitled-checkpoint.ipynb │ ├── mapping_challenge.md │ └── maps.md ├── 14_Transparency │ ├── dog.jpg │ ├── screen1.png │ ├── screen2.png │ ├── screen3.png │ ├── Transparency.ipynb │ └── .ipynb_checkpoints │ │ └── Transparency-checkpoint.ipynb ├── 18_LatticeMaps │ ├── map.png │ └── latticemaps.md ├── 11_FinishingTouches │ ├── unlpd.pdf │ ├── chartannotated.png │ └── .ipynb_checkpoints │ │ └── FinishingTouches-checkpoint.ipynb ├── 1_Installations │ ├── helloworld.png │ ├── .ipynb_checkpoints │ │ └── Hello World in R-checkpoint.ipynb │ ├── Hello World in R.ipynb │ └── installing_jupyter_notebook.md ├── 9_StackedAreaCharts │ └── UNLPD.pdf ├── 2_R_Basics │ ├── .ipynb_checkpoints │ │ └── RBasics-checkpoint.ipynb │ ├── RBasics.ipynb │ └── RBasicsPart2.ipynb ├── 15_Treemaps │ └── .ipynb_checkpoints │ │ └── Treemaps-checkpoint.ipynb ├── 19_WaffleCharts │ └── .ipynb_checkpoints │ │ └── Untitled-checkpoint.ipynb ├── 8_LineCharts │ └── .ipynb_checkpoints │ │ └── LineCharts-checkpoint.ipynb ├── 10_Scatterplots │ └── .ipynb_checkpoints │ │ └── Scatterplots-checkpoint.ipynb ├── 12_BubbleCharts │ └── .ipynb_checkpoints │ │ └── BubbleCharts-checkpoint.ipynb ├── 13_LatticeCharts │ └── .ipynb_checkpoints │ │ └── FacetCharts-checkpoint.ipynb ├── 3_PercentChange │ └── .ipynb_checkpoints │ │ └── PercentChange-checkpoint.ipynb ├── 6_IntroToggplot │ └── .ipynb_checkpoints │ │ └── IntroToggplot-checkpoint.ipynb ├── 5_RecastingData │ └── .ipynb_checkpoints │ │ └── Recasting Data-checkpoint.ipynb ├── 4_WorkingWithDates │ └── .ipynb_checkpoints │ │ └── WorkingWithDates-checkpoint.ipynb └── 16_LiveFireExercise │ └── LiveFireExercise.ipynb ├── Lectures ├── Chartjunk.pptx ├── BeautyVsTufte.pptx ├── 1_Introductions.pptx ├── 2_WhatIsDataViz.pptx ├── FormAndFunction.pptx ├── HowYourBrainWorks1.pptx ├── AdvancedDataCleaning.pptx ├── ImprovingVisualizations.pptx ├── Cairo1InformationToWisdom.pptx └── 
Class3_GraphicalExcellence.pptx ├── Resources └── Git Quick Reference.pdf ├── .gitignore_global ├── .gitignore ├── Data ├── colleges.csv ├── enrollment.csv ├── fac2chp.csv ├── registeredvoters.csv └── mountainlions.csv ├── README.md ├── LICENSE.md └── syllabus.md /Assignments/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /Lectures/Chartjunk.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/Chartjunk.pptx -------------------------------------------------------------------------------- /Lectures/BeautyVsTufte.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/BeautyVsTufte.pptx -------------------------------------------------------------------------------- /Lectures/1_Introductions.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/1_Introductions.pptx -------------------------------------------------------------------------------- /Lectures/2_WhatIsDataViz.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/2_WhatIsDataViz.pptx -------------------------------------------------------------------------------- /Lectures/FormAndFunction.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/FormAndFunction.pptx -------------------------------------------------------------------------------- /Assignments/17_Maps/screen1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/17_Maps/screen1.png -------------------------------------------------------------------------------- /Lectures/HowYourBrainWorks1.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/HowYourBrainWorks1.pptx -------------------------------------------------------------------------------- /Resources/Git Quick Reference.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Resources/Git Quick Reference.pdf -------------------------------------------------------------------------------- /Assignments/14_Transparency/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/14_Transparency/dog.jpg -------------------------------------------------------------------------------- /Assignments/18_LatticeMaps/map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/18_LatticeMaps/map.png -------------------------------------------------------------------------------- /Lectures/AdvancedDataCleaning.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/AdvancedDataCleaning.pptx -------------------------------------------------------------------------------- /Lectures/ImprovingVisualizations.pptx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/ImprovingVisualizations.pptx -------------------------------------------------------------------------------- /Assignments/14_Transparency/screen1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/14_Transparency/screen1.png -------------------------------------------------------------------------------- /Assignments/14_Transparency/screen2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/14_Transparency/screen2.png -------------------------------------------------------------------------------- /Assignments/14_Transparency/screen3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/14_Transparency/screen3.png -------------------------------------------------------------------------------- /Lectures/Cairo1InformationToWisdom.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/Cairo1InformationToWisdom.pptx -------------------------------------------------------------------------------- /Lectures/Class3_GraphicalExcellence.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Lectures/Class3_GraphicalExcellence.pptx -------------------------------------------------------------------------------- /Assignments/11_FinishingTouches/unlpd.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/11_FinishingTouches/unlpd.pdf -------------------------------------------------------------------------------- /Assignments/1_Installations/helloworld.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/1_Installations/helloworld.png -------------------------------------------------------------------------------- /Assignments/9_StackedAreaCharts/UNLPD.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/9_StackedAreaCharts/UNLPD.pdf -------------------------------------------------------------------------------- /Assignments/17_Maps/.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /Assignments/2_R_Basics/.ipynb_checkpoints/RBasics-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /Assignments/15_Treemaps/.ipynb_checkpoints/Treemaps-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /Assignments/19_WaffleCharts/.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | 
"cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /Assignments/8_LineCharts/.ipynb_checkpoints/LineCharts-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /Assignments/10_Scatterplots/.ipynb_checkpoints/Scatterplots-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /Assignments/11_FinishingTouches/chartannotated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattwaite/JOUR491-Data-Visualization/HEAD/Assignments/11_FinishingTouches/chartannotated.png -------------------------------------------------------------------------------- /Assignments/12_BubbleCharts/.ipynb_checkpoints/BubbleCharts-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /Assignments/13_LatticeCharts/.ipynb_checkpoints/FacetCharts-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /Assignments/3_PercentChange/.ipynb_checkpoints/PercentChange-checkpoint.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /Assignments/6_IntroToggplot/.ipynb_checkpoints/IntroToggplot-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /Assignments/1_Installations/.ipynb_checkpoints/Hello World in R-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /Assignments/5_RecastingData/.ipynb_checkpoints/Recasting Data-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /Assignments/11_FinishingTouches/.ipynb_checkpoints/FinishingTouches-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /Assignments/4_WorkingWithDates/.ipynb_checkpoints/WorkingWithDates-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /.gitignore_global: 
-------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.com 4 | *.class 5 | *.dll 6 | *.exe 7 | *.o 8 | *.so 9 | 10 | # Packages # 11 | ############ 12 | # it's better to unpack these files and commit the raw source 13 | # git has its own built in compression methods 14 | *.7z 15 | *.dmg 16 | *.gz 17 | *.iso 18 | *.jar 19 | *.rar 20 | *.tar 21 | *.zip 22 | 23 | # Logs and databases # 24 | ###################### 25 | *.log 26 | *.sql 27 | *.sqlite 28 | 29 | # OS generated files # 30 | ###################### 31 | .DS_Store 32 | .DS_Store? 33 | ._* 34 | .Spotlight-V100 35 | .Trashes 36 | ehthumbs.db 37 | Thumbs.db 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.com 4 | *.class 5 | *.dll 6 | *.exe 7 | *.o 8 | *.so 9 | 10 | # Packages # 11 | ############ 12 | # it's better to unpack these files and commit the raw source 13 | # git has its own built in compression methods 14 | *.7z 15 | *.dmg 16 | *.gz 17 | *.iso 18 | *.jar 19 | *.rar 20 | *.tar 21 | *.zip 22 | 23 | # Logs and databases # 24 | ###################### 25 | *.log 26 | *.sql 27 | *.sqlite 28 | 29 | # OS generated files # 30 | ###################### 31 | .DS_Store 32 | .DS_Store? 
33 | ._* 34 | .Spotlight-V100 35 | .Trashes 36 | ehthumbs.db 37 | Thumbs.db 38 | 39 | # Microsoft shite # 40 | ~$* 41 | 42 | *__ANSWERS* 43 | -------------------------------------------------------------------------------- /Data/colleges.csv: -------------------------------------------------------------------------------- 1 | UnitID,Name,InState1213,OutOfState1213,GradRate 151351,Indiana University-Bloomington,23116,44566,75 171100,Michigan State University,24028,43986,79 147767,Northwestern University,60840,60840,93 204796,Ohio State University-Main Campus,24919,40327,82 214777,Pennsylvania State University-Main Campus,31854,44156,86 243780,Purdue University-Main Campus,23468,42270,69 186380,Rutgers University-New Brunswick,28798,42118,79 145637,University of Illinois at Urbana-Champaign,28564,42706,84 153658,University of Iowa,21832,40054,70 163286,University of Maryland-College Park,23346,41725,82 170976,University of Michigan-Ann Arbor,25848,51976,91 174066,University of Minnesota-Twin Cities,25065,30315,73 181464,University of Nebraska-Lincoln,21700,34450,65 240444,University of Wisconsin-Madison,23762,40012,82 -------------------------------------------------------------------------------- /Data/enrollment.csv: -------------------------------------------------------------------------------- 1 | Year,Date,Enrollment 1967,1967-1-1,18067 1968,1968-1-1,19150 1969,1969-1-1,19618 1970,1970-1-1,20810 1971,1971-1-1,21541 1972,1972-1-1,21581 1973,1973-1-1,21160 1974,1974-1-1,20892 1975,1975-1-1,22380 1976,1976-1-1,22179 1977,1977-1-1,22256 1978,1978-1-1,22477 1979,1979-1-1,23661 1980,1980-1-1,24128 1981,1981-1-1,24786 1982,1982-1-1,25075 1983,1983-1-1,24789 1984,1984-1-1,24228 1985,1985-1-1,24020 1986,1986-1-1,23899 1987,1987-1-1,23469 1988,1988-1-1,23985 1989,1989-1-1,23926 1990,1990-1-1,24453 1991,1991-1-1,24620 1992,1992-1-1,24573 1993,1993-1-1,24491 1994,1994-1-1,23854 1995,1995-1-1,24320 1996,1996-1-1,23887 1997,1997-1-1,22827 1998,1998-1-1,22408 
1999,1999-1-1,22142 2000,2000-1-1,22268 2001,2001-1-1,22764 2002,2002-1-1,22988 2003,2003-1-1,22559 2004,2004-1-1,21792 2005,2005-1-1,21675 2006,2006-1-1,22106 2007,2007-1-1,22973 2008,2008-1-1,23573 2009,2009-1-1,24100 2010,2010-1-1,24610 2011,2011-1-1,24593 2012,2012-1-1,24207 2013,2013-1-1,24445 2014,2014-1-1,25006 2015,2015-1-1,25260 2016,2016-1-1,25897 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JOUR407 Data Visualization 2 | 3 | Course materials for a data visualization course taught at the University of Nebraska-Lincoln's College of Journalism and Mass Communications. 4 | 5 | ## About this course 6 | 7 | This course is first and foremost an experiment in student learning at a journalism school. It is not an online course. These materials are here to augment the classroom experience. The materials here are being posted publicly and are open for use, reuse or contributions if you so desire. 8 | 9 | ## Course goals 10 | 11 | * Introduce students to data visualization tools and techniques 12 | * Understand the theories of what makes for good data visualization 13 | * Get hands on with gathering, cleaning, refining and analyzing data before visualization 14 | * Get hands on with programmatic tools for analysis and visualization in R 15 | * Learn to apply best practices for good data visualization to each of the data visualization tools 16 | * Get experience using GitHub for version control 17 | * Get experience with other development environments when working with code. 18 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2013 Matthew D. 
Waite 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | 13 | -------------------------------------------------------------------------------- /Assignments/17_Maps/mapping_challenge.md: -------------------------------------------------------------------------------- 1 | # Mapping Challenge: Is it a map or something else? 2 | 3 | China announced this week that it will impose tariffs on US agricultural products. It is a move designed to [target Trump voters in midwestern states](https://www.politico.com/story/2018/04/04/how-china-will-target-us-agriculture-458530). Media have noted that Trump won most of the states where soybeans are produced. 4 | 5 | In my opinion, state level numbers are too imprecise. We need to go to the county level. 6 | 7 | The questions: 8 | 9 | * Where are soybeans produced in the United States? 10 | * How did Trump perform in those counties? 11 | * What are the population trends in those counties? Could the costs of these tariff's be more than money? 
12 | 13 | What, in these questions, is a map? 14 | 15 | **Turn in what you have by the end of class.** 16 | 17 | ### Data 18 | 19 | * [Soybean production in bushels](https://www.dropbox.com/s/hnjx2dazkag62g5/soybeans2012.csv?dl=0), by county in 2012, from the USDA. Most recent data available. 20 | * [Election results by county](https://www.dropbox.com/s/uib1uc6dj0u99wt/2016president__county.csv?dl=0) in 2016, from OpenElex. 21 | * Population data you have from previous assignments. 22 | -------------------------------------------------------------------------------- /Data/fac2chp.csv: -------------------------------------------------------------------------------- 1 | Department,TotalFaculty,CreditHours 2 | Advertising,24,3198 3 | Agricultural Economics,23,149 4 | "Agricultural Leadership, Education and Co",19,3370 5 | Agronomy and Horticulture,50,4949 6 | Animal Science,29,3331 7 | Anthropology,9,6016 8 | Architecture,28,3980 9 | Art and Art History,28,5378 10 | Biochemistry,22,1849 11 | Biological Systems Engineering,31,3175 12 | Broadcasting,7,454 13 | Business Administration,6,5583 14 | Chemical and Biomolecular Engineering,17,1713 15 | Chemistry,28,12998 16 | "Children, Youth and Family Studies",35,5085 17 | Civil Engineering,31,2195 18 | Classics & Religious Studies,14,3988 19 | Communication Studies,16,5626 20 | Community and Regional Planning,5,584 21 | Computer Science,25,6287 22 | Durham School Arch Engr & Const,36,3475 23 | Earth and Atmospheric Sciences,21,2554 24 | Economics,17,9152 25 | Educational Administration,20,1901 26 | Educational Psychology,27,4790 27 | Electrical & Computer Engineering,38,4495 28 | English,68,14936 29 | Entomology,13,1281 30 | Finance,25,6131 31 | History,32,7358 32 | Johnny Carson School of Theatre and Film,19,5071 33 | Journalism and Mass Communicat,25,5327 34 | Law,48,5753 35 | Management,25,10927 36 | Marketing,16,4838 37 | Mathematics,49,23018 38 | Mechanical & Materials Engineering,43,5737 39 | Modern Language & 
Literature,37,9486 40 | Nutrition & Health Sciences,34,8142 41 | Philosophy,13,3319 42 | Physics and Astronomy,30,7525 43 | Political Science,19,5011 44 | Psychology,35,12437 45 | Biological Sciences,44,8183 46 | School of Music,58,8962 47 | Sociology,24,5248 48 | Special Ed & Communication,56,5621 49 | Statistics,3,3773 50 | Teach/Learn & Teacher Ed,72,7935 51 | "Textiles, Merchandising and Fashion Desig Professor",12,1834 -------------------------------------------------------------------------------- /Assignments/1_Installations/Hello World in R.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Hello world\n", 8 | "\n", 9 | "Jupyter Notebook, which is what you're using now, is a browser based interactive code environment. In this case, we're using it for R, a stats language. The first thing you always do in a code environment -- it's a law -- is write code that prints Hello World! on the screen. So lets do that. In the next field open box, type this:\n", 10 | "\n", 11 | "```\n", 12 | "words <- \"Hello World!\"\n", 13 | "\n", 14 | "print(words)\n", 15 | "```" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "collapsed": true 29 | }, 30 | "source": [ 31 | "The next thing you need to do is add text like this. You can do that by just changing the cell type. You do that in the menu above. 
It looks like this:\n", 32 | "\n", 33 | "![menu bar](helloworld.png)\n", 34 | "\n", 35 | "So, in the next field, change the field type to Markdown, and add this code:\n", 36 | "\n", 37 | "```\n", 38 | "# This is a giant headline\n", 39 | "\n", 40 | "This is text.\n", 41 | "\n", 42 | "[This is a link](http://www.google.com/)\n", 43 | "\n", 44 | "```\n", 45 | "\n", 46 | "You can see more of what you can do with Markdown [on this cheatsheet](https://github.com/adam-p/markdown-here/wiki/Markdown-Cheatsheet). " 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [] 57 | } 58 | ], 59 | "metadata": { 60 | "anaconda-cloud": {}, 61 | "kernelspec": { 62 | "display_name": "R", 63 | "language": "R", 64 | "name": "ir" 65 | }, 66 | "language_info": { 67 | "codemirror_mode": "r", 68 | "file_extension": ".r", 69 | "mimetype": "text/x-r-source", 70 | "name": "R", 71 | "pygments_lexer": "r", 72 | "version": "3.4.1" 73 | } 74 | }, 75 | "nbformat": 4, 76 | "nbformat_minor": 1 77 | } 78 | -------------------------------------------------------------------------------- /Assignments/1_Installations/installing_jupyter_notebook.md: -------------------------------------------------------------------------------- 1 | # Installing Jupyter Notebook and R on your computer 2 | 3 | 1. First, download and install Anaconda on your computer. You can download it here: [https://www.continuum.io/downloads](https://www.continuum.io/downloads) WARNING: It will take up 845MB of your hard drive. Install the version for Python 3.5. **IF YOU HAVE ALREADY INSTALL ANACONDA FOR A PREVIOUS CLASS, YOU DO NOT NEED TO DO THIS AGAIN.** 4 | 2. Open your terminal (Mac) or Command Prompt (Windows) and type: `conda list` 5 | 3. If a big stream of text goes by, you've got Anaconda installed. 6 | 4. If you get an error or nothing happens, come find me. **STOP HERE IF YOU GET AN ERROR MESSAGE**. 7 | 5. 
Let's update Anaconda by typing in your terminal `conda update conda` and then type `y` when it asks if you want to update the packages. 8 | 6. Now let's create an environment for us to work in. Type `conda create --name dataviz python=2 jupyter` into your terminal and say yes to the installation. What this does is it creates a whole new python environment that won't interfere with any python environments you have set up before (Mac users: Apple installs Python 2.7 in your system and messing with it can have disastrous consequences for your machine. Thus, environments). 9 | 7. Activate your new environment by typing `source activate dataviz` on a Mac/Linux and `activate dataviz` on a Windows machine. 10 | 8. Now let's install R and a ton of packages. To do that, we'll use Anaconda's R Essentials. Type `conda install r-essentials` and wait for it to finish. 11 | 9. Let's check if Jupyter Notebook is installed correctly by typing `jupyter notebook` and watching to see if a browser pops up with stuff in it. It should use your default browser. 12 | 10. On the top right of the browser, you should see a dropdown menu called New. Click that, and under Notebooks you should see R. 13 | 14 | If you see R there, then go download [this notebook](https://www.dropbox.com/s/1mn03dbf18llah1/Hello%20World%20in%20R.ipynb?dl=0), open it with Jupyter Notebooks and follow along with it. 15 | 16 | When you are done, go to File > Close and Halt. Then go to your terminal and hit control C and say yes to shutting down he Jupyter server. Your last step of the day: Type `source deactivate` on a Mac/Linux or `deactivate` on Windows. This exits out of your environment and returns you to your normal computer. 17 | 18 | We will do that every time we use this dataviz environment: We'll activate it when we start and deactivate it when we stop. 
19 | -------------------------------------------------------------------------------- /Assignments/18_LatticeMaps/latticemaps.md: -------------------------------------------------------------------------------- 1 | # Lattice maps 2 | ### aka my computer is on fire 3 | 4 | Lattice maps are a very cool way of doing small multiples with maps. But they are not for the uncommitted -- they're large, they require time and processing, and you are going to hear your laptop fan. 5 | 6 | First we'll get set up with what we'll need. 7 | 8 | ```r 9 | library(dplyr) 10 | library(ggplot2) 11 | library(sf) 12 | library(albersusa) 13 | library(repr) 14 | options(repr.plot.width=11, repr.plot.height=8) 15 | ``` 16 | Next we'll get our data, which is a file of local area unemployment rates by county from the Bureau of Labor Statistics from Dec. 2016 to Jan. 2018. I've cleaned out some junk from the BLS. [You can download my data here](https://www.dropbox.com/s/j6qiad39f9l27nl/laucntycur14.csv?dl=0). 17 | 18 | ```r 19 | unemp <- read.csv("~/Dropbox/JOUR407-Data-Visualization/Data/laucntycur14.csv", colClasses=c("State"="character", "County"="character")) 20 | ``` 21 | 22 | Now, just like our previous mapping exercise, we'll create a `fips` field so we can join the data to the map. 23 | 24 | ```r 25 | geoid <- unemp %>% mutate( 26 | fips = paste(State, County, sep="") 27 | ) 28 | ``` 29 | 30 | Now we'll bring in a county map. 31 | 32 | ``` 33 | county_geom <- counties_sf("aeqd") 34 | ``` 35 | 36 | Ths is where lattice maps get different. With a single county map, we just wanted to join the map to the data and we had one county for one mapped county. With lattice maps, we need to join the MAP to the DATA. Meaning we're going to have lots of copies of the map around because we've joined them to the data that has lots of repeats of the county, one for each year the dataset covers. 
37 | 38 | ``` 39 | nation <- geoid %>% inner_join(county_geom, by="fips") 40 | ``` 41 | We'll set up our map theme to get rid of all the cruft. 42 | 43 | ``` 44 | theme_map <- theme( 45 | panel.background = element_blank(), 46 | plot.background = element_blank(), 47 | panel.grid.minor = element_blank(), 48 | text = element_text(family = "Helvetica", size=16), 49 | axis.title = element_text(size=12), 50 | axis.ticks = element_blank(), 51 | strip.background = element_blank(), 52 | panel.grid.major = element_line(colour = 'transparent'), 53 | axis.text = element_blank() 54 | ) 55 | ``` 56 | 57 | This bit is new. Our dates aren't really dates, and we need them in order. So we're going to create a new field called period_f, which is a factor, and we're going to spell out the order we want them to be in. 58 | 59 | ``` 60 | nation$period_f = factor(nation$Period, levels=c('Dec-16', 'Jan-17','Feb-17','Mar-17','Apr-17','May-17', 'Jun-17', 'Jul-17', 'Aug-17', 'Sep-17', 'Oct-17', 'Nov-17', 'Dec-17', 'Dec-17 p', 'Jan-18 p')) 61 | ``` 62 | 63 | Now, we make a map. If you've followed along until now, get comfortable. This takes a while. 64 | 65 | ``` 66 | ggplot(nation) + theme_map + geom_sf(col="transparent", aes(fill=Rate), color = NA) + scale_fill_gradient(low = "#00005C", high = "#F5AD00", guide = FALSE) + facet_wrap(~period_f) 67 | ``` 68 | 69 | The facet_wrap works just the same as other facet charts. 70 | 71 | The result: 72 | 73 | !["facet map"](map.png) 74 | -------------------------------------------------------------------------------- /Assignments/17_Maps/maps.md: -------------------------------------------------------------------------------- 1 | # Mapping with R 2 | 3 | First and foremost, I want to be clear on this: Mapping is hard. There is a lot going on, and a lot of external libraries necessary to make it work. So this won't be as easy as working with standard datasets and libraries. 
4 | 5 | We're going to borrow heavily from John Burn-Murdoch's [good work](https://twitter.com/jburnmurdoch/status/981074810020204544), and he graciously included the code [here](https://gist.githubusercontent.com/johnburnmurdoch/2dd39f56631ffffe4a99633c76781a1e/raw/d620455ad45b07e15c141318c0a9a437ffeb5096/main.R). 6 | 7 | To do this, we're going to have to get out of Jupyter Notebooks and Anaconda, which has terrible support for various mapping libraries. We're going to use the industry standard tool for using R, which is called R Studio. To install R Studio, we have to first install R by downloading your computer's version [here](https://cran.rstudio.com/). 8 | 9 | Then install RStudio by going [here](https://www.rstudio.com/products/rstudio/download/#download). 10 | 11 | Open R Studio. Here's what it will look like. We're going to do our work in the console on the left. 12 | 13 | ![screenshot](screen1.png) 14 | 15 | Now we have to set up our environment by installing all the packages we're used to plus more. 16 | 17 | ```R 18 | install.packages('devtools') 19 | install.packages('sf') 20 | devtools::install_github("hrbrmstr/albersusa") 21 | devtools::install_github("tidyverse/ggplot2") 22 | install.packages('dplyr') 23 | ``` 24 | 25 | Now let's load some libraries up: 26 | 27 | ```R 28 | library('sf') 29 | library('ggplot2') 30 | library('dplyr') 31 | library('utils') 32 | library('magrittr') 33 | library('albersusa') 34 | ``` 35 | First things first, let's get some map data from the `sf` library. Then let's get our population data we used in the Live Fire Exercise last time. This code adds a couple of directives that tell R to load a column in as text, not numbers, to preserve the leading zeros. This will be important later. 
36 | 37 | ```R 38 | county_geom <- counties_sf("aeqd") 39 | 40 | population <- read.csv(url("https://www2.census.gov/programs-surveys/popest/datasets/2010-2017/counties/totals/co-est2017-alldata.csv"), colClasses=c("STATE"="character", "COUNTY"="character")) 41 | ``` 42 | This part is like what you are accustomed to. Let's create a new dataframe, filter out statewide totals, create a couple of fields to create a new identifier out of state and county fips codes and calculate percent change. Then we'll slim our dataset down to just the fields we need. 43 | 44 | After that, we'll join our new data to the map using the fips code as the join condition. 45 | 46 | ```R 47 | countygeoid <- population %>% 48 | filter(SUMLEV==50) %>% 49 | mutate( 50 | fips = paste(STATE, COUNTY, sep=""), 51 | change = ((POPESTIMATE2017-POPESTIMATE2010)/POPESTIMATE2010)*100 52 | ) %>% 53 | select(c(fips, STNAME, CTYNAME, change)) 54 | 55 | nation <- county_geom %>% inner_join(countygeoid, by="fips") 56 | ``` 57 | 58 | Now, before we map, let's make a theme for our map, called `theme_map`, that gets rid of lots of the extra cruft that `ggplot2` adds to maps. 59 | 60 | ```R 61 | theme_map <- theme( 62 | panel.background = element_blank(), 63 | plot.background = element_blank(), 64 | panel.grid.minor = element_blank(), 65 | text = element_text(family = "Helvetica", size=16), 66 | axis.title = element_text(size=12), 67 | axis.ticks = element_blank(), 68 | strip.background = element_blank(), 69 | panel.grid.major = element_line(colour = 'transparent'), 70 | axis.text = element_blank() 71 | ) 72 | ``` 73 | 74 | Now let's make a map. After all this, it's similar to what you are used to: use ggplot, tell it the dataframe, then use a special geom called `geom_sf` to render the map. If it's all gone well, we should see a map. 
75 | 76 | ```R 77 | ggplot(nation) + 78 | theme_map + 79 | geom_sf(col="transparent", aes(fill=change)) + 80 | scale_fill_distiller(type="div", direction=-1) 81 | ``` 82 | 83 | What's the problem here? 84 | -------------------------------------------------------------------------------- /Data/registeredvoters.csv: -------------------------------------------------------------------------------- 1 | County,Republican10,Democrat10,Libertarian10,Nonpartisan10,Total10,Republican16,Democrat16,Nonpartisan16,Libertarian16,Total16 Adams,10018,5536,6,2972,18532,10746,5027,3591,163,19527 Antelope,3005,1147,0,538,4690,3088,863,594,12,4557 Arthur,284,52,0,10,346,286,37,15,3,341 Banner,424,53,0,53,530,427,38,73,7,545 Blaine,314,56,0,24,394,310,43,29,2,384 Boone,2390,1156,0,408,3954,2469,901,404,11,3785 Box Butte,4115,2347,3,1286,7751,4278,1852,1395,52,7577 Boyd,1036,338,0,129,1503,1084,250,156,4,1494 Brown,1663,363,0,224,2250,1658,253,214,9,2134 Buffalo,15768,6785,14,4537,27104,16974,6453,5461,305,29193 Burt,2521,1694,0,834,5049,2540,1440,878,35,4893 Butler,3044,2134,0,939,6117,3066,1587,913,19,5585 Cass,8216,5435,6,3755,17412,8472,4633,4025,180,17310 Cedar,3072,2231,0,886,6189,3503,1574,869,14,5960 Chase,1827,526,0,263,2616,1830,371,277,8,2486 Cherry,2945,780,0,433,4158,3154,566,455,10,4185 Cheyenne,4146,1484,4,1266,6900,4250,1221,1336,56,6863 Clay,2889,1081,0,629,4599,2743,848,650,17,4258 Colfax,2364,2394,0,587,5345,2332,2018,701,18,5069 Cuming,3639,1485,1,754,5879,3635,1240,889,23,5787 Custer,5450,1871,1,1044,8366,5427,1440,1180,55,8102 Dakota,3577,4238,3,2270,10088,3674,4255,2691,80,10700 Dawes,3505,1422,1,1082,6010,3469,1316,1193,48,6026 Dawson,7666,3620,3,2289,13578,7707,3660,2434,115,13916 Deuel,1027,202,0,214,1443,998,176,207,10,1391 Dixon,1887,1309,0,618,3814,2272,957,545,16,3790 Dodge,10683,7132,2,4059,21876,11009,6265,4669,170,22113 Douglas,120973,125194,123,66976,313266,129688,132466,80315,3497,345966 Dundy,1013,221,0,134,1368,958,155,158,1,1272 
Fillmore,2317,1230,0,590,4137,2378,1024,658,18,4078 Franklin,1356,608,1,339,2304,1428,483,360,10,2281 Frontier,1316,389,0,231,1936,1283,299,263,9,1854 Furnas,2240,784,2,484,3510,2291,649,533,19,3492 Gage,7421,5072,0,2709,15202,7166,4091,2778,101,14136 Garden,1156,263,0,131,1550,1091,221,145,5,1462 Garfield,959,244,1,117,1321,966,194,118,3,1281 Gosper,1003,314,0,175,1492,1048,246,181,5,1480 Grant,381,63,0,41,485,418,45,37,0,500 Greeley,578,1073,0,166,1817,626,814,152,2,1594 Hall,15782,9824,2,6066,31674,16436,9538,6803,249,33026 Hamilton,4107,1512,1,838,6458,4365,1252,944,49,6610 Harlan,1559,675,0,301,2535,1575,528,342,7,2452 Hayes,577,110,0,82,769,551,84,84,1,720 Hitchcock,1210,494,0,316,2020,1278,350,356,12,1996 Holt,4981,1549,2,710,7242,5102,1151,723,23,6999 Hooker,464,95,0,50,609,420,80,40,2,542 Howard,2241,1602,1,547,4391,2448,1347,640,21,4456 Jefferson,2775,1688,2,853,5318,2691,1338,922,30,4981 Johnson,1539,1021,0,387,2947,1532,798,422,15,2767 Kearney,2784,1050,1,596,4431,2818,902,685,29,4434 Keith,4054,1396,0,1073,6523,4024,1078,1062,39,6203 Keya Paha,561,94,0,31,686,516,76,44,3,639 Kimball,1987,587,1,451,3026,1920,506,552,11,2989 Knox,3118,2014,0,822,5954,3312,1621,871,18,5822 Lancaster,74342,67217,46,36208,177813,76898,68127,40130,1895,187050 Lincoln,12419,7120,6,3668,23213,13574,5968,4275,195,24012 Logan,415,90,0,54,559,425,70,68,1,564 Loup,400,83,0,35,518,421,77,26,1,525 Madison,12462,5425,5,3480,21372,12548,4888,3892,134,21462 McPherson,300,72,0,15,387,307,40,24,3,374 Merrick,3194,1345,1,729,5269,3213,998,715,28,4954 Morrill,2157,724,1,512,3394,2137,594,525,27,3283 Nance,1242,980,0,322,2544,1269,770,335,9,2383 Nemaha,2664,1609,1,737,5011,2462,1259,817,25,4563 Nuckolls,1786,1078,0,411,3275,1850,838,456,18,3162 Otoe,5350,3350,3,2068,10771,5324,2783,2264,76,10447 Pawnee,1061,618,0,270,1949,1084,494,270,10,1858 Perkins,1316,460,0,283,2059,1368,327,280,7,1982 Phelps,4179,1341,0,815,6335,4297,1048,857,33,6235 Pierce,3188,1025,0,663,4876,3302,766,785,19,4872 
Platte,10998,5657,6,2774,19435,12051,4867,3057,141,20116 Polk,2192,908,1,407,3508,2298,730,473,15,3516 Red Willow,4300,1668,4,1355,7327,4621,1240,1341,37,7239 Richardson,3170,2174,1,842,6187,3004,1754,889,22,5669 Rock,829,161,0,71,1061,825,125,73,1,1024 Saline,2940,3709,4,1352,8005,3062,3303,1416,49,7830 Sarpy,43657,27188,40,21900,92785,50057,29538,27620,1258,108473 Saunders,7229,4851,3,2098,14181,8124,3952,2524,96,14696 Scotts Bluff,12734,6821,11,4305,23871,13033,6067,4854,185,24139 Seward,5841,3281,2,1778,10902,6109,2761,1991,65,10926 Sheridan,2710,843,0,557,4110,2755,678,618,15,4066 Sherman,930,996,0,171,2097,983,820,196,11,2010 Sioux,687,127,0,109,923,696,108,107,8,919 Stanton,2517,832,0,510,3859,2454,752,546,30,3782 Thayer,2298,1011,0,480,3789,2316,864,594,17,3791 Thomas,399,75,0,34,508,409,47,49,1,506 Thurston,1324,2514,0,576,4414,1277,2582,660,10,4529 Valley,1904,933,0,340,3177,1895,745,348,7,2995 Washington,7613,3694,1,2626,13934,8201,3212,2794,115,14322 Wayne,3300,1204,0,804,5308,3255,1101,836,44,5236 Webster,1512,757,1,343,2613,1456,574,371,12,2413 Wheeler,451,155,0,48,654,420,127,57,3,607 York,7168,2188,2,1432,10790,6658,1783,1510,62,10013 -------------------------------------------------------------------------------- /Assignments/14_Transparency/Transparency.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Transparency and publication\n", 8 | "\n", 9 | ">__\"You should have a strong command of at least one toolset that (a) allows for filtering, joining, pivoting, and aggregating tabular data, and (b) enables reproducible workflows.\"__ -- Buzzfeed job posting, 2017\n", 10 | "\n", 11 | ">__\"As a general rule, all assertions in a story based on data analysis should be reproducible. 
The methodology description in the story or accompanying materials should provide a road map to replicate the analysis.\"__ -- The Associated Press Stylebook\n", 12 | "\n", 13 | "Trust in media is low and declining. The reasons for this are myriad, and we can control only what we can control. One thing we can do -- be more transparent about what we do. And this notion goes beyond just journalism -- why should anyone trust what you have to say if you can't show your work? \n", 14 | "\n", 15 | "Jupyter Notebooks are good at this, being able to mix code and text. But your notebooks are currently only visible to you. So we're going to work on improving your notebooks with Markdown and Github.\n", 16 | "\n", 17 | "### Markdown\n", 18 | "\n", 19 | "Markdown is what you are writing in when you aren't writing code in Jupyter Notebooks. It's very simple, and there's only a finite number of things you can do with it, but you can drasically improve your notebooks with some simple typographic tricks. Here's a partial listing of what you can do in Markdown that might be useful in notebooks.\n", 20 | "\n", 21 | "#### Headers\n", 22 | "\n", 23 | "```\n", 24 | "# h1\n", 25 | "## h2\n", 26 | "### h3\n", 27 | "#### h4\n", 28 | "##### h5\n", 29 | "```\n", 30 | "\n", 31 | "Which looks like:\n", 32 | "\n", 33 | "# h1\n", 34 | "## h2\n", 35 | "### h3\n", 36 | "#### h4\n", 37 | "##### h5\n", 38 | "\n", 39 | "#### Blockquotes\n", 40 | "\n", 41 | "To get a block quote, add `>` at the beginning of the line. \n", 42 | "\n", 43 | "It looks like:\n", 44 | "\n", 45 | "> This is a block quote\n", 46 | "\n", 47 | "#### Horizontal rule\n", 48 | "\n", 49 | "You can use a horizontal rule to separate content -- a thematic break. 
\n", 50 | "\n", 51 | "In Jupyter notebooks, you create a horizontal rule with three dashes: `---`\n", 52 | "\n", 53 | "Which looks like:\n", 54 | "\n", 55 | "---\n", 56 | "\n", 57 | "#### Text treatments\n", 58 | "\n", 59 | "You can **bold** text, _italicize_ text, even ~~strikethrough~~ text with `**bold**`, `_italicize_` and `~~strikethrough~~`. \n", 60 | "\n", 61 | "#### Lists\n", 62 | "\n", 63 | "You can create bulleted or numbered lists like this:\n", 64 | "\n", 65 | "```\n", 66 | "* Bullet 1\n", 67 | "* Bullet 2\n", 68 | "* Bullet 3\n", 69 | "\n", 70 | "1. Numbered list 1\n", 71 | "2. Numbered list 2\n", 72 | "3. Numbered list 3\n", 73 | "```\n", 74 | "Which looks like:\n", 75 | "\n", 76 | "* Bullet 1\n", 77 | "* Bullet 2\n", 78 | "* Bullet 3\n", 79 | "\n", 80 | "And:\n", 81 | "\n", 82 | "1. Numbered list 1\n", 83 | "2. Numbered list 2\n", 84 | "3. Numbered list 3\n", 85 | "\n", 86 | "#### Links\n", 87 | "\n", 88 | "You can add a link like this: `[text to be linked](http://website.com)` \n", 89 | "It looks like this: [text to be linked](http://www.google.com)\n", 90 | "\n", 91 | "#### Tables\n", 92 | "\n", 93 | "Tables are good at showing tabular data. Sounds basic, but people seem to forget tables when there are so many good data visualization options out there. Tables look like this:\n", 94 | "\n", 95 | "```\n", 96 | "| FieldName1 | FieldName2 |\n", 97 | "| ---------- | ---------- |\n", 98 | "| foo | bar |\n", 99 | "| baz | bing |\n", 100 | "| boo | buzz |\n", 101 | "```\n", 102 | "\n", 103 | "And that looks like:\n", 104 | "\n", 105 | "| FieldName1 | FieldName2 |\n", 106 | "| ---------- | ---------- |\n", 107 | "| foo | bar |\n", 108 | "| baz | bing |\n", 109 | "| boo | buzz |\n", 110 | "\n", 111 | "#### Images\n", 112 | "\n", 113 | "The way to handle images in your post is to put the images in the same folder as your Jupyter Notebook and path to them. 
\n", 114 | "\n", 115 | "To embed an image, it looks like this: `![Dog](dog.jpg)` \n", 116 | "\n", 117 | "![Dog](dog.jpg)\n", 118 | "\n", 119 | "## GitHub\n", 120 | "\n", 121 | "GitHub is a social code sharing website used by millions of developers around the world. It's a place for people to put their code so others can see it, be inspired by it, even participate in it. Other developers can make a copy of your software, improve it and give that back to you. \n", 122 | "\n", 123 | "It's also an ideal place to store your notebooks to foster transparency. With some simple tools, you can publish your notebooks next to your stories so readers who want to know more can see how you did what you did. \n", 124 | "\n", 125 | "You get transparency and replicability in one swoop.\n", 126 | "\n", 127 | "First things first: [Create an account](https://github.com/).\n", 128 | "\n", 129 | "On GitHub you create **repositories** of code. You will have a local copy on your computer, and you'll have a copy on GitHub. You will keep them in sync using **commits** where you will **push** code to GitHub or **pull** it down from Github, depending on which way you need to move code. \n", 130 | "\n", 131 | "Let's make your first repository, just to test it out. First click on the green **New Repository** button. Now we need to give our repostitory a name, a description, and initialize it with a README file. \n", 132 | "\n", 133 | "![Screen1](screen1.png)\n", 134 | "\n", 135 | "### GitHub desktop\n", 136 | "\n", 137 | "For most people, the easiest way to work with GitHub is through their desktop application. [You can download it here](https://desktop.github.com/).\n", 138 | "\n", 139 | "Log into your account via the desktop app. What we first need to do is **clone** our repository to our local machine. \n", 140 | "\n", 141 | "Once logged in, click the plus button in the top right corner and then click Clone. 
\n", 142 | "\n", 143 | "![Screen2](screen2.png)\n", 144 | "\n", 145 | "Click on your repository from the list and then click Clone your project. Tell GitHub where to clone it -- this is up to you, but make it somewhere you can find it again and do not move it. \n", 146 | "\n", 147 | "Now that we have a clone of it, let's edit the README file. Let's add this sentence: \"I am learning about GitHub.\"\n", 148 | "\n", 149 | "Save the file and go back to GitHub Desktop. You should see you have 1 uncommitted change.\n", 150 | "\n", 151 | "![Screen3](screen3.png)\n", 152 | "\n", 153 | "Click that. You are now going to create a **commit message**, which is like a note to yourself as to what this change is. In this case, we edited README, so add that as the summary and click **Commit to Master**, which is what you are doing. You have a master branch of your code. If, later, you wanted to try something new but didn't want to mess with your existing code, you could create a branch off of master, work there, and if it worked you could roll it back into master. But that's a topic for another day. \n", 154 | "\n", 155 | "Once you have committed to master, you haven't actually sent it to GitHub until you hit the Sync button in the top right. This is the **push** and **pull** parts of GitHub. The desktop app does them all at once. On the command line, these are separate commands. \n", 156 | "\n", 157 | "### Adding your files\n", 158 | "\n", 159 | "With a repository set up like this, you can add your Jupyter Notebooks and other files into the folder and commit them. GitHub will render a notebook as HTML in the browser, which is what makes this an ideal way to do this. \n", 160 | "\n", 161 | "### Assignment\n", 162 | "\n", 163 | "This is how you are going to publish your first story. You are going to combine your code, graphics and text into a single notebook to tell your story. 
Your notebook should ONLY be the code needed to tell the story -- your scratch work or errors should be in a separate file. You will use Markdown to give it a headline, byline and add your story text between your graphics. You will embed your finished graphics -- if you do them in ggplot or fix them up in Illustrator is up to you -- in the notebook. When it's done, it should be ready to publish in a particularly nerdy publication that likes R code mixed in with stories. \n", 164 | "\n", 165 | "You do not need to turn in anything for this assignment, but for your first major assignment, you will turn in the GitHub URL for your project. It will look something like this: [https://github.com/mattwaite/JOUR491-Data-Visualization/blob/master/Assignments/11_FinishingTouches/FinishingTouches.ipynb](https://github.com/mattwaite/JOUR491-Data-Visualization/blob/master/Assignments/11_FinishingTouches/FinishingTouches.ipynb)\n" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": { 172 | "collapsed": true 173 | }, 174 | "outputs": [], 175 | "source": [] 176 | } 177 | ], 178 | "metadata": { 179 | "anaconda-cloud": {}, 180 | "kernelspec": { 181 | "display_name": "R", 182 | "language": "R", 183 | "name": "ir" 184 | }, 185 | "language_info": { 186 | "codemirror_mode": "r", 187 | "file_extension": ".r", 188 | "mimetype": "text/x-r-source", 189 | "name": "R", 190 | "pygments_lexer": "r", 191 | "version": "3.4.1" 192 | } 193 | }, 194 | "nbformat": 4, 195 | "nbformat_minor": 1 196 | } 197 | -------------------------------------------------------------------------------- /Assignments/14_Transparency/.ipynb_checkpoints/Transparency-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Transparency and publication\n", 8 | "\n", 9 | ">__\"You should have a strong command of at least one toolset that 
(a) allows for filtering, joining, pivoting, and aggregating tabular data, and (b) enables reproducible workflows.\"__ -- Buzzfeed job posting, 2017\n", 10 | "\n", 11 | ">__\"As a general rule, all assertions in a story based on data analysis should be reproducible. The methodology description in the story or accompanying materials should provide a road map to replicate the analysis.\"__ -- The Associated Press Stylebook\n", 12 | "\n", 13 | "Trust in media is low and declining. The reasons for this are myriad, and we can control only what we can control. One thing we can do -- be more transparent about what we do. And this notion goes beyond just journalism -- why should anyone trust what you have to say if you can't show your work? \n", 14 | "\n", 15 | "Jupyter Notebooks are good at this, being able to mix code and text. But your notebooks are currently only visible to you. So we're going to work on improving your notebooks with Markdown and Github.\n", 16 | "\n", 17 | "### Markdown\n", 18 | "\n", 19 | "Markdown is what you are writing in when you aren't writing code in Jupyter Notebooks. It's very simple, and there's only a finite number of things you can do with it, but you can drasically improve your notebooks with some simple typographic tricks. Here's a partial listing of what you can do in Markdown that might be useful in notebooks.\n", 20 | "\n", 21 | "#### Headers\n", 22 | "\n", 23 | "```\n", 24 | "# h1\n", 25 | "## h2\n", 26 | "### h3\n", 27 | "#### h4\n", 28 | "##### h5\n", 29 | "```\n", 30 | "\n", 31 | "Which looks like:\n", 32 | "\n", 33 | "# h1\n", 34 | "## h2\n", 35 | "### h3\n", 36 | "#### h4\n", 37 | "##### h5\n", 38 | "\n", 39 | "#### Blockquotes\n", 40 | "\n", 41 | "To get a block quote, add `>` at the beginning of the line. \n", 42 | "\n", 43 | "It looks like:\n", 44 | "\n", 45 | "> This is a block quote\n", 46 | "\n", 47 | "#### Horizontal rule\n", 48 | "\n", 49 | "You can use a horizontal rule to separate content -- a thematic break. 
\n", 50 | "\n", 51 | "In Jupyter notebooks, you create a horizontal rule with three dashes: `---`\n", 52 | "\n", 53 | "Which looks like:\n", 54 | "\n", 55 | "---\n", 56 | "\n", 57 | "#### Text treatments\n", 58 | "\n", 59 | "You can **bold** text, _italicize_ text, even ~~strikethrough~~ text with `**bold**`, `_italicize_` and `~~strikethrough~~`. \n", 60 | "\n", 61 | "#### Lists\n", 62 | "\n", 63 | "You can create bulleted or numbered lists like this:\n", 64 | "\n", 65 | "```\n", 66 | "* Bullet 1\n", 67 | "* Bullet 2\n", 68 | "* Bullet 3\n", 69 | "\n", 70 | "1. Numbered list 1\n", 71 | "2. Numbered list 2\n", 72 | "3. Numbered list 3\n", 73 | "```\n", 74 | "Which looks like:\n", 75 | "\n", 76 | "* Bullet 1\n", 77 | "* Bullet 2\n", 78 | "* Bullet 3\n", 79 | "\n", 80 | "And:\n", 81 | "\n", 82 | "1. Numbered list 1\n", 83 | "2. Numbered list 2\n", 84 | "3. Numbered list 3\n", 85 | "\n", 86 | "#### Links\n", 87 | "\n", 88 | "You can add a link like this: `[text to be linked](http://website.com)` \n", 89 | "It looks like this: [text to be linked](http://www.google.com)\n", 90 | "\n", 91 | "#### Tables\n", 92 | "\n", 93 | "Tables are good at showing tabular data. Sounds basic, but people seem to forget tables when there are so many good data visualization options out there. Tables look like this:\n", 94 | "\n", 95 | "```\n", 96 | "| FieldName1 | FieldName2 |\n", 97 | "| ---------- | ---------- |\n", 98 | "| foo | bar |\n", 99 | "| baz | bing |\n", 100 | "| boo | buzz |\n", 101 | "```\n", 102 | "\n", 103 | "And that looks like:\n", 104 | "\n", 105 | "| FieldName1 | FieldName2 |\n", 106 | "| ---------- | ---------- |\n", 107 | "| foo | bar |\n", 108 | "| baz | bing |\n", 109 | "| boo | buzz |\n", 110 | "\n", 111 | "#### Images\n", 112 | "\n", 113 | "The way to handle images in your post is to put the images in the same folder as your Jupyter Notebook and path to them. 
\n", 114 | "\n", 115 | "To embed an image, it looks like this: `![Dog](dog.jpg)` \n", 116 | "\n", 117 | "![Dog](dog.jpg)\n", 118 | "\n", 119 | "## GitHub\n", 120 | "\n", 121 | "GitHub is a social code sharing website used by millions of developers around the world. It's a place for people to put their code so others can see it, be inspired by it, even participate in it. Other developers can make a copy of your software, improve it and give that back to you. \n", 122 | "\n", 123 | "It's also an ideal place to store your notebooks to foster transparency. With some simple tools, you can publish your notebooks next to your stories so readers who want to know more can see how you did what you did. \n", 124 | "\n", 125 | "You get transparency and replicability in one swoop.\n", 126 | "\n", 127 | "First things first: [Create an account](https://github.com/).\n", 128 | "\n", 129 | "On GitHub you create **repositories** of code. You will have a local copy on your computer, and you'll have a copy on GitHub. You will keep them in sync using **commits** where you will **push** code to GitHub or **pull** it down from Github, depending on which way you need to move code. \n", 130 | "\n", 131 | "Let's make your first repository, just to test it out. First click on the green **New Repository** button. Now we need to give our repostitory a name, a description, and initialize it with a README file. \n", 132 | "\n", 133 | "![Screen1](screen1.png)\n", 134 | "\n", 135 | "### GitHub desktop\n", 136 | "\n", 137 | "For most people, the easiest way to work with GitHub is through their desktop application. [You can download it here](https://desktop.github.com/).\n", 138 | "\n", 139 | "Log into your account via the desktop app. What we first need to do is **clone** our repository to our local machine. \n", 140 | "\n", 141 | "Once logged in, click the plus button in the top right corner and then click Clone. 
\n", 142 | "\n", 143 | "![Screen2](screen2.png)\n", 144 | "\n", 145 | "Click on your repository from the list and then click Clone your project. Tell GitHub where to clone it -- this is up to you, but make it somewhere you can find it again and do not move it. \n", 146 | "\n", 147 | "Now that we have a clone of it, let's edit the README file. Let's add this sentence: \"I am learning about GitHub.\"\n", 148 | "\n", 149 | "Save the file and go back to GitHub Desktop. You should see you have 1 uncommitted change.\n", 150 | "\n", 151 | "![Screen3](screen3.png)\n", 152 | "\n", 153 | "Click that. You are now going to create a **commit message**, which is like a note to yourself as to what this change is. In this case, we edited README, so add that as the summary and click **Commit to Master**, which is what you are doing. You have a master branch of your code. If, later, you wanted to try something new but didn't want to mess with your existing code, you could create a branch off of master, work there, and if it worked you could roll it back into master. But that's a topic for another day. \n", 154 | "\n", 155 | "Once you have committed to master, you haven't actually sent it to GitHub until you hit the Sync button in the top right. This is the **push** and **pull** parts of GitHub. The desktop app does them all at once. On the command line, these are separate commands. \n", 156 | "\n", 157 | "### Adding your files\n", 158 | "\n", 159 | "With a repository set up like this, you can add your Jupyter Notebooks and other files into the folder and commit them. GitHub will render a notebook as HTML in the browser, which is what makes this an ideal way to do this. \n", 160 | "\n", 161 | "### Assignment\n", 162 | "\n", 163 | "This is how you are going to publish your first story. You are going to combine your code, graphics and text into a single notebook to tell your story. 
Your notebook should ONLY be the code needed to tell the story -- your scratch work or errors should be in a separate file. You will use Markdown to give it a headline, byline and add your story text between your graphics. You will embed your finished graphics -- if you do them in ggplot or fix them up in Illustrator is up to you -- in the notebook. When it's done, it should be ready to publish in a particularly nerdy publication that likes R code mixed in with stories. \n", 164 | "\n", 165 | "You do not need to turn in anything for this assignment, but for your first major assignment, you will turn in the GitHub URL for your project. It will look something like this: [https://github.com/mattwaite/JOUR491-Data-Visualization/blob/master/Assignments/11_FinishingTouches/FinishingTouches.ipynb](https://github.com/mattwaite/JOUR491-Data-Visualization/blob/master/Assignments/11_FinishingTouches/FinishingTouches.ipynb)\n" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": { 172 | "collapsed": true 173 | }, 174 | "outputs": [], 175 | "source": [] 176 | } 177 | ], 178 | "metadata": { 179 | "anaconda-cloud": {}, 180 | "kernelspec": { 181 | "display_name": "R", 182 | "language": "R", 183 | "name": "ir" 184 | }, 185 | "language_info": { 186 | "codemirror_mode": "r", 187 | "file_extension": ".r", 188 | "mimetype": "text/x-r-source", 189 | "name": "R", 190 | "pygments_lexer": "r", 191 | "version": "3.4.1" 192 | } 193 | }, 194 | "nbformat": 4, 195 | "nbformat_minor": 1 196 | } 197 | -------------------------------------------------------------------------------- /Data/mountainlions.csv: -------------------------------------------------------------------------------- 1 | ID,Cofirm Type,COUNTY,Date 1,Track,Dawes,9/14/91 2,Mortality,Sioux,11/10/91 3,Mortality,Scotts Bluff,4/21/96 4,Mortality,Sioux,5/9/99 5,Mortality,Box Butte,9/29/99 6,Track,Scotts Bluff,11/12/99 7,Track,Howard,2/26/00 8,Track,Scotts Bluff,9/15/00 
9,Mortality,Howard,11/20/00 10,Photo,Brown,12/1/01 11,Trail Camera Photo,Brown,6/1/02 12,Captured,Douglas,10/1/03 13,Track,Cherry,1/6/04 14,Mortality,Thomas,4/18/04 15,Track,Dawes,5/12/04 16,Trail Camera Photo,Keya Paha,9/10/04 17,Mortality,Dakota,11/23/04 18,Track,Dawes,1/31/05 19,Track,Brown,2/2/05 20,Photo,Dawes,6/7/05 21,Track,Dawes,7/25/05 22,Track,Dawes,8/28/05 23,Mortality,Sarpy,11/6/05 24,Trail Camera Photo,Sioux,11/22/05 25,Track,Dawes,1/5/06 26,Trail Camera Photo,Sioux,1/11/06 27,Trail Camera Photo,Dawes,2/17/06 28,Trail Camera Photo,Dawes,3/20/06 29,Trail Camera Photo,Scotts Bluff,4/4/06 30,Track,Custer,4/22/06 31,Mortality,Cherry,6/27/06 32,Mortality,Scotts Bluff,10/2/06 33,DNA,Dawes,10/23/06 34,Trail Camera Photo,Dawes,12/6/06 35,Track,Sheridan,12/20/06 36,Mortality,Dawes,2/28/07 37,Trail Camera Photo,Dawes,2/27/07 38,Trail Camera Photo,Dawes,2/27/07 39,Trail Camera Photo,Dawes,2/27/07 40,Trail Camera Photo,Sioux,3/27/07 41,Trail Camera Photo,Sioux,4/18/07 42,Mortality,Dawes,7/10/07 43,Trail Camera Photo,Sioux,7/18/07 44,Track,Dawes,10/10/07 45,Trail Camera Photo,Sioux,9/18/07 46,Trail Camera Photo,Banner,10/31/07 47,Mortality,Scotts Bluff,2/7/08 48,Mortality,Dawes,1/1/02 49,Mortality,Dawes,12/1/06 50,Track,Dawes,4/15/08 51,Trail Camera Photo,Knox,5/11/08 52,Track,Dawes,5/28/08 53,Track,Dawes,7/16/08 54,DNA,Dawes,5/22/08 55,Trail Camera Photo,Dawes,10/22/08 56,Trail Camera Photo,Sioux,9/7/08 57,Trail Camera Photo,Sioux,10/29/08 58,Mortality,Dawes,11/22/08 59,DNA,Sheridan,11/4/08 60,Prey,Dawes,10/12/08 61,Track,Sheridan,12/4/08 62,Track,Dawes,12/6/08 63,Track,Sheridan,12/10/08 64,Track,Dawes,12/11/08 65,Track,Dawes,11/16/08 66,Trail Camera Photo,Dawes,12/20/08 67,Trail Camera Photo,Dawes,12/20/08 68,Photo,Nance,1/12/09 69,Mortality,Scotts Bluff,1/25/09 70,Track,Platte,1/29/09 71,Trail Camera Photo,Sheridan,12/10/08 72,Trail Camera Photo,Sioux,3/2/09 73,Track,Dawes,4/27/09 74,Trail Camera Photo,Dawes,5/9/09 75,Trail Camera Photo,Dawes,5/9/09 76,Trail 
Camera Photo,Sioux,5/5/09 77,Prey,Dawes,6/23/09 78,DNA,Dawes,4/19/09 79,Track,Dawes,7/30/09 80,Trail Camera Photo,Sheridan,8/5/09 81,Photo,Dawes,8/6/09 82,Photo,Dawes,9/4/09 83,Trail Camera Photo,Sheridan,9/2/09 84,Trail Camera Photo,Sheridan,9/2/09 85,Track,Cherry,9/29/09 86,Prey,Dawes,9/30/09 87,Trail Camera Photo,Sheridan,8/15/09 88,Trail Camera Photo,Sioux,11/1/09 89,Trail Camera Photo,Dawes,11/6/09 90,Prey,Custer,12/17/09 91,Track,Dawson,12/18/09 92,Trail Camera Photo,Dawes,11/19/09 93,Prey,Dawes,12/8/09 94,Trail Camera Photo,Sheridan,12/21/09 95,Trail Camera Photo,Sheridan,12/21/09 96,Mortality,Sheridan,2/21/10 97,Track,Thomas,2/6/10 98,Track,Dawes,2/9/10 99,Track,Dawes,2/27/10 100,DNA,Dawes,10/7/09 101,DNA,Sheridan,11/28/09 102,DNA,Dawes,11/26/09 103,Trail Camera Photo,Rock,5/8/10 104,Trail Camera Photo,Sioux,4/2/10 105,Photo,Hooker,4/1/09 106,Trail Camera Photo,Sheridan,5/19/10 107,Trail Camera Photo,Dawes,7/31/10 108,Trail Camera Photo,Dawes,8/13/10 109,Trail Camera Photo,Lincoln,8/8/10 110,DNA,Cherry,4/7/10 111,Trail Camera Photo,Dawes,8/15/10 112,Trail Camera Photo,Custer,8/25/10 113,Trail Camera Photo,Dawson,9/9/10 114,Photo,Dawes,9/11/10 115,Mortality,Box Butte,9/14/10 116,Mortality,Dawes,9/17/10 117,Track,Banner,10/8/10 118,Mortality,Scotts Bluff,10/19/10 119,Trail Camera Photo,Sheridan,10/15/10 120,Trail Camera Photo,Sheridan,9/11/10 121,Trail Camera Photo,Sheridan,9/20/10 122,Mortality,Dawes,10/25/10 123,Trail Camera Photo,Dawson,9/24/10 124,Trail Camera Photo,Custer,10/15/10 125,Trail Camera Photo,Polk,11/8/10 126,Trail Camera Photo,Howard,10/24/10 127,Trail Camera Photo,Valley,11/1/10 128,Mortality,Sioux,11/17/10 129,Trail Camera Photo,Sheridan,9/11/10 130,Trail Camera Photo,Sherman,10/1/10 131,Photo,Sioux,12/21/10 132,Trail Camera Photo,Dawson,12/23/10 133,Trail Camera Photo,Cherry,11/2/10 134,DNA,Dawes,7/28/10 135,Photo,Dawes,2/10/11 136,Photo,Dawes,2/11/11 137,Trail Camera Photo,Dawes,3/4/11 138,Photo,Blaine,4/25/11 
139,Mortality,Saunders,4/30/11 140,Mortality,Buffalo,5/9/11 141,Trail Camera Photo,Sheridan,5/10/11 142,Trail Camera Photo,sheridan,5/10/11 143,Track,Lincoln,7/20/11 144,Track,Dawes,6/20/11 145,DNA,sheridan,4/7/11 146,Trail Camera Photo,Dawes,8/8/11 147,Track,Keya Paha,8/24/11 148,Trail Camera Photo,Dawes,8/31/11 149,Trail Camera Photo,Dawes,9/30/11 150,DNA,Sioux,3/17/11 151,Mortality,Knox,11/12/11 152,Trail Camera Photo,Sioux,9/13/11 153,Trail Camera Photo,Sioux,9/27/11 154,Trail Camera Photo,Sioux,11/19/11 155,Trail Camera Photo,Keya Paha,10/30/11 156,Trail Camera Photo,Sioux,10/31/11 157,Trail Camera Photo,Sioux,10/31/11 158,Trail Camera Photo,Brown,11/29/11 159,Trail Camera Photo,Sioux,9/28/11 160,Trail Camera Photo,Sioux,9/28/11 161,Trail Camera Photo,Keya Paha,11/27/11 162,Mortality,Dawes,12/26/11 163,Photo,Dawes,12/26/11 164,Photo,Dawes,12/26/11 165,Trail Camera Photo,Sioux,12/11/11 166,Trail Camera Photo,Sioux,12/11/11 167,Trail Camera Photo,Sioux,12/11/11 168,Trail Camera Photo,Sioux,11/30/11 169,Trail Camera Photo,Dawes,1/5/12 170,Trail Camera Photo,Rock,1/12/12 171,Trail Camera Photo,Brown,12/18/11 172,Trail Camera Photo,Keya Paha,10/23/11 173,Photo,Dawes,1/27/12 174,Trail Camera Photo,Rock,10/16/11 175,Photo,Dawes,2/9/12 176,DNA,Dawes,8/12/11 177,Mortality,Dawes,2/12/12 178,Trail Camera Photo,Sioux,12/11/11 179,Track,Thomas,2/15/12 180,Track,Sheridan,1/18/12 181,Track,Sheridan,1/18/12 182,Track,Thomas,7/15/11 183,Trail Camera Photo,Thurston,11/23/11 184,Trail Camera Photo,Scotts Bluff,3/3/12 185,Track,Thomas,2/12/12 186,DNA,Dawes,2/13/12 187,Track,Sioux,2/24/12 188,Trail Camera Photo,Dawes,3/23/12 189,Trail Camera Photo,Sioux,3/27/12 190,Trail Camera Photo,Sioux,3/27/12 191,Trail Camera Photo,Sioux,3/27/12 192,Trail Camera Photo,Dixon,2/4/12 193,Trail Camera Photo,Sioux,3/15/12 194,Trail Camera Photo,Custer,1/22/12 195,Trail Camera Photo,Custer,2/9/12 196,Trail Camera Photo,Custer,4/15/12 197,Trail Camera Photo,Holt,4/30/12 198,Trail Camera 
Photo,Dawes,5/11/12 199,Photo,Banner,7/8/12 200,Trail Camera Photo,Sioux,7/19/12 201,Trail Camera Photo,Dawes,8/16/12 202,Mortality,Kimball,8/18/12 203,Trail Camera Photo,Morrill,8/19/12 204,Trail Camera Photo,Sioux,7/31/12 205,Mortality,Scotts Bluff,8/27/12 206,DNA,Box Butte,8/23/12 207,Mortality,Sheridan,9/16/12 208,Photo,Sheridan,8/30/12 209,Photo,Sheridan,8/30/12 210,Photo,Sheridan,8/30/12 211,Trail Camera Photo,Lincoln,8/21/12 212,Trail Camera Photo,Scotts Bluff,9/24/12 213,Mortality,Sheridan,10/15/12 214,Trail Camera Photo,Rock,10/16/12 215,Mortality,Rock,11/10/12 216,Prey,Sheridan,9/3/12 217,Trail Camera Photo,Sioux,10/23/12 218,Trail Camera Photo,Cedar,10/28/12 219,Trail Camera Photo,Knox,11/14/12 220,Trail Camera Photo,Rock,11/25/12 221,Trail Camera Photo,Dawes,11/21/12 222,Trail Camera Photo,Sioux,12/1/12 223,Trail Camera Photo,Lincoln,12/12/12 224,Trail Camera Photo,Sheridan,11/13/12 225,Track,Scotts Bluff,1/5/13 226,Mortality,Scotts Bluff,1/9/13 227,DNA,Sioux,11/8/12 228,Track,Scotts Bluff,1/12/13 229,Track,Scotts Bluff,1/29/13 230,Mortality,Sheridan,2/8/13 231,Trail Camera Photo,Lincoln,11/24/12 232,Trail Camera Photo,Sheridan,7/26/12 233,Track,Cherry,1/23/13 234,Trail Camera Photo,Sioux,1/12/13 235,DNA,Dawes,1/7/13 236,Track,Cherry,2/7/13 237,Track,Cherry,2/25/13 238,Track,Dawes,2/25/13 239,Mortality,Dawes,3/28/13 240,Trail Camera Photo,Cherry,3/19/13 241,Photo,Dawes,5/4/13 242,DNA,Cherry,1/10/13 243,DNA,Scotts Bluff,2/20/13 244,Trail Camera Photo,Dawes,6/11/13 245,Trail Camera Photo,Sheridan,5/30/13 246,Trail Camera Photo,Morrill,7/18/13 247,Mortality,Sheridan,7/30/13 248,Track,Keith,8/4/13 249,DNA,Dawes,8/4/13 251,Prey,Dawes,8/19/13 250,Trail Camera Photo,Knox,10/10/12 252,Trail Camera Photo,Scotts Bluff,5/29/13 253,Trail Camera Photo,Scotts Bluff,7/13/13 254,Trail Camera Photo,Keya Paha,7/19/13 255,Trail Camera Photo,Keya Paha,8/26/13 256,Trail Camera Photo,Saunders,9/19/13 257,Trail Camera Photo,Dawes,10/24/13 258,Trail Camera Photo,Merrick,9/7/13 
259,Photo,Dawes,8/15/13 260,Trail Camera Photo,Cherry,9/25/13 261,Trail Camera Photo,Cherry,11/8/13 262,Trail Camera Photo,Lincoln,10/24/13 263,Mortality,Sioux,12/20/13 264,Trail Camera Photo,Dawes,11/28/13 265,Mortality,Dawes,1/2/14 266,Mortality,Sioux,1/2/14 267,Trail Camera Photo,Keya Paha,10/24/13 268,Trail Camera Photo,Dawes,1/9/14 269,Track,Lincoln,2/3/14 270,Mortality,Sioux,2/1/14 271,Mortality,Custer,2/16/14 272,Track,Cherry,2/20/14 273,Trail Camera Photo,Brown,12/17/13 274,Trail Camera Photo,Brown,1/2/14 275,Mortality,Sheridan,2/26/14 276,Trail Camera Photo,Brown,2/24/14 277,Trail Camera Photo,Brown,3/12/14 278,Mortality,Sheridan,3/21/14 279,Trail Camera Photo,Dawes,12/1/13 280,Trail Camera Photo,Dawes,12/1/13 281,Trail Camera Photo,Dawes,4/5/14 282,Trail Camera Photo,Knox,3/22/14 283,Trail Camera Photo,Scotts Bluff,5/15/14 284,Trail Camera Photo,Scotts Bluff,5/15/14 285,Trail Camera Photo,Scotts Bluff,5/15/14 286,Mortality,Dawes,5/2/14 287,Trail Camera Photo,Dawes,4/28/14 288,DNA,Blaine,4/2/14 289,Trail Camera Photo,Keya Paha,1/18/14 290,Trail Camera Photo,Keya Paha,4/16/14 291,DNA,Cherry,3/3/14 292,DNA,Dawes,3/29/13 293,Mortality,Sioux,6/22/14 294,Trail Camera Photo,Keya Paha,5/21/14 295,Trail Camera Photo,Keya Paha,5/4/14 296,Trail Camera Photo,Keya Paha,6/18/13 297,Trail Camera Photo,Dawes,4/23/13 298,Mortality,Dawes,7/19/14 299,Mortality,Dawes,8/2/14 300,Photo,Box Butte,8/6/14 301,Trail Camera Photo,Knox,8/5/14 302,Trail Camera Photo,Knox,8/11/14 303,Mortality,Dawes,9/6/14 304,Trail Camera Photo,Hall,9/21/14 305,Trail Camera Photo,Holt,9/17/14 306,Mortality,Knox,10/5/14 307,Track,Dixon,8/31/14 308,Mortality,Wheeler,10/9/14 309,Trail Camera Photo,Dawes,10/14/14 310,Trail Camera Photo,Dawes,10/14/14 311,Trail Camera Photo,Dawes,10/14/14 312,Trail Camera Photo,Scotts Bluff,8/20/14 313,Photo,Cherry,10/24/14 314,Mortality,Dawes,10/23/14 315,Trail Camera Photo,Dakota,10/25/14 316,Trail Camera Photo,Keya Paha,7/8/14 317,Mortality,Dakota,11/17/14 318,Trail 
Camera Photo,Brown,11/4/14 319,Trail Camera Photo,Cherry,9/7/14 320,Trail Camera Photo,Banner,11/9/14 321,Track,Cherry,11/18/14 322,Trail Camera Photo,Keya Paha,8/27/14 323,Trail Camera Photo,Keya Paha,12/3/14 324,Trail Camera Photo,Dixon,11/10/14 325,Trail Camera Photo,Cherry,10/3/14 326,Trail Camera Photo,Cherry,10/30/14 327,Trail Camera Photo,Cherry,11/23/14 328,Trail Camera Photo,Cherry,12/2/14 329,Trail Camera Photo,Cherry,11/3/14 330,Trail Camera Photo,Cherry,10/26/14 331,Photo,Dawes,12/8/14 332,Mortality,Dawes,12/10/14 333,Track,Rock,12/21/14 334,Trail Camera Photo,Banner,12/14/14 335,Trail Camera Photo,Dawes,12/17/14 336,Track,Dawson,12/27/14 337,Track,Brown,12/28/14 338,Trail Camera Photo,Lincoln,12/26/14 339,Trail Camera Photo,Brown,12/24/14 340,Trail Camera Photo,Rock,12/22/14 341,Trail Camera Photo,Rock,11/30/14 342,Trail Camera Photo,Rock,4/4/14 343,Track,Rock,11/19/14 344,Track,Brown,12/25/14 345,Track,Buffalo,1/24/15 346,Trail Camera Photo,Scotts Bluff,6/13/14 347,Trail Camera Photo,Scotts Bluff,6/29/14 348,Trail Camera Photo,Richardson,1/22/15 349,Track,Buffalo,1/17/15 350,Trail Camera Photo,Dawes,1/19/15 351,Trail Camera Photo,Lincoln,12/6/14 352,Photo,Brown,3/10/15 353,Trail Camera Photo,Banner,1/22/15 354,Mortality,Sheridan,3/8/15 355,Mortality,Douglas,5/6/15 356,Mortality,Sioux,4/25/15 357,Research,Scotts Bluff,2/2/15 358,Research,Scotts Bluff,2/3/15 359,Research,Dawes,2/10/15 360,Research,Dawes,3/8/15 361,Research,Dawes,3/19/15 362,Research,Dawes,5/16/15 363,Trail Camera Photo,Cherry,1/28/15 364,Trail Camera Photo,Cherry,2/19/15 365,Trail Camera Photo,Cherry,3/31/15 366,Trail Camera Photo,Cherry,4/13/15 367,Trail Camera Photo,Cherry,5/8/15 368,Trail Camera Photo,Cherry,5/26/15 369,Track,Blaine,2/8/15 370,Trail Camera Photo,Keya Paha,4/4/15 371,Track,Cherry,2/20/15 372,Trail Camera Photo,Dawes,2/15/15 373,Photo,Lincoln,6/2/15 374,Trail Camera Photo,Dawes,6/29/15 375,Trail Camera Photo,Sioux,4/25/15 376,Trail Camera Photo,Sioux,11/15/14 377,Trail 
Camera Photo,Sioux,11/7/14 378,Trail Camera Photo,Sioux,2/5/15 379,Trail Camera Photo,Sioux,3/14/15 380,Trail Camera Photo,Sioux,3/15/15 381,Trail Camera Photo,Sioux,3/18/15 382,DNA,Keya Paha,2/15/15 383,Trail Camera Photo,Nemaha,7/18/15 384,Trail Camera Photo,Frontier,7/10/15 385,Trail Camera Photo,Nemaha,7/2/15 386,Trail Camera Photo,Nemaha,7/28/15 387,Trail Camera Photo,Nemaha,9/3/15 389,Trail Camera Photo,Richardson,8/6/15 388,Trail Camera Photo,Nemaha,9/15/15 390,Trail Camera Photo,Keya Paha,8/11/15 391,Trail Camera Photo,Brown,8/28/15 392,Trail Camera Photo,Keya Paha,4/24/15 393,Research,Dawes,10/11/15 -------------------------------------------------------------------------------- /syllabus.md: -------------------------------------------------------------------------------- 1 | # JOUR 407/807 Data Visualization 2 | __Spring 2018__ 3 | __T-Th 3:30 - 4:45 p.m.__ 4 | __ANDN 27__ 5 | 6 | Instructor: Matt Waite 7 | Email: mwaite3@unl.edu 8 | Twitter: @mattwaite 9 | Phones: (402) 802-5202 cell, (402) 472-5840 office 10 | Office: 244 Andersen Hall 11 | 12 | __Course description:__ 13 | 14 | Welcome to data visualization, where you'll learn to use storytelling, analysis and visualization techniques to inform people with data. In this class, you'll learn what makes for good visualizations and how you can develop a deeper understanding of a topic through a combination of words and graphics, forming a new kind of narrative on the web. 
15 | 16 | __Course goals:__ 17 | 18 | * Understand the basics of data and data visualization 19 | * Understand the history of data visualization techniques 20 | * Master basic data analysis and visualization tools 21 | * Get hands on experience with more advanced tools 22 | * Publish visual stories using learned techniques to the web via single page web applications 23 | 24 | __Required texts:__ 25 | 26 | * The Visual Display of Quantitative Information by Edward Tufte 27 | * The Functional Art: An introduction to information graphics and visualization by Alberto Cairo 28 | 29 | __Other requirements:__ 30 | 31 | * Administrative privileges on a computer so you can install software 32 | * A GitHub account (free) 33 | * A Google account (free) 34 | * A sense of humor (also free) 35 | 36 | __News Nerd Slack:__ 37 | 38 | I use [Slack](https://slack.com/) as a way for students to get help from each other and me when we're not in class or not even in the building. I have one News Nerd Slack that includes my current code class, students of past classes and alumni who have gone on to do data journalism professionally. You'll be added to the slack during the semester, and at the end, I'll remove you unless you tell me otherwise. If you like it, stay. If not, no worries and no judgements. 39 | 40 | __Class opportunities:__ 41 | 42 | The opportunities in this class are to experiment with storytelling in a way you have never done before. Creativity and ambition will be rewarded. Copy and pasting will not. As such, this class will require you to try a lot of things on your own. If you've never done this before, it can be a little daunting starting out. To help you, I've set up a time called [Maker Hours](http://www.makerhours.org) where you can come, hang out, learn something new and get help with it. The information is in the link, but the short version is: Most Friday afternoons, room 27, Andersen Hall.
This time is open to any student wanting to learn programming, data viz, hardware hacking -- anything in the digital journalism space that can help them tell stories on the web. It's not required for you to show up at Maker Hours, but if you're struggling, stuck, frustrated or just really interested in learning more, come hang out. I promise you it's worth the time. 43 | 44 | __Grading:__ 45 | 46 | I use the standard grading scale. 47 | 48 | |Grade|Percentage| 49 | |-----|----------| 50 | |A+|97-100| 51 | |A|93-96| 52 | |A-|90-92| 53 | |B+|87-89| 54 | |B|83-86| 55 | |B-|80-82| 56 | |C+|77-79| 57 | |C|73-76| 58 | |C-|70-72| 59 | |D+|67-69| 60 | |D|63-66| 61 | |D-|60-62| 62 | |F|59 or below| 63 | 64 | However, your final letter grade will be weighted: 65 | 66 | Assignment|Weight 67 | ----------|------ 68 | Reading quizzes|10% 69 | Assignments|60% 70 | Final assignment|30% 71 | 72 | You will be graded on effort, creativity and clarity of your work. Work hard, try new things, let your effort show through in the work and you'll do fine. Mail it in and your grade will reflect it. 73 | 74 | A note: Not reflected in the grade weights is how I handle things like attendance and class participation. I do not take attendance, but I know if you are there or not. I do not record class participation, but I do know if you take part in class regularly and contribute. So when it comes time to submit grades, and you're right on the line between a B+ and A-, it's your attendance and participation that will sway me to round up or down. Show up, take part, be present and I round up, sometimes pretty generously. Sit on your phone, skip class, never talk and your grade is your grade, I don't care how close you are. 75 | 76 | __Final Project:__ 77 | The skills and the ideas we're going to discuss in this class are all building toward a final project where you will tell a story with data. 
The absolute minimum requirements of this final project are: 78 | 79 | * Tell a story with data using visual and narrative techniques discussed in the class. 80 | * This story must use three different types of visualization. 81 | * This story must have multiple data sources. 82 | * The story must use Tarbell for publication. 83 | 84 | You will be graded on: 85 | 86 | * The story 87 | * The techniques you use 88 | * The sophistication of the visualizations 89 | * The creativity you show 90 | * The effort you put in, evidenced by scrum participation, GitHub code check-ins, the questions you ask, etc. 91 | 92 | __Notes on attendance__ 93 | 94 | Yes, we all get sick. Yes, things happen. I don’t want you to be sick in my class any more than you want to be sick. You’ve got no fewer than five ways to get ahold of me. If you are going to miss class, tell me before class. We’ll work it out. But you have to tell me before class for me to help you. This said: this class builds each class onto the next one. Miss a class and you are behind. We’re going to be covering a lot of new material in this class. Miss one at your own peril. Assume that you missed something important. And know it is entirely on you to find out what you missed and how to catch up. 95 | 96 | __Policies__ 97 | 98 | Here's the short version. You cheat, you fail, no exceptions. If I’m doing something that’s keeping you from learning, tell me. Tell the Dean. Tell someone, because that’s not cool. I won’t tolerate it from myself and you shouldn’t either. 99 | Now the longer versions. 
100 | 101 | __ACEJMC Competencies__ 102 | 103 | After this class, you should be able to: 104 | * Understand concepts and apply theories in the use and presentation of images and information; 105 | * Demonstrate an understanding of professional ethical principles and work ethically in pursuit of truth, accuracy, fairness and diversity; 106 | * Think critically, creatively and independently; 107 | * Conduct research and evaluate information by methods appropriate to the communications professions in which they work; 108 | * Critically evaluate their own work and that of others for accuracy and fairness, clarity, appropriate style and grammatical correctness; 109 | * Apply basic numerical and statistical concepts; 110 | * Apply tools and technologies appropriate for the communications professions in which they work. 111 | 112 | __Academic integrity:__ 113 | 114 | Every student must adhere to the policy on academic integrity set forth in the UNL Student Code of Conduct as outlined in the UNL Bulletin. Students who plagiarize may receive a failing grade on an assignment or for an entire course and may be reported to the Student Judicial Review Board. The work a student submits in a class must be the student's own work and must be work completed for that particular class and assignment. Students wishing to build on an old project or work on a similar project in two classes must discuss this with both professors. Academic dishonesty includes: 115 | 116 | * Handing in another's work or part of another's work as your own. * Turning in one of your old papers (including something you wrote in high school) for a current class. 117 | * Turning in the same or similar paper for two different classes. * Using notes or other study aids or otherwise obtaining another's answers for a quiz or an examination. 118 | Anything and everything you include in your work that comes from another source must be attributed with proper citation. That includes ideas and opinions.
Plagiarism consists of using phrases, sentences or paragraphs from any source and republishing them without alteration or attribution. The sources include, but are not limited to, books, magazines, newspapers, television or radio reports, Web sites and other students’ papers. 119 | 120 | __Students with disabilities__ 121 | 122 | Students with disabilities are encouraged to contact the instructor for a confidential discussion of their individual needs for academic accommodation. It is the policy of the University of Nebraska-Lincoln to provide flexible and individualized accommodation to students with documented disabilities that may affect their ability to fully participate in course activities or meet course requirements. To receive accommodation services, students must be registered with the Services for Students with Disabilities (SSD) office, 132 Canfield Administration, 472-3787 voice or TTY. 123 | 124 | __Diversity__ 125 | 126 | The College of Journalism and Mass Communications values diversity, in the broadest sense of the word – gender, age, race, ethnicity, nationality, income, religion, education, geographic, physical and mental ability or disability, sexual orientation. We recognize that understanding and incorporating diversity in the curriculum enables us to prepare our students for careers as professional communicators in a global society. As communicators, we understand that journalism, advertising and other forms of strategic communication must reflect society in order to be effective and reliable. We fail as journalists if we are not accurate in our written, spoken and visual reports; including diverse voices and perspectives improves our accuracy and truthfulness. In advertising, we cannot succeed if we do not understand the value of or know how to create advertising that reflects a diverse society and, thus, appeals to broader audiences. 127 | 128 | ## Course schedule 129 | 130 | This is __very tentative__ and __will change__. 
131 | 132 | ### Jan. 9, 2018: Intro, syllabus 133 | 134 | **In class:** Introductions, syllabus, requirements, what is data visualization? 135 | 136 | **Assignment:** 137 | 138 | * Go buy the books. Now now now now. There will be a quiz on Thursday on the readings. 139 | * Read Tufte Chapter 1, Graphical Excellence 140 | * Read Cairo Chapter 1: Why Visualize. 141 | * Install [Slack](https://slack.com/get) on your computer and your phone. 142 | 143 | ### Jan. 11, 2018: Graphical Excellence 144 | 145 | **In class:** Quiz. What are the specific elements of graphical excellence that make for a good data visualization? These are the foundational principles of the semester. 146 | 147 | **Assignment:** 148 | 149 | * Read [the layered grammar of graphics](http://byrneslab.net/classes/biol607/readings/wickham_layered-grammar.pdf) by Hadley Wickham 150 | * Do assignment 1 -- [installing Jupyter Notebook](https://github.com/mattwaite/JOUR491-Data-Visualization/blob/master/Assignments/1_Installations/installing_jupyter_notebook.md). 151 | * [Download this notebook](https://www.dropbox.com/s/1mn03dbf18llah1/Hello%20World%20in%20R.ipynb?dl=0), open it in Jupyter Notebook and walk through it. 152 | 153 | ### Jan. 16, 2018: R basics 154 | 155 | **In class:** Basic data analysis in R 156 | 157 | **Assignment:** Do the first R basics assignment 158 | 159 | ### Jan. 18, 2018: R basics 2 160 | 161 | **In class:** Basic data analysis in R, part 2 162 | 163 | **Assignment:** Do the second R basics assignment 164 | 165 | ### Jan. 23, 2018: Calculating new values 166 | 167 | **In class:** Mutate in R 168 | 169 | **Assignment:** Do the calculating percent change assignment 170 | 171 | ### Jan. 25, 2018: Working with dates 172 | 173 | **In class:** Why are dates such a problem? 174 | 175 | **Assignment:** Do the working with dates assignment 176 | 177 | ### Jan. 
30, 2018: Recasting data 178 | 179 | **In class:** Narrow vs wide data 180 | 181 | **Assignment:** Do the reshape2 assignment 182 | 183 | ### Feb. 1, 2018: Intro to ggplot2 184 | 185 | **In class:** The Grammar of Graphics in R 186 | 187 | **Assignment:** Do the intro to ggplot2 assignment 188 | 189 | ### Feb. 6, 2018: More ggplot2 190 | 191 | **In class:** The Grammar of Graphics in R 192 | 193 | **Assignment:** Do the second ggplot2 assignment 194 | 195 | ### Feb. 8, 2018: Layers in ggplot2 196 | 197 | **In class:** Layering data in ggplot2 198 | 199 | **Assignment:** Do the ggplot2 layers assignment 200 | 201 | ### Feb. 13, 2018: Styling ggplot2 graphics in R 202 | 203 | **In class:** The Grammar of Graphics in R 204 | 205 | **Assignment:** Do the ggplot2 styling assignment 206 | 207 | ### Feb. 15, 2018: Styling ggplot2 graphics in Illustrator 208 | 209 | **In class:** Graphical finishing school 210 | 211 | **Assignment:** Do the styling in Illustrator assignment 212 | 213 | ### Feb. 20, 2018: Visual storytelling I 214 | 215 | **In class:** Guest speaker: Theo Francis, Wall Street Journal. 216 | 217 | **Assignment:** Reaction paper on Francis talk, prepare a story pitch for your first visual story. 218 | 219 | ### Feb. 22, 2018: Visual storytelling II 220 | 221 | **In class:** Story pitches, guest speaker TBA 222 | 223 | **Assignment:** Reaction paper, start working on your visual story 224 | 225 | ### Feb. 27, 2018: Information to wisdom 226 | 227 | **In class:** Discussion of Cairo. Tufte. 228 | 229 | **Assignment:** 230 | 231 | * Read Cairo Chapter 2 232 | * Read Tufte Chapter 4 233 | * Quiz in next class covering readings. 234 | 235 | ### March 1, 2018: Art and complexity 236 | 237 | **In class:** Discussion of Cairo readings 238 | 239 | **Assignment:** Read Cairo Chapter 6. 240 | 241 | ### March 6, 2018: Data visualization and cognition 242 | 243 | **In class:** Quiz on readings. Discussion of your brain on visualizations. 
244 | 245 | **Assignment:** 246 | 247 | ### March 8, 2018: Tarbell and online publication 248 | 249 | **In class:** How we're going to publish visual stories 250 | 251 | **Assignment:** Do the tarbell assignment 252 | 253 | ### March 13, 2018: Github and transparency 254 | 255 | **In class:** Showing your work 256 | 257 | **Assignment:** Do the github assignment 258 | 259 | ### March 15, 2018: Story edits and production 260 | 261 | Sign up for your meeting time. Prepare a pitch for your second visual story. 262 | 263 | ### March 20, 2018: 264 | 265 | **No class: Spring Break** 266 | 267 | ### March 22, 2018: 268 | 269 | **No class: Spring Break** 270 | 271 | ### March 27, 2018: Critique 272 | 273 | **In class:** In class critique of published work. Story pitches 274 | 275 | **Assignment:** 276 | 277 | Read Tufte Chapter 2, 3, 5, 6 278 | 279 | ### March 29, 2018: Chart Junk 280 | 281 | **In class:** Discussion of Tufte readings. 282 | 283 | **Assignment:** TBA 284 | 285 | ### April 3, 2018: Lying with charts 286 | 287 | **In class:** Quiz on readings. Discussion of Tufte readings. 288 | 289 | **Assignment:** Look for three data visualizations -- in print, online, wherever -- and bring to class on Thursday. Read the [Junk Chart Trifecta](http://junkcharts.typepad.com/junk_charts/junk-charts-trifecta-checkup-the-definitive-guide.html) and be prepared to discuss charts based on this rubric. Read Shazna Nessa's [Visual Literacy In the Age of Data](https://source.opennews.org/en-US/learning/visual-literacy-age-data/) 290 | 291 | ### April 5, 2018: Working with maps 1 292 | 293 | ### April 10, 2018: Working with maps 2 294 | 295 | ### April 12, 2018: Working with maps 3 296 | 297 | ### April 17, 2018: Working with maps 4 298 | 299 | ### April 19, 2018: Other visualizations 300 | 301 | ### April 24, 2018: Work time 302 | 303 | In class work time and question answering. 304 | 305 | ### April 26, 2018: Story edits and production 306 | 307 | Sign up for your meeting time. 
308 | -------------------------------------------------------------------------------- /Assignments/2_R_Basics/RBasics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Basic data analysis in R\n", 8 | "\n", 9 | "R is a statistical programming language that is purpose built for data analysis. \n", 10 | "\n", 11 | "Base R does a lot, but there are a mountain of external libraries that do things to make R better/easier/more fully featured. One of the best libraries, in your professor's opinion, is `dplyr`, a library for working with data. To use dplyr, you need to import it. " 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stderr", 21 | "output_type": "stream", 22 | "text": [ 23 | "\n", 24 | "Attaching package: ‘dplyr’\n", 25 | "\n", 26 | "The following objects are masked from ‘package:stats’:\n", 27 | "\n", 28 | " filter, lag\n", 29 | "\n", 30 | "The following objects are masked from ‘package:base’:\n", 31 | "\n", 32 | " intersect, setdiff, setequal, union\n", 33 | "\n" 34 | ] 35 | } 36 | ], 37 | "source": [ 38 | "library(dplyr)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "The first thing we need to do is get some data to work with. We do that by reading it in. In our case, we're going to read data from a csv file -- a comma-separated values file. \n", 46 | "\n", 47 | "The code looks like this: \n", 48 | "\n", 49 | "`mountainlions <- read.csv(\"../../Data/mountainlions.csv\")`\n", 50 | "\n", 51 | "Let's unpack that. \n", 52 | "\n", 53 | "The first part -- `mountainlions` -- is the name of your variable. A variable is just a name of a thing. In this case, our variable is a data frame, which is R's way of storing data. We can call this whatever we want. 
I always want to name data frames after what is in it. In this case, we're going to import a dataset of mountain lion sightings from the Nebraska Game and Parks Commission.\n", 54 | "\n", 55 | "The `<-` bit is the variable assignment operator. It's how we know we're assigning something to a word. \n", 56 | "\n", 57 | "The `read.csv` bits are pretty obvious. What happens in the quote marks is the path to the data. In there, I have to tell R where to find the data. The easiest thing to do, if you are confused about how to find your data, is to put your data in the same folder as your notebook. In my case, I've got a folder called Data that's two levels up from my work folder. So the `../` means move up one level. So move up one level, move up one level, find Data, then in there is a file called mountainlions.csv. \n", 58 | "\n", 59 | "What you put in there will be different from mine. So your first task is to import the data. " 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 2, 65 | "metadata": { 66 | "collapsed": true 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "mountainlions <- read.csv(\"../../Data/mountainlions.csv\")" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "Now we can inspect the data we imported. What does it look like? To do that, we use `head(mountainlions)` to show the headers and the first six rows of data. If we wanted to see them all, we could just simply enter `mountainlions` and run it. \n", 78 | "\n", 79 | "To get the number of records in our dataset, we run `nrow(mountainlions)`" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 10, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/html": [ 90 | "\n", 91 | "\n", 92 | "\n", 93 | "\t\n", 94 | "\t\n", 95 | "\t\n", 96 | "\t\n", 97 | "\t\n", 98 | "\t\n", 99 | "\n", 100 | "
IDCofirm.TypeCOUNTYDate
1 Track Dawes 9/14/91
2 Mortality Sioux 11/10/91
3 Mortality Scotts Bluff4/21/96
4 Mortality Sioux 5/9/99
5 Mortality Box Butte 9/29/99
6 Track Scotts Bluff11/12/99
\n" 101 | ], 102 | "text/latex": [ 103 | "\\begin{tabular}{r|llll}\n", 104 | " ID & Cofirm.Type & COUNTY & Date\\\\\n", 105 | "\\hline\n", 106 | "\t 1 & Track & Dawes & 9/14/91 \\\\\n", 107 | "\t 2 & Mortality & Sioux & 11/10/91 \\\\\n", 108 | "\t 3 & Mortality & Scotts Bluff & 4/21/96 \\\\\n", 109 | "\t 4 & Mortality & Sioux & 5/9/99 \\\\\n", 110 | "\t 5 & Mortality & Box Butte & 9/29/99 \\\\\n", 111 | "\t 6 & Track & Scotts Bluff & 11/12/99 \\\\\n", 112 | "\\end{tabular}\n" 113 | ], 114 | "text/markdown": [ 115 | "\n", 116 | "ID | Cofirm.Type | COUNTY | Date | \n", 117 | "|---|---|---|---|---|---|\n", 118 | "| 1 | Track | Dawes | 9/14/91 | \n", 119 | "| 2 | Mortality | Sioux | 11/10/91 | \n", 120 | "| 3 | Mortality | Scotts Bluff | 4/21/96 | \n", 121 | "| 4 | Mortality | Sioux | 5/9/99 | \n", 122 | "| 5 | Mortality | Box Butte | 9/29/99 | \n", 123 | "| 6 | Track | Scotts Bluff | 11/12/99 | \n", 124 | "\n", 125 | "\n" 126 | ], 127 | "text/plain": [ 128 | " ID Cofirm.Type COUNTY Date \n", 129 | "1 1 Track Dawes 9/14/91 \n", 130 | "2 2 Mortality Sioux 11/10/91\n", 131 | "3 3 Mortality Scotts Bluff 4/21/96 \n", 132 | "4 4 Mortality Sioux 5/9/99 \n", 133 | "5 5 Mortality Box Butte 9/29/99 \n", 134 | "6 6 Track Scotts Bluff 11/12/99" 135 | ] 136 | }, 137 | "metadata": {}, 138 | "output_type": "display_data" 139 | }, 140 | { 141 | "data": { 142 | "text/html": [ 143 | "393" 144 | ], 145 | "text/latex": [ 146 | "393" 147 | ], 148 | "text/markdown": [ 149 | "393" 150 | ], 151 | "text/plain": [ 152 | "[1] 393" 153 | ] 154 | }, 155 | "metadata": {}, 156 | "output_type": "display_data" 157 | } 158 | ], 159 | "source": [ 160 | "head(mountainlions)\n", 161 | "nrow(mountainlions)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "So what if we wanted to know how many mountain lion sightings there were in each county? To do that by hand, we'd have to take each of the 393 records and sort them into a pile. 
We'd put them in groups and then count them. \n", 169 | "\n", 170 | "`dplyr` has a group by function in it that does just this. A massive amount of data analysis involves grouping like things together at some point. So it's a good place to start. \n", 171 | "\n", 172 | "So to do this, we'll take our dataset and we'll introduce a new operator: `%>%`. The best way to read that operator, in my opinion, is to interpret that as \"and then do this.\" Here's the code: " 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 11, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/html": [ 183 | "\n", 184 | "\n", 185 | "\n", 186 | "\t\n", 187 | "\t\n", 188 | "\t\n", 189 | "\t\n", 190 | "\t\n", 191 | "\t\n", 192 | "\t\n", 193 | "\t\n", 194 | "\t\n", 195 | "\t\n", 196 | "\t\n", 197 | "\t\n", 198 | "\t\n", 199 | "\t\n", 200 | "\t\n", 201 | "\t\n", 202 | "\t\n", 203 | "\t\n", 204 | "\t\n", 205 | "\t\n", 206 | "\t\n", 207 | "\t\n", 208 | "\t\n", 209 | "\t\n", 210 | "\t\n", 211 | "\t\n", 212 | "\t\n", 213 | "\t\n", 214 | "\t\n", 215 | "\t\n", 216 | "\t\n", 217 | "\t\n", 218 | "\t\n", 219 | "\t\n", 220 | "\t\n", 221 | "\t\n", 222 | "\t\n", 223 | "\t\n", 224 | "\t\n", 225 | "\t\n", 226 | "\t\n", 227 | "\t\n", 228 | "\n", 229 | "
COUNTYcount
Banner 6
Blaine 3
Box Butte 4
Brown 15
Buffalo 3
Cedar 1
Cherry 30
Custer 8
Dakota 3
Dawes 111
Dawson 5
Dixon 3
Douglas 2
Frontier 1
Hall 1
Holt 2
Hooker 1
Howard 3
Keith 1
Keya Paha 20
Kimball 1
Knox 8
Lincoln 10
Merrick 1
Morrill 2
Nance 1
Nemaha 5
Platte 1
Polk 1
Richardson 2
Rock 11
Sarpy 1
Saunders 2
Scotts Bluff 26
sheridan 2
Sheridan 35
Sherman 1
Sioux 52
Thomas 5
Thurston 1
Valley 1
Wheeler 1
\n" 230 | ], 231 | "text/latex": [ 232 | "\\begin{tabular}{r|ll}\n", 233 | " COUNTY & count\\\\\n", 234 | "\\hline\n", 235 | "\t Banner & 6 \\\\\n", 236 | "\t Blaine & 3 \\\\\n", 237 | "\t Box Butte & 4 \\\\\n", 238 | "\t Brown & 15 \\\\\n", 239 | "\t Buffalo & 3 \\\\\n", 240 | "\t Cedar & 1 \\\\\n", 241 | "\t Cherry & 30 \\\\\n", 242 | "\t Custer & 8 \\\\\n", 243 | "\t Dakota & 3 \\\\\n", 244 | "\t Dawes & 111 \\\\\n", 245 | "\t Dawson & 5 \\\\\n", 246 | "\t Dixon & 3 \\\\\n", 247 | "\t Douglas & 2 \\\\\n", 248 | "\t Frontier & 1 \\\\\n", 249 | "\t Hall & 1 \\\\\n", 250 | "\t Holt & 2 \\\\\n", 251 | "\t Hooker & 1 \\\\\n", 252 | "\t Howard & 3 \\\\\n", 253 | "\t Keith & 1 \\\\\n", 254 | "\t Keya Paha & 20 \\\\\n", 255 | "\t Kimball & 1 \\\\\n", 256 | "\t Knox & 8 \\\\\n", 257 | "\t Lincoln & 10 \\\\\n", 258 | "\t Merrick & 1 \\\\\n", 259 | "\t Morrill & 2 \\\\\n", 260 | "\t Nance & 1 \\\\\n", 261 | "\t Nemaha & 5 \\\\\n", 262 | "\t Platte & 1 \\\\\n", 263 | "\t Polk & 1 \\\\\n", 264 | "\t Richardson & 2 \\\\\n", 265 | "\t Rock & 11 \\\\\n", 266 | "\t Sarpy & 1 \\\\\n", 267 | "\t Saunders & 2 \\\\\n", 268 | "\t Scotts Bluff & 26 \\\\\n", 269 | "\t sheridan & 2 \\\\\n", 270 | "\t Sheridan & 35 \\\\\n", 271 | "\t Sherman & 1 \\\\\n", 272 | "\t Sioux & 52 \\\\\n", 273 | "\t Thomas & 5 \\\\\n", 274 | "\t Thurston & 1 \\\\\n", 275 | "\t Valley & 1 \\\\\n", 276 | "\t Wheeler & 1 \\\\\n", 277 | "\\end{tabular}\n" 278 | ], 279 | "text/markdown": [ 280 | "\n", 281 | "COUNTY | count | \n", 282 | "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", 283 | "| Banner | 6 | \n", 284 | "| Blaine | 3 | \n", 285 | "| Box Butte | 4 | \n", 286 | "| Brown | 15 | \n", 287 | "| Buffalo | 3 | \n", 288 | "| Cedar | 1 | \n", 289 | "| Cherry | 30 | \n", 290 | "| Custer | 8 | \n", 291 | "| Dakota | 3 | \n", 292 | "| Dawes | 111 | \n", 293 | "| Dawson | 5 | \n", 294 | "| 
Dixon | 3 | \n", 295 | "| Douglas | 2 | \n", 296 | "| Frontier | 1 | \n", 297 | "| Hall | 1 | \n", 298 | "| Holt | 2 | \n", 299 | "| Hooker | 1 | \n", 300 | "| Howard | 3 | \n", 301 | "| Keith | 1 | \n", 302 | "| Keya Paha | 20 | \n", 303 | "| Kimball | 1 | \n", 304 | "| Knox | 8 | \n", 305 | "| Lincoln | 10 | \n", 306 | "| Merrick | 1 | \n", 307 | "| Morrill | 2 | \n", 308 | "| Nance | 1 | \n", 309 | "| Nemaha | 5 | \n", 310 | "| Platte | 1 | \n", 311 | "| Polk | 1 | \n", 312 | "| Richardson | 2 | \n", 313 | "| Rock | 11 | \n", 314 | "| Sarpy | 1 | \n", 315 | "| Saunders | 2 | \n", 316 | "| Scotts Bluff | 26 | \n", 317 | "| sheridan | 2 | \n", 318 | "| Sheridan | 35 | \n", 319 | "| Sherman | 1 | \n", 320 | "| Sioux | 52 | \n", 321 | "| Thomas | 5 | \n", 322 | "| Thurston | 1 | \n", 323 | "| Valley | 1 | \n", 324 | "| Wheeler | 1 | \n", 325 | "\n", 326 | "\n" 327 | ], 328 | "text/plain": [ 329 | " COUNTY count\n", 330 | "1 Banner 6 \n", 331 | "2 Blaine 3 \n", 332 | "3 Box Butte 4 \n", 333 | "4 Brown 15 \n", 334 | "5 Buffalo 3 \n", 335 | "6 Cedar 1 \n", 336 | "7 Cherry 30 \n", 337 | "8 Custer 8 \n", 338 | "9 Dakota 3 \n", 339 | "10 Dawes 111 \n", 340 | "11 Dawson 5 \n", 341 | "12 Dixon 3 \n", 342 | "13 Douglas 2 \n", 343 | "14 Frontier 1 \n", 344 | "15 Hall 1 \n", 345 | "16 Holt 2 \n", 346 | "17 Hooker 1 \n", 347 | "18 Howard 3 \n", 348 | "19 Keith 1 \n", 349 | "20 Keya Paha 20 \n", 350 | "21 Kimball 1 \n", 351 | "22 Knox 8 \n", 352 | "23 Lincoln 10 \n", 353 | "24 Merrick 1 \n", 354 | "25 Morrill 2 \n", 355 | "26 Nance 1 \n", 356 | "27 Nemaha 5 \n", 357 | "28 Platte 1 \n", 358 | "29 Polk 1 \n", 359 | "30 Richardson 2 \n", 360 | "31 Rock 11 \n", 361 | "32 Sarpy 1 \n", 362 | "33 Saunders 2 \n", 363 | "34 Scotts Bluff 26 \n", 364 | "35 sheridan 2 \n", 365 | "36 Sheridan 35 \n", 366 | "37 Sherman 1 \n", 367 | "38 Sioux 52 \n", 368 | "39 Thomas 5 \n", 369 | "40 Thurston 1 \n", 370 | "41 Valley 1 \n", 371 | "42 Wheeler 1 " 372 | ] 373 | }, 374 | "metadata": {}, 375 | 
"output_type": "display_data" 376 | } 377 | ], 378 | "source": [ 379 | "mountainlions %>%\n", 380 | " group_by(COUNTY) %>%\n", 381 | " summarise(\n", 382 | " count = n(),\n", 383 | " ) " 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": {}, 389 | "source": [ 390 | "So let's walk through that. We start with our dataset -- `mountainlions` -- and then we tell it to group the data by a given field in the data. In this case, we wanted to group together all the counties, signified by the field name COUNTY, which you could get from looking at `head(mountainlions)`. So after we group the data, we need to count them up. In dplyr, we use `summarize` [which can do more than just count things](http://dplyr.tidyverse.org/reference/summarise.html). So inside the parentheses in summarize, we set up the summaries we want. In this case, we just want a count of the counties. So `count = n(),` says create a new field, called `count` and set it equal to `n()`, which might look weird, but it's common in stats. The number of things in a dataset? Statisticians call in n. There are n number of incidents in this dataset. So `n()` is a function that counts the number of things there are. \n", 391 | "\n", 392 | "And when we run that, we get a list of counties with a count next to them. But it's not in any order. So we'll add another And Then Do This %>% and use `arrange`. Arrange does what you think it does -- it arranges data in order. By default, it's in ascending order -- smallest to largest. But if we want to know the county with the most mountain lion sightings, we need to sort it in descending order. 
That looks like this:" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 12, 398 | "metadata": {}, 399 | "outputs": [ 400 | { 401 | "data": { 402 | "text/html": [ 403 | "\n", 404 | "\n", 405 | "\n", 406 | "\t\n", 407 | "\t\n", 408 | "\t\n", 409 | "\t\n", 410 | "\t\n", 411 | "\t\n", 412 | "\t\n", 413 | "\t\n", 414 | "\t\n", 415 | "\t\n", 416 | "\t\n", 417 | "\t\n", 418 | "\t\n", 419 | "\t\n", 420 | "\t\n", 421 | "\t\n", 422 | "\t\n", 423 | "\t\n", 424 | "\t\n", 425 | "\t\n", 426 | "\t\n", 427 | "\t\n", 428 | "\t\n", 429 | "\t\n", 430 | "\t\n", 431 | "\t\n", 432 | "\t\n", 433 | "\t\n", 434 | "\t\n", 435 | "\t\n", 436 | "\t\n", 437 | "\t\n", 438 | "\t\n", 439 | "\t\n", 440 | "\t\n", 441 | "\t\n", 442 | "\t\n", 443 | "\t\n", 444 | "\t\n", 445 | "\t\n", 446 | "\t\n", 447 | "\t\n", 448 | "\n", 449 | "
COUNTYcount
Dawes 111
Sioux 52
Sheridan 35
Cherry 30
Scotts Bluff 26
Keya Paha 20
Brown 15
Rock 11
Lincoln 10
Custer 8
Knox 8
Banner 6
Dawson 5
Nemaha 5
Thomas 5
Box Butte 4
Blaine 3
Buffalo 3
Dakota 3
Dixon 3
Howard 3
Douglas 2
Holt 2
Morrill 2
Richardson 2
Saunders 2
sheridan 2
Cedar 1
Frontier 1
Hall 1
Hooker 1
Keith 1
Kimball 1
Merrick 1
Nance 1
Platte 1
Polk 1
Sarpy 1
Sherman 1
Thurston 1
Valley 1
Wheeler 1
\n" 450 | ], 451 | "text/latex": [ 452 | "\\begin{tabular}{r|ll}\n", 453 | " COUNTY & count\\\\\n", 454 | "\\hline\n", 455 | "\t Dawes & 111 \\\\\n", 456 | "\t Sioux & 52 \\\\\n", 457 | "\t Sheridan & 35 \\\\\n", 458 | "\t Cherry & 30 \\\\\n", 459 | "\t Scotts Bluff & 26 \\\\\n", 460 | "\t Keya Paha & 20 \\\\\n", 461 | "\t Brown & 15 \\\\\n", 462 | "\t Rock & 11 \\\\\n", 463 | "\t Lincoln & 10 \\\\\n", 464 | "\t Custer & 8 \\\\\n", 465 | "\t Knox & 8 \\\\\n", 466 | "\t Banner & 6 \\\\\n", 467 | "\t Dawson & 5 \\\\\n", 468 | "\t Nemaha & 5 \\\\\n", 469 | "\t Thomas & 5 \\\\\n", 470 | "\t Box Butte & 4 \\\\\n", 471 | "\t Blaine & 3 \\\\\n", 472 | "\t Buffalo & 3 \\\\\n", 473 | "\t Dakota & 3 \\\\\n", 474 | "\t Dixon & 3 \\\\\n", 475 | "\t Howard & 3 \\\\\n", 476 | "\t Douglas & 2 \\\\\n", 477 | "\t Holt & 2 \\\\\n", 478 | "\t Morrill & 2 \\\\\n", 479 | "\t Richardson & 2 \\\\\n", 480 | "\t Saunders & 2 \\\\\n", 481 | "\t sheridan & 2 \\\\\n", 482 | "\t Cedar & 1 \\\\\n", 483 | "\t Frontier & 1 \\\\\n", 484 | "\t Hall & 1 \\\\\n", 485 | "\t Hooker & 1 \\\\\n", 486 | "\t Keith & 1 \\\\\n", 487 | "\t Kimball & 1 \\\\\n", 488 | "\t Merrick & 1 \\\\\n", 489 | "\t Nance & 1 \\\\\n", 490 | "\t Platte & 1 \\\\\n", 491 | "\t Polk & 1 \\\\\n", 492 | "\t Sarpy & 1 \\\\\n", 493 | "\t Sherman & 1 \\\\\n", 494 | "\t Thurston & 1 \\\\\n", 495 | "\t Valley & 1 \\\\\n", 496 | "\t Wheeler & 1 \\\\\n", 497 | "\\end{tabular}\n" 498 | ], 499 | "text/markdown": [ 500 | "\n", 501 | "COUNTY | count | \n", 502 | "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", 503 | "| Dawes | 111 | \n", 504 | "| Sioux | 52 | \n", 505 | "| Sheridan | 35 | \n", 506 | "| Cherry | 30 | \n", 507 | "| Scotts Bluff | 26 | \n", 508 | "| Keya Paha | 20 | \n", 509 | "| Brown | 15 | \n", 510 | "| Rock | 11 | \n", 511 | "| Lincoln | 10 | \n", 512 | "| Custer | 8 | \n", 513 | "| Knox | 8 | \n", 
514 | "| Banner | 6 | \n", 515 | "| Dawson | 5 | \n", 516 | "| Nemaha | 5 | \n", 517 | "| Thomas | 5 | \n", 518 | "| Box Butte | 4 | \n", 519 | "| Blaine | 3 | \n", 520 | "| Buffalo | 3 | \n", 521 | "| Dakota | 3 | \n", 522 | "| Dixon | 3 | \n", 523 | "| Howard | 3 | \n", 524 | "| Douglas | 2 | \n", 525 | "| Holt | 2 | \n", 526 | "| Morrill | 2 | \n", 527 | "| Richardson | 2 | \n", 528 | "| Saunders | 2 | \n", 529 | "| sheridan | 2 | \n", 530 | "| Cedar | 1 | \n", 531 | "| Frontier | 1 | \n", 532 | "| Hall | 1 | \n", 533 | "| Hooker | 1 | \n", 534 | "| Keith | 1 | \n", 535 | "| Kimball | 1 | \n", 536 | "| Merrick | 1 | \n", 537 | "| Nance | 1 | \n", 538 | "| Platte | 1 | \n", 539 | "| Polk | 1 | \n", 540 | "| Sarpy | 1 | \n", 541 | "| Sherman | 1 | \n", 542 | "| Thurston | 1 | \n", 543 | "| Valley | 1 | \n", 544 | "| Wheeler | 1 | \n", 545 | "\n", 546 | "\n" 547 | ], 548 | "text/plain": [ 549 | " COUNTY count\n", 550 | "1 Dawes 111 \n", 551 | "2 Sioux 52 \n", 552 | "3 Sheridan 35 \n", 553 | "4 Cherry 30 \n", 554 | "5 Scotts Bluff 26 \n", 555 | "6 Keya Paha 20 \n", 556 | "7 Brown 15 \n", 557 | "8 Rock 11 \n", 558 | "9 Lincoln 10 \n", 559 | "10 Custer 8 \n", 560 | "11 Knox 8 \n", 561 | "12 Banner 6 \n", 562 | "13 Dawson 5 \n", 563 | "14 Nemaha 5 \n", 564 | "15 Thomas 5 \n", 565 | "16 Box Butte 4 \n", 566 | "17 Blaine 3 \n", 567 | "18 Buffalo 3 \n", 568 | "19 Dakota 3 \n", 569 | "20 Dixon 3 \n", 570 | "21 Howard 3 \n", 571 | "22 Douglas 2 \n", 572 | "23 Holt 2 \n", 573 | "24 Morrill 2 \n", 574 | "25 Richardson 2 \n", 575 | "26 Saunders 2 \n", 576 | "27 sheridan 2 \n", 577 | "28 Cedar 1 \n", 578 | "29 Frontier 1 \n", 579 | "30 Hall 1 \n", 580 | "31 Hooker 1 \n", 581 | "32 Keith 1 \n", 582 | "33 Kimball 1 \n", 583 | "34 Merrick 1 \n", 584 | "35 Nance 1 \n", 585 | "36 Platte 1 \n", 586 | "37 Polk 1 \n", 587 | "38 Sarpy 1 \n", 588 | "39 Sherman 1 \n", 589 | "40 Thurston 1 \n", 590 | "41 Valley 1 \n", 591 | "42 Wheeler 1 " 592 | ] 593 | }, 594 | "metadata": {}, 595 | 
"output_type": "display_data" 596 | } 597 | ], 598 | "source": [ 599 | "mountainlions %>%\n", 600 | " group_by(COUNTY) %>%\n", 601 | " summarise(\n", 602 | " count = n(),\n", 603 | " ) %>% arrange(desc(count))" 604 | ] 605 | }, 606 | { 607 | "cell_type": "markdown", 608 | "metadata": {}, 609 | "source": [ 610 | "## Assignment\n", 611 | "\n", 612 | "Answer this question using what you have learned in this walkthrough. \n", 613 | "\n", 614 | "**What are the most common incidents UNL police reported from 2013 and 2016?**\n", 615 | "\n", 616 | "To do this, you'll need to [download this data](https://www.dropbox.com/s/47zogziohseavh7/unlcrime.csv?dl=0). \n", 617 | "\n", 618 | "#### Rubric\n", 619 | "\n", 620 | "1. Did you read the data into a dataframe? \n", 621 | "2. Did you use group by syntax correctly? \n", 622 | "3. Did you use summarize syntax correctly?\n", 623 | "4. Did you use arrange syntax correctly?\n", 624 | "5. Did you use Markdown comments to explain your steps? " 625 | ] 626 | }, 627 | { 628 | "cell_type": "code", 629 | "execution_count": null, 630 | "metadata": { 631 | "collapsed": true 632 | }, 633 | "outputs": [], 634 | "source": [] 635 | } 636 | ], 637 | "metadata": { 638 | "anaconda-cloud": {}, 639 | "kernelspec": { 640 | "display_name": "R", 641 | "language": "R", 642 | "name": "ir" 643 | }, 644 | "language_info": { 645 | "codemirror_mode": "r", 646 | "file_extension": ".r", 647 | "mimetype": "text/x-r-source", 648 | "name": "R", 649 | "pygments_lexer": "r", 650 | "version": "3.4.1" 651 | } 652 | }, 653 | "nbformat": 4, 654 | "nbformat_minor": 1 655 | } 656 | -------------------------------------------------------------------------------- /Assignments/16_LiveFireExercise/LiveFireExercise.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Live fire: Census estimates release day\n", 8 | "\n", 9 | "Every year, the US 
Census Bureau releases new estimates of the population of every metropolitan area, county, city and town in the US. They are estimates because they only do the headcount census every 10 years. Between then, they use data and modeling to estimate what the population is. Every 10 years, they recalibrate their models based on how close they came to getting it right, given the headcount census. \n", 10 | "\n", 11 | "Today, we're going to simulate being in a newsroom on the day these new data are released. We're going to look at how a local news organization handled it, and we're going to show how a little bit of R and ggplot knowhow can make this better, easier and pushbutton quick next year. \n", 12 | "\n", 13 | "First, let's talk about how [a local newspaper covered it](http://journalstar.com/business/local/census-nebraska-s-big-counties-growing-rest-of-state-not/article_4317e30c-2a4b-5184-a888-ccebd4a22a04.html). What did they choose to focus on? What numerical measures did they use? Were they the right ones? Were they useful? Did they use any visuals? What could they have done differently?\n", 14 | "\n", 15 | "Now let's take our own crack at this. You are now on deadline. You have until the end of class to create a visual story out of this data, looking at the state of Nebraska. You will need to:\n", 16 | "\n", 17 | "* Create some tables of data to show trends.\n", 18 | "* Create at least two visualizations of the data.\n", 19 | "\n", 20 | "Some suggestions: Fastest growing? Fastest shrinking? Gainers to losers? One-year change vs since 2010? Every county in a lattice chart? Urban vs rural? Counties that have lost population every year this decade? Gained?\n", 21 | "\n", 22 | "Pair up, plan what you are going to do, and get started. To help you, here's some boilerplate code to get you going. **NOTE THE `read.csv` BITS. 
IT'S PULLING THE DATA STRAIGHT FROM THE URL.**" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 1, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stderr", 32 | "output_type": "stream", 33 | "text": [ 34 | "\n", 35 | "Attaching package: ‘dplyr’\n", 36 | "\n", 37 | "The following objects are masked from ‘package:stats’:\n", 38 | "\n", 39 | " filter, lag\n", 40 | "\n", 41 | "The following objects are masked from ‘package:base’:\n", 42 | "\n", 43 | " intersect, setdiff, setequal, union\n", 44 | "\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "library(dplyr)\n", 50 | "library(ggplot2)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 2, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "counties <- read.csv(url(\"https://www2.census.gov/programs-surveys/popest/datasets/2010-2017/counties/totals/co-est2017-alldata.csv\"))" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/html": [ 70 | "\n", 71 | "\n", 72 | "\n", 73 | "\t\n", 74 | "\t\n", 75 | "\t\n", 76 | "\t\n", 77 | "\t\n", 78 | "\t\n", 79 | "\n", 80 | "
SUMLEVREGIONDIVISIONSTATECOUNTYSTNAMECTYNAMECENSUS2010POPESTIMATESBASE2010POPESTIMATE2010RDOMESTICMIG2015RDOMESTICMIG2016RDOMESTICMIG2017RNETMIG2011RNETMIG2012RNETMIG2013RNETMIG2014RNETMIG2015RNETMIG2016RNETMIG2017
40 3 6 1 0 Alabama Alabama 4779736 4780135 4785579 -0.3172050 -0.404473 0.7888823 0.4507405 0.9393925 1.3642955 0.6942708 0.6785751 0.5589306 1.708218
50 3 6 1 1 Alabama Autauga County 54571 54571 54750 -1.9507393 4.831269 1.0471015 5.9118318 -6.1021012 -4.0502819 2.0993255 -1.6590399 5.1037088 1.317904
50 3 6 1 3 Alabama Baldwin County 182265 182265 183110 17.0478719 20.493601 22.3831750 16.2859400 17.1967858 22.6152855 20.3809040 17.9037487 21.3172439 23.163873
50 3 6 1 5 Alabama Barbour County 27457 27457 27332 -16.2224360 -18.755525 -19.0423948 0.2560211 -6.8224333 -8.0189202 -5.5497616 -16.4110690 -18.9476921 -19.159940
50 3 6 1 7 Alabama Bibb County 22915 22919 22872 0.9313878 -1.416117 -0.8829827 -5.0419800 -4.0966456 -5.8900379 1.2434497 1.8184237 -0.5310439 0.000000
50 3 6 1 9 Alabama Blount County 57322 57324 57381 -1.5633685 -1.736835 6.2124162 0.2435990 -1.3546723 -0.4860352 -1.7713100 -0.5384936 -0.6599972 7.285313
\n" 81 | ], 82 | "text/latex": [ 83 | "\\begin{tabular}{r|llllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll}\n", 84 | " SUMLEV & REGION & DIVISION & STATE & COUNTY & STNAME & CTYNAME & CENSUS2010POP & ESTIMATESBASE2010 & POPESTIMATE2010 & ⋯ & RDOMESTICMIG2015 & RDOMESTICMIG2016 & RDOMESTICMIG2017 & RNETMIG2011 & RNETMIG2012 & RNETMIG2013 & RNETMIG2014 & RNETMIG2015 & RNETMIG2016 & RNETMIG2017\\\\\n", 85 | "\\hline\n", 86 | "\t 40 & 3 & 6 & 1 & 0 & Alabama & Alabama & 4779736 & 4780135 & 4785579 & ⋯ & -0.3172050 & -0.404473 & 0.7888823 & 0.4507405 & 0.9393925 & 1.3642955 & 0.6942708 & 0.6785751 & 0.5589306 & 1.708218 \\\\\n", 87 | "\t 50 & 3 & 6 & 1 & 1 & Alabama & Autauga County & 54571 & 54571 & 54750 & ⋯ & -1.9507393 & 4.831269 & 1.0471015 & 5.9118318 & -6.1021012 & -4.0502819 & 2.0993255 & -1.6590399 & 5.1037088 & 1.317904 \\\\\n", 88 | "\t 50 & 3 & 6 & 1 & 3 & Alabama & Baldwin County & 182265 & 182265 & 183110 & ⋯ & 17.0478719 & 20.493601 & 22.3831750 & 16.2859400 & 17.1967858 & 22.6152855 & 20.3809040 & 17.9037487 & 21.3172439 & 23.163873 \\\\\n", 89 | "\t 50 & 3 & 6 & 1 & 5 & Alabama & Barbour County & 27457 & 27457 & 27332 & ⋯ & -16.2224360 & -18.755525 & -19.0423948 & 0.2560211 & -6.8224333 & -8.0189202 & -5.5497616 & -16.4110690 & -18.9476921 & -19.159940 \\\\\n", 90 | "\t 50 & 3 & 6 & 1 & 7 & Alabama & Bibb County & 22915 & 22919 & 22872 & ⋯ & 0.9313878 & -1.416117 & -0.8829827 & -5.0419800 & -4.0966456 & -5.8900379 & 1.2434497 & 1.8184237 & -0.5310439 & 0.000000 \\\\\n", 91 | "\t 50 & 3 & 6 & 1 & 9 & Alabama & Blount County & 57322 & 57324 & 57381 & ⋯ & -1.5633685 & -1.736835 & 6.2124162 & 0.2435990 & -1.3546723 & -0.4860352 & -1.7713100 & -0.5384936 & -0.6599972 & 7.285313 \\\\\n", 92 | "\\end{tabular}\n" 93 | ], 94 | "text/markdown": [ 95 | "\n", 96 | "SUMLEV | REGION | DIVISION | STATE | COUNTY | STNAME | CTYNAME | CENSUS2010POP | ESTIMATESBASE2010 | POPESTIMATE2010 | 
⋯ | RDOMESTICMIG2015 | RDOMESTICMIG2016 | RDOMESTICMIG2017 | RNETMIG2011 | RNETMIG2012 | RNETMIG2013 | RNETMIG2014 | RNETMIG2015 | RNETMIG2016 | RNETMIG2017 | \n", 97 | "|---|---|---|---|---|---|\n", 98 | "| 40 | 3 | 6 | 1 | 0 | Alabama | Alabama | 4779736 | 4780135 | 4785579 | ⋯ | -0.3172050 | -0.404473 | 0.7888823 | 0.4507405 | 0.9393925 | 1.3642955 | 0.6942708 | 0.6785751 | 0.5589306 | 1.708218 | \n", 99 | "| 50 | 3 | 6 | 1 | 1 | Alabama | Autauga County | 54571 | 54571 | 54750 | ⋯ | -1.9507393 | 4.831269 | 1.0471015 | 5.9118318 | -6.1021012 | -4.0502819 | 2.0993255 | -1.6590399 | 5.1037088 | 1.317904 | \n", 100 | "| 50 | 3 | 6 | 1 | 3 | Alabama | Baldwin County | 182265 | 182265 | 183110 | ⋯ | 17.0478719 | 20.493601 | 22.3831750 | 16.2859400 | 17.1967858 | 22.6152855 | 20.3809040 | 17.9037487 | 21.3172439 | 23.163873 | \n", 101 | "| 50 | 3 | 6 | 1 | 5 | Alabama | Barbour County | 27457 | 27457 | 27332 | ⋯ | -16.2224360 | -18.755525 | -19.0423948 | 0.2560211 | -6.8224333 | -8.0189202 | -5.5497616 | -16.4110690 | -18.9476921 | -19.159940 | \n", 102 | "| 50 | 3 | 6 | 1 | 7 | Alabama | Bibb County | 22915 | 22919 | 22872 | ⋯ | 0.9313878 | -1.416117 | -0.8829827 | -5.0419800 | -4.0966456 | -5.8900379 | 1.2434497 | 1.8184237 | -0.5310439 | 0.000000 | \n", 103 | "| 50 | 3 | 6 | 1 | 9 | Alabama | Blount County | 57322 | 57324 | 57381 | ⋯ | -1.5633685 | -1.736835 | 6.2124162 | 0.2435990 | -1.3546723 | -0.4860352 | -1.7713100 | -0.5384936 | -0.6599972 | 7.285313 | \n", 104 | "\n", 105 | "\n" 106 | ], 107 | "text/plain": [ 108 | " SUMLEV REGION DIVISION STATE COUNTY STNAME CTYNAME CENSUS2010POP\n", 109 | "1 40 3 6 1 0 Alabama Alabama 4779736 \n", 110 | "2 50 3 6 1 1 Alabama Autauga County 54571 \n", 111 | "3 50 3 6 1 3 Alabama Baldwin County 182265 \n", 112 | "4 50 3 6 1 5 Alabama Barbour County 27457 \n", 113 | "5 50 3 6 1 7 Alabama Bibb County 22915 \n", 114 | "6 50 3 6 1 9 Alabama Blount County 57322 \n", 115 | " ESTIMATESBASE2010 POPESTIMATE2010 ⋯ RDOMESTICMIG2015 
RDOMESTICMIG2016\n", 116 | "1 4780135 4785579 ⋯ -0.3172050 -0.404473 \n", 117 | "2 54571 54750 ⋯ -1.9507393 4.831269 \n", 118 | "3 182265 183110 ⋯ 17.0478719 20.493601 \n", 119 | "4 27457 27332 ⋯ -16.2224360 -18.755525 \n", 120 | "5 22919 22872 ⋯ 0.9313878 -1.416117 \n", 121 | "6 57324 57381 ⋯ -1.5633685 -1.736835 \n", 122 | " RDOMESTICMIG2017 RNETMIG2011 RNETMIG2012 RNETMIG2013 RNETMIG2014 RNETMIG2015\n", 123 | "1 0.7888823 0.4507405 0.9393925 1.3642955 0.6942708 0.6785751\n", 124 | "2 1.0471015 5.9118318 -6.1021012 -4.0502819 2.0993255 -1.6590399\n", 125 | "3 22.3831750 16.2859400 17.1967858 22.6152855 20.3809040 17.9037487\n", 126 | "4 -19.0423948 0.2560211 -6.8224333 -8.0189202 -5.5497616 -16.4110690\n", 127 | "5 -0.8829827 -5.0419800 -4.0966456 -5.8900379 1.2434497 1.8184237\n", 128 | "6 6.2124162 0.2435990 -1.3546723 -0.4860352 -1.7713100 -0.5384936\n", 129 | " RNETMIG2016 RNETMIG2017\n", 130 | "1 0.5589306 1.708218 \n", 131 | "2 5.1037088 1.317904 \n", 132 | "3 21.3172439 23.163873 \n", 133 | "4 -18.9476921 -19.159940 \n", 134 | "5 -0.5310439 0.000000 \n", 135 | "6 -0.6599972 7.285313 " 136 | ] 137 | }, 138 | "metadata": {}, 139 | "output_type": "display_data" 140 | } 141 | ], 142 | "source": [ 143 | "head(counties)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 4, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "data": { 153 | "text/html": [ 154 | "
    \n", 155 | "\t
  1. 'SUMLEV'
  2. \n", 156 | "\t
  3. 'REGION'
  4. \n", 157 | "\t
  5. 'DIVISION'
  6. \n", 158 | "\t
  7. 'STATE'
  8. \n", 159 | "\t
  9. 'COUNTY'
  10. \n", 160 | "\t
  11. 'STNAME'
  12. \n", 161 | "\t
  13. 'CTYNAME'
  14. \n", 162 | "\t
  15. 'CENSUS2010POP'
  16. \n", 163 | "\t
  17. 'ESTIMATESBASE2010'
  18. \n", 164 | "\t
  19. 'POPESTIMATE2010'
  20. \n", 165 | "\t
  21. 'POPESTIMATE2011'
  22. \n", 166 | "\t
  23. 'POPESTIMATE2012'
  24. \n", 167 | "\t
  25. 'POPESTIMATE2013'
  26. \n", 168 | "\t
  27. 'POPESTIMATE2014'
  28. \n", 169 | "\t
  29. 'POPESTIMATE2015'
  30. \n", 170 | "\t
  31. 'POPESTIMATE2016'
  32. \n", 171 | "\t
  33. 'POPESTIMATE2017'
  34. \n", 172 | "\t
  35. 'NPOPCHG_2010'
  36. \n", 173 | "\t
  37. 'NPOPCHG_2011'
  38. \n", 174 | "\t
  39. 'NPOPCHG_2012'
  40. \n", 175 | "\t
  41. 'NPOPCHG_2013'
  42. \n", 176 | "\t
  43. 'NPOPCHG_2014'
  44. \n", 177 | "\t
  45. 'NPOPCHG_2015'
  46. \n", 178 | "\t
  47. 'NPOPCHG_2016'
  48. \n", 179 | "\t
  49. 'NPOPCHG_2017'
  50. \n", 180 | "\t
  51. 'BIRTHS2010'
  52. \n", 181 | "\t
  53. 'BIRTHS2011'
  54. \n", 182 | "\t
  55. 'BIRTHS2012'
  56. \n", 183 | "\t
  57. 'BIRTHS2013'
  58. \n", 184 | "\t
  59. 'BIRTHS2014'
  60. \n", 185 | "\t
  61. 'BIRTHS2015'
  62. \n", 186 | "\t
  63. 'BIRTHS2016'
  64. \n", 187 | "\t
  65. 'BIRTHS2017'
  66. \n", 188 | "\t
  67. 'DEATHS2010'
  68. \n", 189 | "\t
  69. 'DEATHS2011'
  70. \n", 190 | "\t
  71. 'DEATHS2012'
  72. \n", 191 | "\t
  73. 'DEATHS2013'
  74. \n", 192 | "\t
  75. 'DEATHS2014'
  76. \n", 193 | "\t
  77. 'DEATHS2015'
  78. \n", 194 | "\t
  79. 'DEATHS2016'
  80. \n", 195 | "\t
  81. 'DEATHS2017'
  82. \n", 196 | "\t
  83. 'NATURALINC2010'
  84. \n", 197 | "\t
  85. 'NATURALINC2011'
  86. \n", 198 | "\t
  87. 'NATURALINC2012'
  88. \n", 199 | "\t
  89. 'NATURALINC2013'
  90. \n", 200 | "\t
  91. 'NATURALINC2014'
  92. \n", 201 | "\t
  93. 'NATURALINC2015'
  94. \n", 202 | "\t
  95. 'NATURALINC2016'
  96. \n", 203 | "\t
  97. 'NATURALINC2017'
  98. \n", 204 | "\t
  99. 'INTERNATIONALMIG2010'
  100. \n", 205 | "\t
  101. 'INTERNATIONALMIG2011'
  102. \n", 206 | "\t
  103. 'INTERNATIONALMIG2012'
  104. \n", 207 | "\t
  105. 'INTERNATIONALMIG2013'
  106. \n", 208 | "\t
  107. 'INTERNATIONALMIG2014'
  108. \n", 209 | "\t
  109. 'INTERNATIONALMIG2015'
  110. \n", 210 | "\t
  111. 'INTERNATIONALMIG2016'
  112. \n", 211 | "\t
  113. 'INTERNATIONALMIG2017'
  114. \n", 212 | "\t
  115. 'DOMESTICMIG2010'
  116. \n", 213 | "\t
  117. 'DOMESTICMIG2011'
  118. \n", 214 | "\t
  119. 'DOMESTICMIG2012'
  120. \n", 215 | "\t
  121. 'DOMESTICMIG2013'
  122. \n", 216 | "\t
  123. 'DOMESTICMIG2014'
  124. \n", 217 | "\t
  125. 'DOMESTICMIG2015'
  126. \n", 218 | "\t
  127. 'DOMESTICMIG2016'
  128. \n", 219 | "\t
  129. 'DOMESTICMIG2017'
  130. \n", 220 | "\t
  131. 'NETMIG2010'
  132. \n", 221 | "\t
  133. 'NETMIG2011'
  134. \n", 222 | "\t
  135. 'NETMIG2012'
  136. \n", 223 | "\t
  137. 'NETMIG2013'
  138. \n", 224 | "\t
  139. 'NETMIG2014'
  140. \n", 225 | "\t
  141. 'NETMIG2015'
  142. \n", 226 | "\t
  143. 'NETMIG2016'
  144. \n", 227 | "\t
  145. 'NETMIG2017'
  146. \n", 228 | "\t
  147. 'RESIDUAL2010'
  148. \n", 229 | "\t
  149. 'RESIDUAL2011'
  150. \n", 230 | "\t
  151. 'RESIDUAL2012'
  152. \n", 231 | "\t
  153. 'RESIDUAL2013'
  154. \n", 232 | "\t
  155. 'RESIDUAL2014'
  156. \n", 233 | "\t
  157. 'RESIDUAL2015'
  158. \n", 234 | "\t
  159. 'RESIDUAL2016'
  160. \n", 235 | "\t
  161. 'RESIDUAL2017'
  162. \n", 236 | "\t
  163. 'GQESTIMATESBASE2010'
  164. \n", 237 | "\t
  165. 'GQESTIMATES2010'
  166. \n", 238 | "\t
  167. 'GQESTIMATES2011'
  168. \n", 239 | "\t
  169. 'GQESTIMATES2012'
  170. \n", 240 | "\t
  171. 'GQESTIMATES2013'
  172. \n", 241 | "\t
  173. 'GQESTIMATES2014'
  174. \n", 242 | "\t
  175. 'GQESTIMATES2015'
  176. \n", 243 | "\t
  177. 'GQESTIMATES2016'
  178. \n", 244 | "\t
  179. 'GQESTIMATES2017'
  180. \n", 245 | "\t
  181. 'RBIRTH2011'
  182. \n", 246 | "\t
  183. 'RBIRTH2012'
  184. \n", 247 | "\t
  185. 'RBIRTH2013'
  186. \n", 248 | "\t
  187. 'RBIRTH2014'
  188. \n", 249 | "\t
  189. 'RBIRTH2015'
  190. \n", 250 | "\t
  191. 'RBIRTH2016'
  192. \n", 251 | "\t
  193. 'RBIRTH2017'
  194. \n", 252 | "\t
  195. 'RDEATH2011'
  196. \n", 253 | "\t
  197. 'RDEATH2012'
  198. \n", 254 | "\t
  199. 'RDEATH2013'
  200. \n", 255 | "\t
  201. 'RDEATH2014'
  202. \n", 256 | "\t
  203. 'RDEATH2015'
  204. \n", 257 | "\t
  205. 'RDEATH2016'
  206. \n", 258 | "\t
  207. 'RDEATH2017'
  208. \n", 259 | "\t
  209. 'RNATURALINC2011'
  210. \n", 260 | "\t
  211. 'RNATURALINC2012'
  212. \n", 261 | "\t
  213. 'RNATURALINC2013'
  214. \n", 262 | "\t
  215. 'RNATURALINC2014'
  216. \n", 263 | "\t
  217. 'RNATURALINC2015'
  218. \n", 264 | "\t
  219. 'RNATURALINC2016'
  220. \n", 265 | "\t
  221. 'RNATURALINC2017'
  222. \n", 266 | "\t
  223. 'RINTERNATIONALMIG2011'
  224. \n", 267 | "\t
  225. 'RINTERNATIONALMIG2012'
  226. \n", 268 | "\t
  227. 'RINTERNATIONALMIG2013'
  228. \n", 269 | "\t
  229. 'RINTERNATIONALMIG2014'
  230. \n", 270 | "\t
  231. 'RINTERNATIONALMIG2015'
  232. \n", 271 | "\t
  233. 'RINTERNATIONALMIG2016'
  234. \n", 272 | "\t
  235. 'RINTERNATIONALMIG2017'
  236. \n", 273 | "\t
  237. 'RDOMESTICMIG2011'
  238. \n", 274 | "\t
  239. 'RDOMESTICMIG2012'
  240. \n", 275 | "\t
  241. 'RDOMESTICMIG2013'
  242. \n", 276 | "\t
  243. 'RDOMESTICMIG2014'
  244. \n", 277 | "\t
  245. 'RDOMESTICMIG2015'
  246. \n", 278 | "\t
  247. 'RDOMESTICMIG2016'
  248. \n", 279 | "\t
  249. 'RDOMESTICMIG2017'
  250. \n", 280 | "\t
  251. 'RNETMIG2011'
  252. \n", 281 | "\t
  253. 'RNETMIG2012'
  254. \n", 282 | "\t
  255. 'RNETMIG2013'
  256. \n", 283 | "\t
  257. 'RNETMIG2014'
  258. \n", 284 | "\t
  259. 'RNETMIG2015'
  260. \n", 285 | "\t
  261. 'RNETMIG2016'
  262. \n", 286 | "\t
  263. 'RNETMIG2017'
  264. \n", 287 | "
\n" 288 | ], 289 | "text/latex": [ 290 | "\\begin{enumerate*}\n", 291 | "\\item 'SUMLEV'\n", 292 | "\\item 'REGION'\n", 293 | "\\item 'DIVISION'\n", 294 | "\\item 'STATE'\n", 295 | "\\item 'COUNTY'\n", 296 | "\\item 'STNAME'\n", 297 | "\\item 'CTYNAME'\n", 298 | "\\item 'CENSUS2010POP'\n", 299 | "\\item 'ESTIMATESBASE2010'\n", 300 | "\\item 'POPESTIMATE2010'\n", 301 | "\\item 'POPESTIMATE2011'\n", 302 | "\\item 'POPESTIMATE2012'\n", 303 | "\\item 'POPESTIMATE2013'\n", 304 | "\\item 'POPESTIMATE2014'\n", 305 | "\\item 'POPESTIMATE2015'\n", 306 | "\\item 'POPESTIMATE2016'\n", 307 | "\\item 'POPESTIMATE2017'\n", 308 | "\\item 'NPOPCHG\\_2010'\n", 309 | "\\item 'NPOPCHG\\_2011'\n", 310 | "\\item 'NPOPCHG\\_2012'\n", 311 | "\\item 'NPOPCHG\\_2013'\n", 312 | "\\item 'NPOPCHG\\_2014'\n", 313 | "\\item 'NPOPCHG\\_2015'\n", 314 | "\\item 'NPOPCHG\\_2016'\n", 315 | "\\item 'NPOPCHG\\_2017'\n", 316 | "\\item 'BIRTHS2010'\n", 317 | "\\item 'BIRTHS2011'\n", 318 | "\\item 'BIRTHS2012'\n", 319 | "\\item 'BIRTHS2013'\n", 320 | "\\item 'BIRTHS2014'\n", 321 | "\\item 'BIRTHS2015'\n", 322 | "\\item 'BIRTHS2016'\n", 323 | "\\item 'BIRTHS2017'\n", 324 | "\\item 'DEATHS2010'\n", 325 | "\\item 'DEATHS2011'\n", 326 | "\\item 'DEATHS2012'\n", 327 | "\\item 'DEATHS2013'\n", 328 | "\\item 'DEATHS2014'\n", 329 | "\\item 'DEATHS2015'\n", 330 | "\\item 'DEATHS2016'\n", 331 | "\\item 'DEATHS2017'\n", 332 | "\\item 'NATURALINC2010'\n", 333 | "\\item 'NATURALINC2011'\n", 334 | "\\item 'NATURALINC2012'\n", 335 | "\\item 'NATURALINC2013'\n", 336 | "\\item 'NATURALINC2014'\n", 337 | "\\item 'NATURALINC2015'\n", 338 | "\\item 'NATURALINC2016'\n", 339 | "\\item 'NATURALINC2017'\n", 340 | "\\item 'INTERNATIONALMIG2010'\n", 341 | "\\item 'INTERNATIONALMIG2011'\n", 342 | "\\item 'INTERNATIONALMIG2012'\n", 343 | "\\item 'INTERNATIONALMIG2013'\n", 344 | "\\item 'INTERNATIONALMIG2014'\n", 345 | "\\item 'INTERNATIONALMIG2015'\n", 346 | "\\item 'INTERNATIONALMIG2016'\n", 347 | "\\item 
'INTERNATIONALMIG2017'\n", 348 | "\\item 'DOMESTICMIG2010'\n", 349 | "\\item 'DOMESTICMIG2011'\n", 350 | "\\item 'DOMESTICMIG2012'\n", 351 | "\\item 'DOMESTICMIG2013'\n", 352 | "\\item 'DOMESTICMIG2014'\n", 353 | "\\item 'DOMESTICMIG2015'\n", 354 | "\\item 'DOMESTICMIG2016'\n", 355 | "\\item 'DOMESTICMIG2017'\n", 356 | "\\item 'NETMIG2010'\n", 357 | "\\item 'NETMIG2011'\n", 358 | "\\item 'NETMIG2012'\n", 359 | "\\item 'NETMIG2013'\n", 360 | "\\item 'NETMIG2014'\n", 361 | "\\item 'NETMIG2015'\n", 362 | "\\item 'NETMIG2016'\n", 363 | "\\item 'NETMIG2017'\n", 364 | "\\item 'RESIDUAL2010'\n", 365 | "\\item 'RESIDUAL2011'\n", 366 | "\\item 'RESIDUAL2012'\n", 367 | "\\item 'RESIDUAL2013'\n", 368 | "\\item 'RESIDUAL2014'\n", 369 | "\\item 'RESIDUAL2015'\n", 370 | "\\item 'RESIDUAL2016'\n", 371 | "\\item 'RESIDUAL2017'\n", 372 | "\\item 'GQESTIMATESBASE2010'\n", 373 | "\\item 'GQESTIMATES2010'\n", 374 | "\\item 'GQESTIMATES2011'\n", 375 | "\\item 'GQESTIMATES2012'\n", 376 | "\\item 'GQESTIMATES2013'\n", 377 | "\\item 'GQESTIMATES2014'\n", 378 | "\\item 'GQESTIMATES2015'\n", 379 | "\\item 'GQESTIMATES2016'\n", 380 | "\\item 'GQESTIMATES2017'\n", 381 | "\\item 'RBIRTH2011'\n", 382 | "\\item 'RBIRTH2012'\n", 383 | "\\item 'RBIRTH2013'\n", 384 | "\\item 'RBIRTH2014'\n", 385 | "\\item 'RBIRTH2015'\n", 386 | "\\item 'RBIRTH2016'\n", 387 | "\\item 'RBIRTH2017'\n", 388 | "\\item 'RDEATH2011'\n", 389 | "\\item 'RDEATH2012'\n", 390 | "\\item 'RDEATH2013'\n", 391 | "\\item 'RDEATH2014'\n", 392 | "\\item 'RDEATH2015'\n", 393 | "\\item 'RDEATH2016'\n", 394 | "\\item 'RDEATH2017'\n", 395 | "\\item 'RNATURALINC2011'\n", 396 | "\\item 'RNATURALINC2012'\n", 397 | "\\item 'RNATURALINC2013'\n", 398 | "\\item 'RNATURALINC2014'\n", 399 | "\\item 'RNATURALINC2015'\n", 400 | "\\item 'RNATURALINC2016'\n", 401 | "\\item 'RNATURALINC2017'\n", 402 | "\\item 'RINTERNATIONALMIG2011'\n", 403 | "\\item 'RINTERNATIONALMIG2012'\n", 404 | "\\item 'RINTERNATIONALMIG2013'\n", 405 | "\\item 
'RINTERNATIONALMIG2014'\n", 406 | "\\item 'RINTERNATIONALMIG2015'\n", 407 | "\\item 'RINTERNATIONALMIG2016'\n", 408 | "\\item 'RINTERNATIONALMIG2017'\n", 409 | "\\item 'RDOMESTICMIG2011'\n", 410 | "\\item 'RDOMESTICMIG2012'\n", 411 | "\\item 'RDOMESTICMIG2013'\n", 412 | "\\item 'RDOMESTICMIG2014'\n", 413 | "\\item 'RDOMESTICMIG2015'\n", 414 | "\\item 'RDOMESTICMIG2016'\n", 415 | "\\item 'RDOMESTICMIG2017'\n", 416 | "\\item 'RNETMIG2011'\n", 417 | "\\item 'RNETMIG2012'\n", 418 | "\\item 'RNETMIG2013'\n", 419 | "\\item 'RNETMIG2014'\n", 420 | "\\item 'RNETMIG2015'\n", 421 | "\\item 'RNETMIG2016'\n", 422 | "\\item 'RNETMIG2017'\n", 423 | "\\end{enumerate*}\n" 424 | ], 425 | "text/markdown": [ 426 | "1. 'SUMLEV'\n", 427 | "2. 'REGION'\n", 428 | "3. 'DIVISION'\n", 429 | "4. 'STATE'\n", 430 | "5. 'COUNTY'\n", 431 | "6. 'STNAME'\n", 432 | "7. 'CTYNAME'\n", 433 | "8. 'CENSUS2010POP'\n", 434 | "9. 'ESTIMATESBASE2010'\n", 435 | "10. 'POPESTIMATE2010'\n", 436 | "11. 'POPESTIMATE2011'\n", 437 | "12. 'POPESTIMATE2012'\n", 438 | "13. 'POPESTIMATE2013'\n", 439 | "14. 'POPESTIMATE2014'\n", 440 | "15. 'POPESTIMATE2015'\n", 441 | "16. 'POPESTIMATE2016'\n", 442 | "17. 'POPESTIMATE2017'\n", 443 | "18. 'NPOPCHG_2010'\n", 444 | "19. 'NPOPCHG_2011'\n", 445 | "20. 'NPOPCHG_2012'\n", 446 | "21. 'NPOPCHG_2013'\n", 447 | "22. 'NPOPCHG_2014'\n", 448 | "23. 'NPOPCHG_2015'\n", 449 | "24. 'NPOPCHG_2016'\n", 450 | "25. 'NPOPCHG_2017'\n", 451 | "26. 'BIRTHS2010'\n", 452 | "27. 'BIRTHS2011'\n", 453 | "28. 'BIRTHS2012'\n", 454 | "29. 'BIRTHS2013'\n", 455 | "30. 'BIRTHS2014'\n", 456 | "31. 'BIRTHS2015'\n", 457 | "32. 'BIRTHS2016'\n", 458 | "33. 'BIRTHS2017'\n", 459 | "34. 'DEATHS2010'\n", 460 | "35. 'DEATHS2011'\n", 461 | "36. 'DEATHS2012'\n", 462 | "37. 'DEATHS2013'\n", 463 | "38. 'DEATHS2014'\n", 464 | "39. 'DEATHS2015'\n", 465 | "40. 'DEATHS2016'\n", 466 | "41. 'DEATHS2017'\n", 467 | "42. 'NATURALINC2010'\n", 468 | "43. 'NATURALINC2011'\n", 469 | "44. 'NATURALINC2012'\n", 470 | "45. 
'NATURALINC2013'\n", 471 | "46. 'NATURALINC2014'\n", 472 | "47. 'NATURALINC2015'\n", 473 | "48. 'NATURALINC2016'\n", 474 | "49. 'NATURALINC2017'\n", 475 | "50. 'INTERNATIONALMIG2010'\n", 476 | "51. 'INTERNATIONALMIG2011'\n", 477 | "52. 'INTERNATIONALMIG2012'\n", 478 | "53. 'INTERNATIONALMIG2013'\n", 479 | "54. 'INTERNATIONALMIG2014'\n", 480 | "55. 'INTERNATIONALMIG2015'\n", 481 | "56. 'INTERNATIONALMIG2016'\n", 482 | "57. 'INTERNATIONALMIG2017'\n", 483 | "58. 'DOMESTICMIG2010'\n", 484 | "59. 'DOMESTICMIG2011'\n", 485 | "60. 'DOMESTICMIG2012'\n", 486 | "61. 'DOMESTICMIG2013'\n", 487 | "62. 'DOMESTICMIG2014'\n", 488 | "63. 'DOMESTICMIG2015'\n", 489 | "64. 'DOMESTICMIG2016'\n", 490 | "65. 'DOMESTICMIG2017'\n", 491 | "66. 'NETMIG2010'\n", 492 | "67. 'NETMIG2011'\n", 493 | "68. 'NETMIG2012'\n", 494 | "69. 'NETMIG2013'\n", 495 | "70. 'NETMIG2014'\n", 496 | "71. 'NETMIG2015'\n", 497 | "72. 'NETMIG2016'\n", 498 | "73. 'NETMIG2017'\n", 499 | "74. 'RESIDUAL2010'\n", 500 | "75. 'RESIDUAL2011'\n", 501 | "76. 'RESIDUAL2012'\n", 502 | "77. 'RESIDUAL2013'\n", 503 | "78. 'RESIDUAL2014'\n", 504 | "79. 'RESIDUAL2015'\n", 505 | "80. 'RESIDUAL2016'\n", 506 | "81. 'RESIDUAL2017'\n", 507 | "82. 'GQESTIMATESBASE2010'\n", 508 | "83. 'GQESTIMATES2010'\n", 509 | "84. 'GQESTIMATES2011'\n", 510 | "85. 'GQESTIMATES2012'\n", 511 | "86. 'GQESTIMATES2013'\n", 512 | "87. 'GQESTIMATES2014'\n", 513 | "88. 'GQESTIMATES2015'\n", 514 | "89. 'GQESTIMATES2016'\n", 515 | "90. 'GQESTIMATES2017'\n", 516 | "91. 'RBIRTH2011'\n", 517 | "92. 'RBIRTH2012'\n", 518 | "93. 'RBIRTH2013'\n", 519 | "94. 'RBIRTH2014'\n", 520 | "95. 'RBIRTH2015'\n", 521 | "96. 'RBIRTH2016'\n", 522 | "97. 'RBIRTH2017'\n", 523 | "98. 'RDEATH2011'\n", 524 | "99. 'RDEATH2012'\n", 525 | "100. 'RDEATH2013'\n", 526 | "101. 'RDEATH2014'\n", 527 | "102. 'RDEATH2015'\n", 528 | "103. 'RDEATH2016'\n", 529 | "104. 'RDEATH2017'\n", 530 | "105. 'RNATURALINC2011'\n", 531 | "106. 'RNATURALINC2012'\n", 532 | "107. 'RNATURALINC2013'\n", 533 | "108. 
'RNATURALINC2014'\n", 534 | "109. 'RNATURALINC2015'\n", 535 | "110. 'RNATURALINC2016'\n", 536 | "111. 'RNATURALINC2017'\n", 537 | "112. 'RINTERNATIONALMIG2011'\n", 538 | "113. 'RINTERNATIONALMIG2012'\n", 539 | "114. 'RINTERNATIONALMIG2013'\n", 540 | "115. 'RINTERNATIONALMIG2014'\n", 541 | "116. 'RINTERNATIONALMIG2015'\n", 542 | "117. 'RINTERNATIONALMIG2016'\n", 543 | "118. 'RINTERNATIONALMIG2017'\n", 544 | "119. 'RDOMESTICMIG2011'\n", 545 | "120. 'RDOMESTICMIG2012'\n", 546 | "121. 'RDOMESTICMIG2013'\n", 547 | "122. 'RDOMESTICMIG2014'\n", 548 | "123. 'RDOMESTICMIG2015'\n", 549 | "124. 'RDOMESTICMIG2016'\n", 550 | "125. 'RDOMESTICMIG2017'\n", 551 | "126. 'RNETMIG2011'\n", 552 | "127. 'RNETMIG2012'\n", 553 | "128. 'RNETMIG2013'\n", 554 | "129. 'RNETMIG2014'\n", 555 | "130. 'RNETMIG2015'\n", 556 | "131. 'RNETMIG2016'\n", 557 | "132. 'RNETMIG2017'\n", 558 | "\n", 559 | "\n" 560 | ], 561 | "text/plain": [ 562 | " [1] \"SUMLEV\" \"REGION\" \"DIVISION\" \n", 563 | " [4] \"STATE\" \"COUNTY\" \"STNAME\" \n", 564 | " [7] \"CTYNAME\" \"CENSUS2010POP\" \"ESTIMATESBASE2010\" \n", 565 | " [10] \"POPESTIMATE2010\" \"POPESTIMATE2011\" \"POPESTIMATE2012\" \n", 566 | " [13] \"POPESTIMATE2013\" \"POPESTIMATE2014\" \"POPESTIMATE2015\" \n", 567 | " [16] \"POPESTIMATE2016\" \"POPESTIMATE2017\" \"NPOPCHG_2010\" \n", 568 | " [19] \"NPOPCHG_2011\" \"NPOPCHG_2012\" \"NPOPCHG_2013\" \n", 569 | " [22] \"NPOPCHG_2014\" \"NPOPCHG_2015\" \"NPOPCHG_2016\" \n", 570 | " [25] \"NPOPCHG_2017\" \"BIRTHS2010\" \"BIRTHS2011\" \n", 571 | " [28] \"BIRTHS2012\" \"BIRTHS2013\" \"BIRTHS2014\" \n", 572 | " [31] \"BIRTHS2015\" \"BIRTHS2016\" \"BIRTHS2017\" \n", 573 | " [34] \"DEATHS2010\" \"DEATHS2011\" \"DEATHS2012\" \n", 574 | " [37] \"DEATHS2013\" \"DEATHS2014\" \"DEATHS2015\" \n", 575 | " [40] \"DEATHS2016\" \"DEATHS2017\" \"NATURALINC2010\" \n", 576 | " [43] \"NATURALINC2011\" \"NATURALINC2012\" \"NATURALINC2013\" \n", 577 | " [46] \"NATURALINC2014\" \"NATURALINC2015\" \"NATURALINC2016\" \n", 578 | " [49] 
\"NATURALINC2017\" \"INTERNATIONALMIG2010\" \"INTERNATIONALMIG2011\" \n", 579 | " [52] \"INTERNATIONALMIG2012\" \"INTERNATIONALMIG2013\" \"INTERNATIONALMIG2014\" \n", 580 | " [55] \"INTERNATIONALMIG2015\" \"INTERNATIONALMIG2016\" \"INTERNATIONALMIG2017\" \n", 581 | " [58] \"DOMESTICMIG2010\" \"DOMESTICMIG2011\" \"DOMESTICMIG2012\" \n", 582 | " [61] \"DOMESTICMIG2013\" \"DOMESTICMIG2014\" \"DOMESTICMIG2015\" \n", 583 | " [64] \"DOMESTICMIG2016\" \"DOMESTICMIG2017\" \"NETMIG2010\" \n", 584 | " [67] \"NETMIG2011\" \"NETMIG2012\" \"NETMIG2013\" \n", 585 | " [70] \"NETMIG2014\" \"NETMIG2015\" \"NETMIG2016\" \n", 586 | " [73] \"NETMIG2017\" \"RESIDUAL2010\" \"RESIDUAL2011\" \n", 587 | " [76] \"RESIDUAL2012\" \"RESIDUAL2013\" \"RESIDUAL2014\" \n", 588 | " [79] \"RESIDUAL2015\" \"RESIDUAL2016\" \"RESIDUAL2017\" \n", 589 | " [82] \"GQESTIMATESBASE2010\" \"GQESTIMATES2010\" \"GQESTIMATES2011\" \n", 590 | " [85] \"GQESTIMATES2012\" \"GQESTIMATES2013\" \"GQESTIMATES2014\" \n", 591 | " [88] \"GQESTIMATES2015\" \"GQESTIMATES2016\" \"GQESTIMATES2017\" \n", 592 | " [91] \"RBIRTH2011\" \"RBIRTH2012\" \"RBIRTH2013\" \n", 593 | " [94] \"RBIRTH2014\" \"RBIRTH2015\" \"RBIRTH2016\" \n", 594 | " [97] \"RBIRTH2017\" \"RDEATH2011\" \"RDEATH2012\" \n", 595 | "[100] \"RDEATH2013\" \"RDEATH2014\" \"RDEATH2015\" \n", 596 | "[103] \"RDEATH2016\" \"RDEATH2017\" \"RNATURALINC2011\" \n", 597 | "[106] \"RNATURALINC2012\" \"RNATURALINC2013\" \"RNATURALINC2014\" \n", 598 | "[109] \"RNATURALINC2015\" \"RNATURALINC2016\" \"RNATURALINC2017\" \n", 599 | "[112] \"RINTERNATIONALMIG2011\" \"RINTERNATIONALMIG2012\" \"RINTERNATIONALMIG2013\"\n", 600 | "[115] \"RINTERNATIONALMIG2014\" \"RINTERNATIONALMIG2015\" \"RINTERNATIONALMIG2016\"\n", 601 | "[118] \"RINTERNATIONALMIG2017\" \"RDOMESTICMIG2011\" \"RDOMESTICMIG2012\" \n", 602 | "[121] \"RDOMESTICMIG2013\" \"RDOMESTICMIG2014\" \"RDOMESTICMIG2015\" \n", 603 | "[124] \"RDOMESTICMIG2016\" \"RDOMESTICMIG2017\" \"RNETMIG2011\" \n", 604 | "[127] \"RNETMIG2012\" 
\"RNETMIG2013\" \"RNETMIG2014\" \n", 605 | "[130] \"RNETMIG2015\" \"RNETMIG2016\" \"RNETMIG2017\" " 606 | ] 607 | }, 608 | "metadata": {}, 609 | "output_type": "display_data" 610 | } 611 | ], 612 | "source": [ 613 | "colnames(counties)" 614 | ] 615 | }, 616 | { 617 | "cell_type": "markdown", 618 | "metadata": {}, 619 | "source": [ 620 | "Here's some code to filter out just Nebraska counties, remove the statewide total number and calculate percent change into a field called change. " 621 | ] 622 | }, 623 | { 624 | "cell_type": "code", 625 | "execution_count": 5, 626 | "metadata": {}, 627 | "outputs": [], 628 | "source": [ 629 | "nebraska <- counties %>% \n", 630 | "filter(STNAME == \"Nebraska\") %>% \n", 631 | "filter(SUMLEV == 50) %>% \n", 632 | "mutate(change = ((POPESTIMATE2017-POPESTIMATE2016)/POPESTIMATE2016)*100)" 633 | ] 634 | }, 635 | { 636 | "cell_type": "markdown", 637 | "metadata": {}, 638 | "source": [ 639 | "### Homework:\n", 640 | "\n", 641 | "Read Tufte 2,3 and 5 and be prepared for a disussion of lying with charts. Also, prepare a pitch for your next visual story, which is due Thursday of Dead Week. 
" 642 | ] 643 | }, 644 | { 645 | "cell_type": "code", 646 | "execution_count": null, 647 | "metadata": {}, 648 | "outputs": [], 649 | "source": [] 650 | } 651 | ], 652 | "metadata": { 653 | "anaconda-cloud": {}, 654 | "kernelspec": { 655 | "display_name": "R", 656 | "language": "R", 657 | "name": "ir" 658 | }, 659 | "language_info": { 660 | "codemirror_mode": "r", 661 | "file_extension": ".r", 662 | "mimetype": "text/x-r-source", 663 | "name": "R", 664 | "pygments_lexer": "r", 665 | "version": "3.4.3" 666 | } 667 | }, 668 | "nbformat": 4, 669 | "nbformat_minor": 1 670 | } 671 | -------------------------------------------------------------------------------- /Assignments/2_R_Basics/RBasicsPart2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Basic data analysis in R\n", 8 | "\n", 9 | "R is a statistical programming language that is purpose built for data analysis. \n", 10 | "\n", 11 | "Base R does a lot, but there are a mountain of external libraries that do things to make R better/easier/more fully featured. One of the best libraries, in your professor's opinion, is `dplyr`, a library for working with data. To use dplyr, you need to import it. 
" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stderr", 21 | "output_type": "stream", 22 | "text": [ 23 | "\n", 24 | "Attaching package: ‘dplyr’\n", 25 | "\n", 26 | "The following objects are masked from ‘package:stats’:\n", 27 | "\n", 28 | " filter, lag\n", 29 | "\n", 30 | "The following objects are masked from ‘package:base’:\n", 31 | "\n", 32 | " intersect, setdiff, setequal, union\n", 33 | "\n" 34 | ] 35 | } 36 | ], 37 | "source": [ 38 | "library(dplyr)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "The first thing we need to do is get some data to work with. We do that by reading it in. In our case, we're going to read data from a csv file -- a comma-separated values file. \n", 46 | "\n", 47 | "The code looks like this: \n", 48 | "\n", 49 | "`mountainlions <- read.csv(\"../../Data/mountainlions.csv\")`\n", 50 | "\n", 51 | "Let's unpack that. \n", 52 | "\n", 53 | "The first part -- `mountainlions` -- is the name of your variable. A variable is just a name of a thing. In this case, our variable is a data frame, which is R's way of storing data. We can call this whatever we want. I always want to name data frames after what is in it. In this case, we're going to import a dataset of mountain lion sightings from the Nebraska Game and Parks Commission.\n", 54 | "\n", 55 | "The `<-` bit is the variable assignment operator. It's how we know we're assigning something to a word. \n", 56 | "\n", 57 | "The `read.csv` bits are pretty obvious. What happens in the quote marks is the path to the data. In there, I have to tell R where it find the data. The easiest thing to do, if you are confused about how to find your data, is to put your data in the same folder as as your notebook. In my case, I've got a folder called Data that's two levels up from my work folder. So the `../` means move up one level. 
So move up one level, move up one level, find Data, then in there is a file called mountainlions.csv. \n", 58 | "\n", 59 | "What you put in there will be different from mine. So your first task is to import the data. " 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 2, 65 | "metadata": { 66 | "collapsed": true 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "mountainlions <- read.csv(\"../../Data/mountainlions.csv\")" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "Now we can inspect the data we imported. What does it look like? To do that, we use `head(mountainlions)` to show the headers and the first six rows of data. If we wanted to see them all, we could just simply enter `mountainlions` and run it. \n", 78 | "\n", 79 | "To get the number of records in our dataset, we run `nrow(mountainlions)`" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 3, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/html": [ 90 | "\n", 91 | "\n", 92 | "\n", 93 | "\t\n", 94 | "\t\n", 95 | "\t\n", 96 | "\t\n", 97 | "\t\n", 98 | "\t\n", 99 | "\n", 100 | "
IDCofirm.TypeCOUNTYDate
1 Track Dawes 9/14/91
2 Mortality Sioux 11/10/91
3 Mortality Scotts Bluff4/21/96
4 Mortality Sioux 5/9/99
5 Mortality Box Butte 9/29/99
6 Track Scotts Bluff11/12/99
\n" 101 | ], 102 | "text/latex": [ 103 | "\\begin{tabular}{r|llll}\n", 104 | " ID & Cofirm.Type & COUNTY & Date\\\\\n", 105 | "\\hline\n", 106 | "\t 1 & Track & Dawes & 9/14/91 \\\\\n", 107 | "\t 2 & Mortality & Sioux & 11/10/91 \\\\\n", 108 | "\t 3 & Mortality & Scotts Bluff & 4/21/96 \\\\\n", 109 | "\t 4 & Mortality & Sioux & 5/9/99 \\\\\n", 110 | "\t 5 & Mortality & Box Butte & 9/29/99 \\\\\n", 111 | "\t 6 & Track & Scotts Bluff & 11/12/99 \\\\\n", 112 | "\\end{tabular}\n" 113 | ], 114 | "text/markdown": [ 115 | "\n", 116 | "ID | Cofirm.Type | COUNTY | Date | \n", 117 | "|---|---|---|---|---|---|\n", 118 | "| 1 | Track | Dawes | 9/14/91 | \n", 119 | "| 2 | Mortality | Sioux | 11/10/91 | \n", 120 | "| 3 | Mortality | Scotts Bluff | 4/21/96 | \n", 121 | "| 4 | Mortality | Sioux | 5/9/99 | \n", 122 | "| 5 | Mortality | Box Butte | 9/29/99 | \n", 123 | "| 6 | Track | Scotts Bluff | 11/12/99 | \n", 124 | "\n", 125 | "\n" 126 | ], 127 | "text/plain": [ 128 | " ID Cofirm.Type COUNTY Date \n", 129 | "1 1 Track Dawes 9/14/91 \n", 130 | "2 2 Mortality Sioux 11/10/91\n", 131 | "3 3 Mortality Scotts Bluff 4/21/96 \n", 132 | "4 4 Mortality Sioux 5/9/99 \n", 133 | "5 5 Mortality Box Butte 9/29/99 \n", 134 | "6 6 Track Scotts Bluff 11/12/99" 135 | ] 136 | }, 137 | "metadata": {}, 138 | "output_type": "display_data" 139 | }, 140 | { 141 | "data": { 142 | "text/html": [ 143 | "393" 144 | ], 145 | "text/latex": [ 146 | "393" 147 | ], 148 | "text/markdown": [ 149 | "393" 150 | ], 151 | "text/plain": [ 152 | "[1] 393" 153 | ] 154 | }, 155 | "metadata": {}, 156 | "output_type": "display_data" 157 | } 158 | ], 159 | "source": [ 160 | "head(mountainlions)\n", 161 | "nrow(mountainlions)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "So what if we wanted to know how many mountain lion sightings there were in each county? To do that by hand, we'd have to take each of the 393 records and sort them into a pile. 
We'd put them in groups and then count them. \n", 169 | "\n", 170 | "`dplyr` has a group by function in it that does just this. A massive amount of data analysis involves grouping like things together at some point. So it's a good place to start. \n", 171 | "\n", 172 | "So to do this, we'll take our dataset and we'll introduce a new operator: `%>%`. The best way to read that operator, in my opinion, is to interpret that as \"and then do this.\" Here's the code: " 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 4, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/html": [ 183 | "\n", 184 | "\n", 185 | "\n", 186 | "\t\n", 187 | "\t\n", 188 | "\t\n", 189 | "\t\n", 190 | "\t\n", 191 | "\t\n", 192 | "\t\n", 193 | "\t\n", 194 | "\t\n", 195 | "\t\n", 196 | "\t\n", 197 | "\t\n", 198 | "\t\n", 199 | "\t\n", 200 | "\t\n", 201 | "\t\n", 202 | "\t\n", 203 | "\t\n", 204 | "\t\n", 205 | "\t\n", 206 | "\t\n", 207 | "\t\n", 208 | "\t\n", 209 | "\t\n", 210 | "\t\n", 211 | "\t\n", 212 | "\t\n", 213 | "\t\n", 214 | "\t\n", 215 | "\t\n", 216 | "\t\n", 217 | "\t\n", 218 | "\t\n", 219 | "\t\n", 220 | "\t\n", 221 | "\t\n", 222 | "\t\n", 223 | "\t\n", 224 | "\t\n", 225 | "\t\n", 226 | "\t\n", 227 | "\t\n", 228 | "\n", 229 | "
COUNTYcount
Banner 6
Blaine 3
Box Butte 4
Brown 15
Buffalo 3
Cedar 1
Cherry 30
Custer 8
Dakota 3
Dawes 111
Dawson 5
Dixon 3
Douglas 2
Frontier 1
Hall 1
Holt 2
Hooker 1
Howard 3
Keith 1
Keya Paha 20
Kimball 1
Knox 8
Lincoln 10
Merrick 1
Morrill 2
Nance 1
Nemaha 5
Platte 1
Polk 1
Richardson 2
Rock 11
Sarpy 1
Saunders 2
Scotts Bluff 26
sheridan 2
Sheridan 35
Sherman 1
Sioux 52
Thomas 5
Thurston 1
Valley 1
Wheeler 1
\n" 230 | ], 231 | "text/latex": [ 232 | "\\begin{tabular}{r|ll}\n", 233 | " COUNTY & count\\\\\n", 234 | "\\hline\n", 235 | "\t Banner & 6 \\\\\n", 236 | "\t Blaine & 3 \\\\\n", 237 | "\t Box Butte & 4 \\\\\n", 238 | "\t Brown & 15 \\\\\n", 239 | "\t Buffalo & 3 \\\\\n", 240 | "\t Cedar & 1 \\\\\n", 241 | "\t Cherry & 30 \\\\\n", 242 | "\t Custer & 8 \\\\\n", 243 | "\t Dakota & 3 \\\\\n", 244 | "\t Dawes & 111 \\\\\n", 245 | "\t Dawson & 5 \\\\\n", 246 | "\t Dixon & 3 \\\\\n", 247 | "\t Douglas & 2 \\\\\n", 248 | "\t Frontier & 1 \\\\\n", 249 | "\t Hall & 1 \\\\\n", 250 | "\t Holt & 2 \\\\\n", 251 | "\t Hooker & 1 \\\\\n", 252 | "\t Howard & 3 \\\\\n", 253 | "\t Keith & 1 \\\\\n", 254 | "\t Keya Paha & 20 \\\\\n", 255 | "\t Kimball & 1 \\\\\n", 256 | "\t Knox & 8 \\\\\n", 257 | "\t Lincoln & 10 \\\\\n", 258 | "\t Merrick & 1 \\\\\n", 259 | "\t Morrill & 2 \\\\\n", 260 | "\t Nance & 1 \\\\\n", 261 | "\t Nemaha & 5 \\\\\n", 262 | "\t Platte & 1 \\\\\n", 263 | "\t Polk & 1 \\\\\n", 264 | "\t Richardson & 2 \\\\\n", 265 | "\t Rock & 11 \\\\\n", 266 | "\t Sarpy & 1 \\\\\n", 267 | "\t Saunders & 2 \\\\\n", 268 | "\t Scotts Bluff & 26 \\\\\n", 269 | "\t sheridan & 2 \\\\\n", 270 | "\t Sheridan & 35 \\\\\n", 271 | "\t Sherman & 1 \\\\\n", 272 | "\t Sioux & 52 \\\\\n", 273 | "\t Thomas & 5 \\\\\n", 274 | "\t Thurston & 1 \\\\\n", 275 | "\t Valley & 1 \\\\\n", 276 | "\t Wheeler & 1 \\\\\n", 277 | "\\end{tabular}\n" 278 | ], 279 | "text/markdown": [ 280 | "\n", 281 | "COUNTY | count | \n", 282 | "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", 283 | "| Banner | 6 | \n", 284 | "| Blaine | 3 | \n", 285 | "| Box Butte | 4 | \n", 286 | "| Brown | 15 | \n", 287 | "| Buffalo | 3 | \n", 288 | "| Cedar | 1 | \n", 289 | "| Cherry | 30 | \n", 290 | "| Custer | 8 | \n", 291 | "| Dakota | 3 | \n", 292 | "| Dawes | 111 | \n", 293 | "| Dawson | 5 | \n", 294 | "| 
Dixon | 3 | \n", 295 | "| Douglas | 2 | \n", 296 | "| Frontier | 1 | \n", 297 | "| Hall | 1 | \n", 298 | "| Holt | 2 | \n", 299 | "| Hooker | 1 | \n", 300 | "| Howard | 3 | \n", 301 | "| Keith | 1 | \n", 302 | "| Keya Paha | 20 | \n", 303 | "| Kimball | 1 | \n", 304 | "| Knox | 8 | \n", 305 | "| Lincoln | 10 | \n", 306 | "| Merrick | 1 | \n", 307 | "| Morrill | 2 | \n", 308 | "| Nance | 1 | \n", 309 | "| Nemaha | 5 | \n", 310 | "| Platte | 1 | \n", 311 | "| Polk | 1 | \n", 312 | "| Richardson | 2 | \n", 313 | "| Rock | 11 | \n", 314 | "| Sarpy | 1 | \n", 315 | "| Saunders | 2 | \n", 316 | "| Scotts Bluff | 26 | \n", 317 | "| sheridan | 2 | \n", 318 | "| Sheridan | 35 | \n", 319 | "| Sherman | 1 | \n", 320 | "| Sioux | 52 | \n", 321 | "| Thomas | 5 | \n", 322 | "| Thurston | 1 | \n", 323 | "| Valley | 1 | \n", 324 | "| Wheeler | 1 | \n", 325 | "\n", 326 | "\n" 327 | ], 328 | "text/plain": [ 329 | " COUNTY count\n", 330 | "1 Banner 6 \n", 331 | "2 Blaine 3 \n", 332 | "3 Box Butte 4 \n", 333 | "4 Brown 15 \n", 334 | "5 Buffalo 3 \n", 335 | "6 Cedar 1 \n", 336 | "7 Cherry 30 \n", 337 | "8 Custer 8 \n", 338 | "9 Dakota 3 \n", 339 | "10 Dawes 111 \n", 340 | "11 Dawson 5 \n", 341 | "12 Dixon 3 \n", 342 | "13 Douglas 2 \n", 343 | "14 Frontier 1 \n", 344 | "15 Hall 1 \n", 345 | "16 Holt 2 \n", 346 | "17 Hooker 1 \n", 347 | "18 Howard 3 \n", 348 | "19 Keith 1 \n", 349 | "20 Keya Paha 20 \n", 350 | "21 Kimball 1 \n", 351 | "22 Knox 8 \n", 352 | "23 Lincoln 10 \n", 353 | "24 Merrick 1 \n", 354 | "25 Morrill 2 \n", 355 | "26 Nance 1 \n", 356 | "27 Nemaha 5 \n", 357 | "28 Platte 1 \n", 358 | "29 Polk 1 \n", 359 | "30 Richardson 2 \n", 360 | "31 Rock 11 \n", 361 | "32 Sarpy 1 \n", 362 | "33 Saunders 2 \n", 363 | "34 Scotts Bluff 26 \n", 364 | "35 sheridan 2 \n", 365 | "36 Sheridan 35 \n", 366 | "37 Sherman 1 \n", 367 | "38 Sioux 52 \n", 368 | "39 Thomas 5 \n", 369 | "40 Thurston 1 \n", 370 | "41 Valley 1 \n", 371 | "42 Wheeler 1 " 372 | ] 373 | }, 374 | "metadata": {}, 375 | 
"output_type": "display_data" 376 | } 377 | ], 378 | "source": [ 379 | "mountainlions %>%\n", 380 | " group_by(COUNTY) %>%\n", 381 | " summarise(\n", 382 | " count = n(),\n", 383 | " ) " 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": {}, 389 | "source": [ 390 | "So let's walk through that. We start with our dataset -- `mountainlions` -- and then we tell it to group the data by a given field in the data. In this case, we wanted to group together all the counties, signified by the field name COUNTY, which you could get from looking at `head(mountainlions)`. So after we group the data, we need to count them up. In dplyr, we use `summarize` [which can do more than just count things](http://dplyr.tidyverse.org/reference/summarise.html). So inside the parentheses in summarize, we set up the summaries we want. In this case, we just want a count of the counties. So `count = n(),` says create a new field, called `count` and set it equal to `n()`, which might look weird, but it's common in stats. The number of things in a dataset? Statisticians call in n. There are n number of incidents in this dataset. So `n()` is a function that counts the number of things there are. \n", 391 | "\n", 392 | "And when we run that, we get a list of counties with a count next to them. But it's not in any order. So we'll add another And Then Do This %>% and use `arrange`. Arrange does what you think it does -- it arranges data in order. By default, it's in ascending order -- smallest to largest. But if we want to know the county with the most mountain lion sightings, we need to sort it in descending order. 
That looks like this:" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 5, 398 | "metadata": {}, 399 | "outputs": [ 400 | { 401 | "data": { 402 | "text/html": [ 403 | "\n", 404 | "\n", 405 | "\n", 406 | "\t\n", 407 | "\t\n", 408 | "\t\n", 409 | "\t\n", 410 | "\t\n", 411 | "\t\n", 412 | "\t\n", 413 | "\t\n", 414 | "\t\n", 415 | "\t\n", 416 | "\t\n", 417 | "\t\n", 418 | "\t\n", 419 | "\t\n", 420 | "\t\n", 421 | "\t\n", 422 | "\t\n", 423 | "\t\n", 424 | "\t\n", 425 | "\t\n", 426 | "\t\n", 427 | "\t\n", 428 | "\t\n", 429 | "\t\n", 430 | "\t\n", 431 | "\t\n", 432 | "\t\n", 433 | "\t\n", 434 | "\t\n", 435 | "\t\n", 436 | "\t\n", 437 | "\t\n", 438 | "\t\n", 439 | "\t\n", 440 | "\t\n", 441 | "\t\n", 442 | "\t\n", 443 | "\t\n", 444 | "\t\n", 445 | "\t\n", 446 | "\t\n", 447 | "\t\n", 448 | "\n", 449 | "
COUNTYcount
Dawes 111
Sioux 52
Sheridan 35
Cherry 30
Scotts Bluff 26
Keya Paha 20
Brown 15
Rock 11
Lincoln 10
Custer 8
Knox 8
Banner 6
Dawson 5
Nemaha 5
Thomas 5
Box Butte 4
Blaine 3
Buffalo 3
Dakota 3
Dixon 3
Howard 3
Douglas 2
Holt 2
Morrill 2
Richardson 2
Saunders 2
sheridan 2
Cedar 1
Frontier 1
Hall 1
Hooker 1
Keith 1
Kimball 1
Merrick 1
Nance 1
Platte 1
Polk 1
Sarpy 1
Sherman 1
Thurston 1
Valley 1
Wheeler 1
\n" 450 | ], 451 | "text/latex": [ 452 | "\\begin{tabular}{r|ll}\n", 453 | " COUNTY & count\\\\\n", 454 | "\\hline\n", 455 | "\t Dawes & 111 \\\\\n", 456 | "\t Sioux & 52 \\\\\n", 457 | "\t Sheridan & 35 \\\\\n", 458 | "\t Cherry & 30 \\\\\n", 459 | "\t Scotts Bluff & 26 \\\\\n", 460 | "\t Keya Paha & 20 \\\\\n", 461 | "\t Brown & 15 \\\\\n", 462 | "\t Rock & 11 \\\\\n", 463 | "\t Lincoln & 10 \\\\\n", 464 | "\t Custer & 8 \\\\\n", 465 | "\t Knox & 8 \\\\\n", 466 | "\t Banner & 6 \\\\\n", 467 | "\t Dawson & 5 \\\\\n", 468 | "\t Nemaha & 5 \\\\\n", 469 | "\t Thomas & 5 \\\\\n", 470 | "\t Box Butte & 4 \\\\\n", 471 | "\t Blaine & 3 \\\\\n", 472 | "\t Buffalo & 3 \\\\\n", 473 | "\t Dakota & 3 \\\\\n", 474 | "\t Dixon & 3 \\\\\n", 475 | "\t Howard & 3 \\\\\n", 476 | "\t Douglas & 2 \\\\\n", 477 | "\t Holt & 2 \\\\\n", 478 | "\t Morrill & 2 \\\\\n", 479 | "\t Richardson & 2 \\\\\n", 480 | "\t Saunders & 2 \\\\\n", 481 | "\t sheridan & 2 \\\\\n", 482 | "\t Cedar & 1 \\\\\n", 483 | "\t Frontier & 1 \\\\\n", 484 | "\t Hall & 1 \\\\\n", 485 | "\t Hooker & 1 \\\\\n", 486 | "\t Keith & 1 \\\\\n", 487 | "\t Kimball & 1 \\\\\n", 488 | "\t Merrick & 1 \\\\\n", 489 | "\t Nance & 1 \\\\\n", 490 | "\t Platte & 1 \\\\\n", 491 | "\t Polk & 1 \\\\\n", 492 | "\t Sarpy & 1 \\\\\n", 493 | "\t Sherman & 1 \\\\\n", 494 | "\t Thurston & 1 \\\\\n", 495 | "\t Valley & 1 \\\\\n", 496 | "\t Wheeler & 1 \\\\\n", 497 | "\\end{tabular}\n" 498 | ], 499 | "text/markdown": [ 500 | "\n", 501 | "COUNTY | count | \n", 502 | "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n", 503 | "| Dawes | 111 | \n", 504 | "| Sioux | 52 | \n", 505 | "| Sheridan | 35 | \n", 506 | "| Cherry | 30 | \n", 507 | "| Scotts Bluff | 26 | \n", 508 | "| Keya Paha | 20 | \n", 509 | "| Brown | 15 | \n", 510 | "| Rock | 11 | \n", 511 | "| Lincoln | 10 | \n", 512 | "| Custer | 8 | \n", 513 | "| Knox | 8 | \n", 
514 | "| Banner | 6 | \n", 515 | "| Dawson | 5 | \n", 516 | "| Nemaha | 5 | \n", 517 | "| Thomas | 5 | \n", 518 | "| Box Butte | 4 | \n", 519 | "| Blaine | 3 | \n", 520 | "| Buffalo | 3 | \n", 521 | "| Dakota | 3 | \n", 522 | "| Dixon | 3 | \n", 523 | "| Howard | 3 | \n", 524 | "| Douglas | 2 | \n", 525 | "| Holt | 2 | \n", 526 | "| Morrill | 2 | \n", 527 | "| Richardson | 2 | \n", 528 | "| Saunders | 2 | \n", 529 | "| sheridan | 2 | \n", 530 | "| Cedar | 1 | \n", 531 | "| Frontier | 1 | \n", 532 | "| Hall | 1 | \n", 533 | "| Hooker | 1 | \n", 534 | "| Keith | 1 | \n", 535 | "| Kimball | 1 | \n", 536 | "| Merrick | 1 | \n", 537 | "| Nance | 1 | \n", 538 | "| Platte | 1 | \n", 539 | "| Polk | 1 | \n", 540 | "| Sarpy | 1 | \n", 541 | "| Sherman | 1 | \n", 542 | "| Thurston | 1 | \n", 543 | "| Valley | 1 | \n", 544 | "| Wheeler | 1 | \n", 545 | "\n", 546 | "\n" 547 | ], 548 | "text/plain": [ 549 | " COUNTY count\n", 550 | "1 Dawes 111 \n", 551 | "2 Sioux 52 \n", 552 | "3 Sheridan 35 \n", 553 | "4 Cherry 30 \n", 554 | "5 Scotts Bluff 26 \n", 555 | "6 Keya Paha 20 \n", 556 | "7 Brown 15 \n", 557 | "8 Rock 11 \n", 558 | "9 Lincoln 10 \n", 559 | "10 Custer 8 \n", 560 | "11 Knox 8 \n", 561 | "12 Banner 6 \n", 562 | "13 Dawson 5 \n", 563 | "14 Nemaha 5 \n", 564 | "15 Thomas 5 \n", 565 | "16 Box Butte 4 \n", 566 | "17 Blaine 3 \n", 567 | "18 Buffalo 3 \n", 568 | "19 Dakota 3 \n", 569 | "20 Dixon 3 \n", 570 | "21 Howard 3 \n", 571 | "22 Douglas 2 \n", 572 | "23 Holt 2 \n", 573 | "24 Morrill 2 \n", 574 | "25 Richardson 2 \n", 575 | "26 Saunders 2 \n", 576 | "27 sheridan 2 \n", 577 | "28 Cedar 1 \n", 578 | "29 Frontier 1 \n", 579 | "30 Hall 1 \n", 580 | "31 Hooker 1 \n", 581 | "32 Keith 1 \n", 582 | "33 Kimball 1 \n", 583 | "34 Merrick 1 \n", 584 | "35 Nance 1 \n", 585 | "36 Platte 1 \n", 586 | "37 Polk 1 \n", 587 | "38 Sarpy 1 \n", 588 | "39 Sherman 1 \n", 589 | "40 Thurston 1 \n", 590 | "41 Valley 1 \n", 591 | "42 Wheeler 1 " 592 | ] 593 | }, 594 | "metadata": {}, 595 | 
"output_type": "display_data" 596 | } 597 | ], 598 | "source": [ 599 | "mountainlions %>%\n", 600 | " group_by(COUNTY) %>%\n", 601 | " summarise(\n", 602 | " count = n(),\n", 603 | " ) %>% arrange(desc(count))" 604 | ] 605 | }, 606 | { 607 | "cell_type": "markdown", 608 | "metadata": {}, 609 | "source": [ 610 | "## More basics\n", 611 | "\n", 612 | "In the last example, we grouped some data together and counted it up, but there's so much more you can do. You can do multiple measures in a single step as well. \n", 613 | "\n", 614 | "Let's look at some simple college data. " 615 | ] 616 | }, 617 | { 618 | "cell_type": "code", 619 | "execution_count": 6, 620 | "metadata": { 621 | "collapsed": true 622 | }, 623 | "outputs": [], 624 | "source": [ 625 | "colleges <- read.csv(\"../../Data/colleges.csv\")" 626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": 7, 631 | "metadata": {}, 632 | "outputs": [ 633 | { 634 | "data": { 635 | "text/html": [ 636 | "\n", 637 | "\n", 638 | "\n", 639 | "\t\n", 640 | "\t\n", 641 | "\t\n", 642 | "\t\n", 643 | "\t\n", 644 | "\t\n", 645 | "\n", 646 | "
UnitIDNameInState1213OutOfState1213GradRate
151351 Indiana University-Bloomington 23116 44566 75
171100 Michigan State University 24028 43986 79
147767 Northwestern University 60840 60840 93
204796 Ohio State University-Main Campus 24919 40327 82
214777 Pennsylvania State University-Main Campus31854 44156 86
243780 Purdue University-Main Campus 23468 42270 69
\n" 647 | ], 648 | "text/latex": [ 649 | "\\begin{tabular}{r|lllll}\n", 650 | " UnitID & Name & InState1213 & OutOfState1213 & GradRate\\\\\n", 651 | "\\hline\n", 652 | "\t 151351 & Indiana University-Bloomington & 23116 & 44566 & 75 \\\\\n", 653 | "\t 171100 & Michigan State University & 24028 & 43986 & 79 \\\\\n", 654 | "\t 147767 & Northwestern University & 60840 & 60840 & 93 \\\\\n", 655 | "\t 204796 & Ohio State University-Main Campus & 24919 & 40327 & 82 \\\\\n", 656 | "\t 214777 & Pennsylvania State University-Main Campus & 31854 & 44156 & 86 \\\\\n", 657 | "\t 243780 & Purdue University-Main Campus & 23468 & 42270 & 69 \\\\\n", 658 | "\\end{tabular}\n" 659 | ], 660 | "text/markdown": [ 661 | "\n", 662 | "UnitID | Name | InState1213 | OutOfState1213 | GradRate | \n", 663 | "|---|---|---|---|---|---|\n", 664 | "| 151351 | Indiana University-Bloomington | 23116 | 44566 | 75 | \n", 665 | "| 171100 | Michigan State University | 24028 | 43986 | 79 | \n", 666 | "| 147767 | Northwestern University | 60840 | 60840 | 93 | \n", 667 | "| 204796 | Ohio State University-Main Campus | 24919 | 40327 | 82 | \n", 668 | "| 214777 | Pennsylvania State University-Main Campus | 31854 | 44156 | 86 | \n", 669 | "| 243780 | Purdue University-Main Campus | 23468 | 42270 | 69 | \n", 670 | "\n", 671 | "\n" 672 | ], 673 | "text/plain": [ 674 | " UnitID Name InState1213 OutOfState1213\n", 675 | "1 151351 Indiana University-Bloomington 23116 44566 \n", 676 | "2 171100 Michigan State University 24028 43986 \n", 677 | "3 147767 Northwestern University 60840 60840 \n", 678 | "4 204796 Ohio State University-Main Campus 24919 40327 \n", 679 | "5 214777 Pennsylvania State University-Main Campus 31854 44156 \n", 680 | "6 243780 Purdue University-Main Campus 23468 42270 \n", 681 | " GradRate\n", 682 | "1 75 \n", 683 | "2 79 \n", 684 | "3 93 \n", 685 | "4 82 \n", 686 | "5 86 \n", 687 | "6 69 " 688 | ] 689 | }, 690 | "metadata": {}, 691 | "output_type": "display_data" 692 | } 693 | ], 694 | 
"source": [ 695 | "head(colleges)" 696 | ] 697 | }, 698 | { 699 | "cell_type": "markdown", 700 | "metadata": {}, 701 | "source": [ 702 | "In summarize, we can calculate any number of measures. Here, we'll use R's built in `mean` and `median` functions to calculate ... well, you get the idea. " 703 | ] 704 | }, 705 | { 706 | "cell_type": "code", 707 | "execution_count": 8, 708 | "metadata": {}, 709 | "outputs": [ 710 | { 711 | "data": { 712 | "text/html": [ 713 | "\n", 714 | "\n", 715 | "\n", 716 | "\t\n", 717 | "\n", 718 | "
countinstatemeanoutstatemeaninstatemedianoutstatemedian
14 27652.8642821.5 24473.5 42194
\n" 719 | ], 720 | "text/latex": [ 721 | "\\begin{tabular}{r|lllll}\n", 722 | " count & instatemean & outstatemean & instatemedian & outstatemedian\\\\\n", 723 | "\\hline\n", 724 | "\t 14 & 27652.86 & 42821.5 & 24473.5 & 42194 \\\\\n", 725 | "\\end{tabular}\n" 726 | ], 727 | "text/markdown": [ 728 | "\n", 729 | "count | instatemean | outstatemean | instatemedian | outstatemedian | \n", 730 | "|---|\n", 731 | "| 14 | 27652.86 | 42821.5 | 24473.5 | 42194 | \n", 732 | "\n", 733 | "\n" 734 | ], 735 | "text/plain": [ 736 | " count instatemean outstatemean instatemedian outstatemedian\n", 737 | "1 14 27652.86 42821.5 24473.5 42194 " 738 | ] 739 | }, 740 | "metadata": {}, 741 | "output_type": "display_data" 742 | } 743 | ], 744 | "source": [ 745 | "colleges %>%\n", 746 | " summarise(\n", 747 | " count = n(),\n", 748 | " instatemean = mean(InState1213),\n", 749 | " outstatemean = mean(OutOfState1213),\n", 750 | " instatemedian = median(InState1213),\n", 751 | " outstatemedian = median(OutOfState1213),\n", 752 | " )" 753 | ] 754 | }, 755 | { 756 | "cell_type": "markdown", 757 | "metadata": {}, 758 | "source": [ 759 | "Now, what if we just wanted to see the University of Nebraska-Lincoln? So we can compare it to the mean and median. To do that, we use `filter`, which does what it says on the tin. You can simply filter the things you want (or don't want) so your numbers reflect the things you are just looking at. So in this case, we're going to get all the records where the Name equals \"University of Nebraska-Lincoln\"." 760 | ] 761 | }, 762 | { 763 | "cell_type": "code", 764 | "execution_count": 14, 765 | "metadata": {}, 766 | "outputs": [ 767 | { 768 | "data": { 769 | "text/html": [ 770 | "\n", 771 | "\n", 772 | "\n", 773 | "\t\n", 774 | "\n", 775 | "
UnitIDNameInState1213OutOfState1213GradRate
181464 University of Nebraska-Lincoln21700 34450 65
\n" 776 | ], 777 | "text/latex": [ 778 | "\\begin{tabular}{r|lllll}\n", 779 | " UnitID & Name & InState1213 & OutOfState1213 & GradRate\\\\\n", 780 | "\\hline\n", 781 | "\t 181464 & University of Nebraska-Lincoln & 21700 & 34450 & 65 \\\\\n", 782 | "\\end{tabular}\n" 783 | ], 784 | "text/markdown": [ 785 | "\n", 786 | "UnitID | Name | InState1213 | OutOfState1213 | GradRate | \n", 787 | "|---|\n", 788 | "| 181464 | University of Nebraska-Lincoln | 21700 | 34450 | 65 | \n", 789 | "\n", 790 | "\n" 791 | ], 792 | "text/plain": [ 793 | " UnitID Name InState1213 OutOfState1213 GradRate\n", 794 | "1 181464 University of Nebraska-Lincoln 21700 34450 65 " 795 | ] 796 | }, 797 | "metadata": {}, 798 | "output_type": "display_data" 799 | } 800 | ], 801 | "source": [ 802 | "colleges %>% filter(Name == \"University of Nebraska-Lincoln\")" 803 | ] 804 | }, 805 | { 806 | "cell_type": "markdown", 807 | "metadata": {}, 808 | "source": [ 809 | "## Assignment\n", 810 | "\n", 811 | "We're going to put it all together now. We're going to calculate the mean and median salaries of job titles at the University of Nebraska-Lincoln. \n", 812 | "\n", 813 | "Answer this question:\n", 814 | "\n", 815 | "**What are the top median salaries by job title at UNL? And how does that compare to the average salary for that position?**\n", 816 | "\n", 817 | "To do this, you'll need to [download this data](https://www.dropbox.com/s/yqweytz0sb6cd5f/nusalaries1718.csv?dl=0).\n", 818 | "\n", 819 | "#### Rubric\n", 820 | "\n", 821 | "1. Did you read the data into a dataframe? \n", 822 | "2. Did you use group by syntax correctly? \n", 823 | "3. Did you use summarize syntax correctly?\n", 824 | "4. Did you use filter syntax correctly?\n", 825 | "5. Did you use Markdown comments to explain your steps? 
" 826 | ] 827 | }, 828 | { 829 | "cell_type": "code", 830 | "execution_count": null, 831 | "metadata": { 832 | "collapsed": true 833 | }, 834 | "outputs": [], 835 | "source": [] 836 | } 837 | ], 838 | "metadata": { 839 | "anaconda-cloud": {}, 840 | "kernelspec": { 841 | "display_name": "R", 842 | "language": "R", 843 | "name": "ir" 844 | }, 845 | "language_info": { 846 | "codemirror_mode": "r", 847 | "file_extension": ".r", 848 | "mimetype": "text/x-r-source", 849 | "name": "R", 850 | "pygments_lexer": "r", 851 | "version": "3.4.1" 852 | } 853 | }, 854 | "nbformat": 4, 855 | "nbformat_minor": 1 856 | } 857 | --------------------------------------------------------------------------------