├── .gitignore ├── Lab11-2-Data-Processing.ipynb ├── Lab11-Project-and-Food-Inspection-Forecasting.ipynb ├── README.md └── images ├── chicago_page.png ├── cumulative1.png ├── cumulative2.png ├── days.png ├── efficiency.png ├── git.png └── main_result.png /.gitignore: -------------------------------------------------------------------------------- 1 | # for this project only 2 | food-inspections-evaluation/ 3 | *.csv 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | 57 | # Sphinx documentation 58 | docs/_build/ 59 | 60 | # PyBuilder 61 | target/ 62 | 63 | #Ipython 64 | .ipynb_checkpoints/ 65 | # Created by .ignore support plugin (hsz.mobi) 66 | ### OSX template 67 | .DS_Store 68 | .AppleDouble 69 | .LSOverride 70 | 71 | # Icon must end with two \r 72 | Icon 73 | 74 | # Thumbnails 75 | ._* 76 | 77 | # Files that might appear in the root of a volume 78 | .DocumentRevisions-V100 79 | .fseventsd 80 | .Spotlight-V100 81 | .TemporaryItems 82 | .Trashes 83 | .VolumeIcon.icns 84 | 85 | # Directories potentially created on remote AFP share 86 | .AppleDB 87 | .AppleDesktop 88 | Network Trash Folder 89 | Temporary Items 90 | .apdisk 91 | 92 | 
#Temporary data 93 | hw1/tempdata/ 94 | hw1/.ipynb_checkpoints/ 95 | 96 | -------------------------------------------------------------------------------- /Lab11-2-Data-Processing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Lab 11: Part 2 - Food Inspection Forecasting: Data processing\n", 8 | "This file is an ipython notebook with [`R-magic`](https://ipython.org/ipython-doc/1/config/extensions/rmagic.html) to convert the data from Rds (the R programming language data storage system) to `csv` to be read into Python. If you ever find yourself in a bind with R code available for you... give `R-magic` a try. \n", 9 | "\n", 10 | "\n", 11 | "## **HUGE NOTE: All code here is taken from the [food-inspections-evaluation]( https://github.com/Chicago/food-inspections-evaluation) repository** \n", 12 | "### They did a great job at cleaning the data in R so I don't want to repeat work.\n", 13 | "\n", 14 | "All code and data is available on GitHub:\n", 15 | "https://github.com/Chicago/food-inspections-evaluation" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 6, 21 | "metadata": { 22 | "collapsed": false 23 | }, 24 | "outputs": [ 25 | { 26 | "name": "stdout", 27 | "output_type": "stream", 28 | "text": [ 29 | "The rpy2.ipython extension is already loaded. 
To reload it, use:\n", 30 | " %reload_ext rpy2.ipython\n" 31 | ] 32 | } 33 | ], 34 | "source": [ 35 | "import rpy2\n", 36 | "import pandas as pd\n", 37 | "%load_ext rpy2.ipython" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 7, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "%%R\n", 49 | "# change to your local clone\n", 50 | "data_dir = '~/food-inspections-evaluation/'\n", 51 | "out_dir = '~'\n", 52 | "\n", 53 | "library(\"data.table\", \"ggplot2\")\n", 54 | "\n", 55 | "setwd(data_dir)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "### Food Inspection database processing" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 8, 68 | "metadata": { 69 | "collapsed": false 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "%%R\n", 74 | "food = readRDS(\"DATA/food_inspections.Rds\")\n", 75 | "write.csv(food, file = paste(out_dir, '/food_inspections.csv', sep = ''), row.names = FALSE)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "### Model Dataframe processing" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 9, 88 | "metadata": { 89 | "collapsed": false 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "%%R\n", 94 | "dat = readRDS(\"DATA/dat_model.Rds\")\n", 95 | "write.csv(dat, file = paste(out_dir, '/dat_model.csv', sep = ''))" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 10, 101 | "metadata": { 102 | "collapsed": false, 103 | "scrolled": true 104 | }, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/plain": [ 109 | "Classes ‘data.table’ and 'data.frame':\t18712 obs. 
of 16 variables:\n", 110 | " $ Inspectorblue : num 0 1 1 1 1 0 0 0 0 0 ...\n", 111 | " $ Inspectorbrown : num 0 0 0 0 0 0 0 0 0 0 ...\n", 112 | " $ Inspectorgreen : num 1 0 0 0 0 0 0 0 0 0 ...\n", 113 | " $ Inspectororange : num 0 0 0 0 0 1 1 1 1 1 ...\n", 114 | " $ Inspectorpurple : num 0 0 0 0 0 0 0 0 0 0 ...\n", 115 | " $ Inspectoryellow : num 0 0 0 0 0 0 0 0 0 0 ...\n", 116 | " $ pastSerious : num 0 0 0 0 0 0 0 0 0 0 ...\n", 117 | " $ pastCritical : num 0 0 0 0 0 0 0 0 0 0 ...\n", 118 | " $ timeSinceLast : num 2 2 2 2 2 2 2 2 2 2 ...\n", 119 | " $ ageAtInspection : num 1 1 1 1 1 1 0 1 1 0 ...\n", 120 | " $ consumption_on_premises_incidental_activity: num 0 0 0 0 0 0 0 0 0 0 ...\n", 121 | " $ tobacco_retail_over_counter : num 1 0 0 0 0 0 0 0 0 0 ...\n", 122 | " $ temperatureMax : num 53.5 59 59 56.2 52.7 ...\n", 123 | " $ heat_burglary : num 26.99 13.98 12.61 35.91 9.53 ...\n", 124 | " $ heat_sanitation : num 37.75 15.41 8.32 38.19 2.13 ...\n", 125 | " $ heat_garbage : num 12.8 12.9 8 26.2 3.4 ...\n", 126 | " - attr(*, \".internal.selfref\")= \n", 127 | "[1] 18712\n" 128 | ] 129 | }, 130 | "metadata": {}, 131 | "output_type": "display_data" 132 | } 133 | ], 134 | "source": [ 135 | "%%R\n", 136 | "dat <- readRDS(\"DATA/dat_model.Rds\")\n", 137 | "\n", 138 | "## Only keep \"Retail Food Establishment\"\n", 139 | "dat <- dat[LICENSE_DESCRIPTION == \"Retail Food Establishment\"]\n", 140 | "## Remove License Description\n", 141 | "dat$LICENSE_DESCRIPTION <- NULL\n", 142 | "dat <- na.omit(dat)\n", 143 | "\n", 144 | "## Add criticalFound variable to dat:\n", 145 | "dat$criticalFound <- pmin(1, dat$criticalCount)\n", 146 | "\n", 147 | "# ## Set the key for dat\n", 148 | "setkey(dat, Inspection_ID)\n", 149 | "\n", 150 | "# Match time period of original results\n", 151 | "# dat <- dat[Inspection_Date < \"2013-09-01\" | Inspection_Date > \"2014-07-01\"]\n", 152 | "\n", 153 | "#==============================================================================\n", 154 | "# CREATE 
MODEL DATA\n", 155 | "#==============================================================================\n", 156 | "# sort(colnames(dat))\n", 157 | "\n", 158 | "xmat <- dat[ , list(Inspector = Inspector_Assigned,\n", 159 | " pastSerious = pmin(pastSerious, 1),\n", 160 | " pastCritical = pmin(pastCritical, 1),\n", 161 | " timeSinceLast,\n", 162 | " ageAtInspection = ifelse(ageAtInspection > 4, 1L, 0L),\n", 163 | " consumption_on_premises_incidental_activity,\n", 164 | " tobacco_retail_over_counter,\n", 165 | " temperatureMax,\n", 166 | " heat_burglary = pmin(heat_burglary, 70),\n", 167 | " heat_sanitation = pmin(heat_sanitation, 70),\n", 168 | " heat_garbage = pmin(heat_garbage, 50),\n", 169 | " # Facility_Type,\n", 170 | " criticalFound),\n", 171 | " keyby = Inspection_ID]\n", 172 | "mm <- model.matrix(criticalFound ~ . -1, data=xmat[ , -1, with=F])\n", 173 | "mm <- as.data.table(mm)\n", 174 | "str(mm)\n", 175 | "colnames(mm)\n", 176 | "\n", 177 | "#==============================================================================\n", 178 | "# CREATE TEST / TRAIN PARTITIONS\n", 179 | "#==============================================================================\n", 180 | "# 2014-07-01 is an easy separator\n", 181 | "\n", 182 | "dat[Inspection_Date < \"2014-07-01\", range(Inspection_Date)]\n", 183 | "dat[Inspection_Date > \"2014-07-01\", range(Inspection_Date)]\n", 184 | "\n", 185 | "iiTrain <- dat[ , which(Inspection_Date < \"2014-07-01\")]\n", 186 | "iiTest <- dat[ , which(Inspection_Date > \"2014-07-01\")]\n", 187 | "\n", 188 | "## Check to see if any rows didn't make it through the model.matrix formula\n", 189 | "nrow(dat)\n", 190 | "nrow(xmat)\n", 191 | "nrow(mm)\n" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 11, 197 | "metadata": { 198 | "collapsed": false 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "%%R\n", 203 | "# Output Model Matrix and Target\n", 204 | "write.csv(mm, file = paste(out_dir, '/model_matrix.csv', sep = 
''), row.names = FALSE)\n", 205 | "write.csv(xmat$criticalFound, file = paste(out_dir, '/TARGET.csv', sep = ''), row.names = FALSE)" 206 | ] 207 | } 208 | ], 209 | "metadata": { 210 | "kernelspec": { 211 | "display_name": "Python 2", 212 | "language": "python", 213 | "name": "python2" 214 | }, 215 | "language_info": { 216 | "codemirror_mode": { 217 | "name": "ipython", 218 | "version": 2 219 | }, 220 | "file_extension": ".py", 221 | "mimetype": "text/x-python", 222 | "name": "python", 223 | "nbconvert_exporter": "python", 224 | "pygments_lexer": "ipython2", 225 | "version": "2.7.10" 226 | } 227 | }, 228 | "nbformat": 4, 229 | "nbformat_minor": 0 230 | } 231 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 2015lab11 2 | -------------------------------------------------------------------------------- /images/chicago_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cs109/2015lab11/509667be105657b0ea3d0717e93b59233284aa98/images/chicago_page.png -------------------------------------------------------------------------------- /images/cumulative1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cs109/2015lab11/509667be105657b0ea3d0717e93b59233284aa98/images/cumulative1.png -------------------------------------------------------------------------------- /images/cumulative2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cs109/2015lab11/509667be105657b0ea3d0717e93b59233284aa98/images/cumulative2.png -------------------------------------------------------------------------------- /images/days.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cs109/2015lab11/509667be105657b0ea3d0717e93b59233284aa98/images/days.png -------------------------------------------------------------------------------- /images/efficiency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cs109/2015lab11/509667be105657b0ea3d0717e93b59233284aa98/images/efficiency.png -------------------------------------------------------------------------------- /images/git.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cs109/2015lab11/509667be105657b0ea3d0717e93b59233284aa98/images/git.png -------------------------------------------------------------------------------- /images/main_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cs109/2015lab11/509667be105657b0ea3d0717e93b59233284aa98/images/main_result.png --------------------------------------------------------------------------------