├── drugs ├── output │ └── .gitignore ├── druguse.r ├── data │ └── druguse.csv ├── readme.md └── eradication.r ├── Benford ├── output │ └── .gitignore ├── readme.md └── benford.r ├── CIEISP ├── output │ └── .gitignore ├── data │ ├── LC.pdf │ ├── RE.pdf │ ├── BD02.TXT.bz2 │ ├── BD03.TXT.bz2 │ ├── BD04.TXT.bz2 │ ├── BD05.TXT.bz2 │ ├── BD06.TXT.bz2 │ ├── BD07.TXT.bz2 │ └── BD08.TXT.bz2 ├── michoacan.r ├── readme.md └── cieisp.r ├── timelines ├── output │ └── .gitignore ├── data │ ├── .gitignore │ ├── pop.csv.bz2 │ ├── county-month.csv.bz2 │ ├── county-month-mx.csv.bz2 │ ├── county-month-ver.csv.bz2 │ ├── county-month-chiapas.csv.bz2 │ ├── county-month-gue-oax.csv.bz2 │ ├── county-month-nl-tam.csv.bz2 │ ├── military-operations.yaml │ └── homicide-county-month.r~ ├── constants.r ├── report │ ├── report.pdf │ ├── report.tex │ └── report.Rnw ├── readme.md ├── ciudad-juarez.r └── timelines-mun.r ├── trends ├── output │ └── .gitignore ├── readme.md └── seasonal-decomposition.r ├── INEGIvsSNSP ├── output │ ├── .gitignore │ └── INEGI-SNSP-dif.png ├── data │ └── states-icesi.csv ├── readme.md ├── inegi-vs-snsp.r └── snsp-vs-cieisp.r ├── guns-executions ├── output │ └── .gitignore ├── data │ ├── firearm-hom-state.csv │ ├── firearm-hom-statetot.csv │ └── firearm-executions.csv ├── readme.md └── guns-executions.r ├── predictions ├── output │ └── .gitignore ├── data │ ├── executions-bystate.csv │ └── executions-bymonth.csv ├── readme.md └── predictions.r ├── missing-homicides ├── data │ ├── .gitignore │ ├── INEGIvsICESI.csv │ └── PAHO-UN-INEGI-ICESI.csv ├── stat-yearbooks │ ├── .gitignore │ └── wget.bat ├── #icesi-vs-inegi.r# ├── output │ └── INEGIvsYearbook.png ├── massacres.r ├── readme.md └── missing-homicides.r ├── most-violent-counties ├── output │ └── .gitignore ├── data │ ├── poblacionh.csv.bz2 │ └── poblacionm.csv.bz2 ├── cities-mun.r └── most-violent.r ├── choropleths ├── output │ └── .gitignore ├── data │ ├── inegi1990.csv │ ├── inegi1995.csv │ ├── inegi2000.csv │ ├── 
inegi2005.csv │ └── pop-w.csv.bz2 ├── readme.md └── county-maps-homicide.r ├── accidents-homicides-suicides ├── output │ ├── .gitignore │ ├── Thumbs.db │ ├── accident.png │ ├── homicide.png │ ├── states.csv │ ├── suicide.png │ ├── suicide.csv │ ├── accident.csv │ └── homicide.csv ├── data │ ├── .gitignore │ └── accidents-homicides-suicides-bystate.csv.bz2 ├── accidents-homicides-suicides.r ├── accidents-homicides-suicides.r~ ├── .Rhistory └── readme.md ├── historic ├── .Rhistory ├── output │ └── Thumbs.db ├── homicide-historic.r └── readme.md ├── .gitignore ├── maps ├── maps.zip └── map_mx.RData ├── config └── config.yaml ├── states ├── readme.md~ ├── output │ └── Thumbs.db ├── data │ └── homicide-mun-2008.csv.bz2 ├── merge.bat ├── readme.md ├── .Rhistory ├── homicide-bystate.r~ └── homicide-bystate.r ├── conapo-pop-estimates ├── conapo-states.csv ├── conapo-states-f.csv └── readme.md ├── initialize ├── load-libraries.r └── init.r ├── LICENSE ├── run-all.r ├── library └── utilities.r └── readme.md /drugs/output/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Benford/output/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /CIEISP/output/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /timelines/output/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /trends/output/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/INEGIvsSNSP/output/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /guns-executions/output/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /predictions/output/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /missing-homicides/data/.gitignore: -------------------------------------------------------------------------------- 1 | *.xls -------------------------------------------------------------------------------- /most-violent-counties/output/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /timelines/data/.gitignore: -------------------------------------------------------------------------------- 1 | *.xls 2 | *.csv -------------------------------------------------------------------------------- /choropleths/output/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | Thumbs.db -------------------------------------------------------------------------------- /missing-homicides/stat-yearbooks/.gitignore: -------------------------------------------------------------------------------- 1 | *.zip -------------------------------------------------------------------------------- /accidents-homicides-suicides/output/.gitignore: -------------------------------------------------------------------------------- 1 | *.xls -------------------------------------------------------------------------------- /historic/.Rhistory: -------------------------------------------------------------------------------- 1 | q() 2 | q() 
3 | q() 4 | q() 5 | q() 6 | -------------------------------------------------------------------------------- /accidents-homicides-suicides/data/.gitignore: -------------------------------------------------------------------------------- 1 | *.xls 2 | *.csv -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.xls 2 | Thumbs.db 3 | .Rhistory 4 | *.r~ 5 | *.md~ 6 | -------------------------------------------------------------------------------- /maps/maps.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/maps/maps.zip -------------------------------------------------------------------------------- /drugs/druguse.r: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/drugs/druguse.r -------------------------------------------------------------------------------- /CIEISP/data/LC.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/CIEISP/data/LC.pdf -------------------------------------------------------------------------------- /CIEISP/data/RE.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/CIEISP/data/RE.pdf -------------------------------------------------------------------------------- /config/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/config/config.yaml -------------------------------------------------------------------------------- /maps/map_mx.RData: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/maps/map_mx.RData -------------------------------------------------------------------------------- /states/readme.md~: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/states/readme.md~ -------------------------------------------------------------------------------- /drugs/data/druguse.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/drugs/data/druguse.csv -------------------------------------------------------------------------------- /states/output/Thumbs.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/states/output/Thumbs.db -------------------------------------------------------------------------------- /timelines/constants.r: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/timelines/constants.r -------------------------------------------------------------------------------- /CIEISP/data/BD02.TXT.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/CIEISP/data/BD02.TXT.bz2 -------------------------------------------------------------------------------- /CIEISP/data/BD03.TXT.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/CIEISP/data/BD03.TXT.bz2 -------------------------------------------------------------------------------- /CIEISP/data/BD04.TXT.bz2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/CIEISP/data/BD04.TXT.bz2 -------------------------------------------------------------------------------- /CIEISP/data/BD05.TXT.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/CIEISP/data/BD05.TXT.bz2 -------------------------------------------------------------------------------- /CIEISP/data/BD06.TXT.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/CIEISP/data/BD06.TXT.bz2 -------------------------------------------------------------------------------- /CIEISP/data/BD07.TXT.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/CIEISP/data/BD07.TXT.bz2 -------------------------------------------------------------------------------- /CIEISP/data/BD08.TXT.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/CIEISP/data/BD08.TXT.bz2 -------------------------------------------------------------------------------- /historic/output/Thumbs.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/historic/output/Thumbs.db -------------------------------------------------------------------------------- /historic/homicide-historic.r: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/historic/homicide-historic.r -------------------------------------------------------------------------------- 
/timelines/data/pop.csv.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/timelines/data/pop.csv.bz2 -------------------------------------------------------------------------------- /timelines/report/report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/timelines/report/report.pdf -------------------------------------------------------------------------------- /timelines/report/report.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/timelines/report/report.tex -------------------------------------------------------------------------------- /choropleths/data/inegi1990.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/choropleths/data/inegi1990.csv -------------------------------------------------------------------------------- /choropleths/data/inegi1995.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/choropleths/data/inegi1995.csv -------------------------------------------------------------------------------- /choropleths/data/inegi2000.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/choropleths/data/inegi2000.csv -------------------------------------------------------------------------------- /choropleths/data/inegi2005.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/choropleths/data/inegi2005.csv 
-------------------------------------------------------------------------------- /choropleths/data/pop-w.csv.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/choropleths/data/pop-w.csv.bz2 -------------------------------------------------------------------------------- /INEGIvsSNSP/data/states-icesi.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/INEGIvsSNSP/data/states-icesi.csv -------------------------------------------------------------------------------- /timelines/data/county-month.csv.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/timelines/data/county-month.csv.bz2 -------------------------------------------------------------------------------- /INEGIvsSNSP/output/INEGI-SNSP-dif.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/INEGIvsSNSP/output/INEGI-SNSP-dif.png -------------------------------------------------------------------------------- /conapo-pop-estimates/conapo-states.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/conapo-pop-estimates/conapo-states.csv -------------------------------------------------------------------------------- /missing-homicides/#icesi-vs-inegi.r#: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/missing-homicides/#icesi-vs-inegi.r# -------------------------------------------------------------------------------- /states/data/homicide-mun-2008.csv.bz2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/states/data/homicide-mun-2008.csv.bz2 -------------------------------------------------------------------------------- /timelines/data/county-month-mx.csv.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/timelines/data/county-month-mx.csv.bz2 -------------------------------------------------------------------------------- /conapo-pop-estimates/conapo-states-f.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/conapo-pop-estimates/conapo-states-f.csv -------------------------------------------------------------------------------- /missing-homicides/data/INEGIvsICESI.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/missing-homicides/data/INEGIvsICESI.csv -------------------------------------------------------------------------------- /predictions/data/executions-bystate.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/predictions/data/executions-bystate.csv -------------------------------------------------------------------------------- /timelines/data/county-month-ver.csv.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/timelines/data/county-month-ver.csv.bz2 -------------------------------------------------------------------------------- /guns-executions/data/firearm-hom-state.csv: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/guns-executions/data/firearm-hom-state.csv -------------------------------------------------------------------------------- /timelines/data/county-month-chiapas.csv.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/timelines/data/county-month-chiapas.csv.bz2 -------------------------------------------------------------------------------- /timelines/data/county-month-gue-oax.csv.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/timelines/data/county-month-gue-oax.csv.bz2 -------------------------------------------------------------------------------- /timelines/data/county-month-nl-tam.csv.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/timelines/data/county-month-nl-tam.csv.bz2 -------------------------------------------------------------------------------- /accidents-homicides-suicides/output/Thumbs.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/accidents-homicides-suicides/output/Thumbs.db -------------------------------------------------------------------------------- /guns-executions/data/firearm-hom-statetot.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/guns-executions/data/firearm-hom-statetot.csv -------------------------------------------------------------------------------- /missing-homicides/output/INEGIvsYearbook.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/missing-homicides/output/INEGIvsYearbook.png -------------------------------------------------------------------------------- /most-violent-counties/data/poblacionh.csv.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/most-violent-counties/data/poblacionh.csv.bz2 -------------------------------------------------------------------------------- /most-violent-counties/data/poblacionm.csv.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/most-violent-counties/data/poblacionm.csv.bz2 -------------------------------------------------------------------------------- /accidents-homicides-suicides/output/accident.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/accidents-homicides-suicides/output/accident.png -------------------------------------------------------------------------------- /accidents-homicides-suicides/output/homicide.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/accidents-homicides-suicides/output/homicide.png -------------------------------------------------------------------------------- /accidents-homicides-suicides/output/states.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/accidents-homicides-suicides/output/states.csv -------------------------------------------------------------------------------- /accidents-homicides-suicides/output/suicide.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/accidents-homicides-suicides/output/suicide.png -------------------------------------------------------------------------------- /accidents-homicides-suicides/accidents-homicides-suicides.r: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/accidents-homicides-suicides/accidents-homicides-suicides.r -------------------------------------------------------------------------------- /accidents-homicides-suicides/accidents-homicides-suicides.r~: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/accidents-homicides-suicides/accidents-homicides-suicides.r~ -------------------------------------------------------------------------------- /accidents-homicides-suicides/data/accidents-homicides-suicides-bystate.csv.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diegovalle/Homicide-MX-Drug-War/HEAD/accidents-homicides-suicides/data/accidents-homicides-suicides-bystate.csv.bz2 -------------------------------------------------------------------------------- /states/merge.bat: -------------------------------------------------------------------------------- 1 | montage output/2008-homicide-bars.png output/2008-homicide-map.png -geometry 480x480 output/montage2008.png 2 | montage output/2006-2008-change-homicide.png output/2006-2008-change-homicide-map.png -geometry 480x480 output/montage2006-2008.png 3 | -------------------------------------------------------------------------------- /accidents-homicides-suicides/.Rhistory: -------------------------------------------------------------------------------- 1 | fix(hom) 2 | fix(deaths) 3 | hom 4 | fix(hom) 5 | fix(sui) 6 | fix(Acc) 7 | fix(acc) 8 | fix(hom) 9 | hom$rate 10 | 
writeClipboard(hom$rate) 11 | writeClipboard(as.character(hom$rate)) 12 | q() 13 | fix(deaths) 14 | q() 15 | q() 16 | q() 17 | q() 18 | -------------------------------------------------------------------------------- /conapo-pop-estimates/readme.md: -------------------------------------------------------------------------------- 1 | Population Estimates 2 | ===================== 3 | Official Mexican population at the state level for the years 1990-2030 4 | 5 | Source 6 | ------ 7 | [Indicadores demográficos básicos 1990-2030](http://www.conapo.gob.mx/index.php?option=com_content&view=article&id=125&Itemid=193) -------------------------------------------------------------------------------- /initialize/load-libraries.r: -------------------------------------------------------------------------------- 1 | library(fUnitRoots) 2 | library(ggplot2) 3 | library(Hmisc) 4 | library(maptools) 5 | library(RColorBrewer) 6 | library(classInt) 7 | library(Cairo) 8 | library(yaml) 9 | library(boot) 10 | library(classInt) 11 | library(plotrix) 12 | library(car) 13 | library(directlabels) 14 | library(strucchange) 15 | library(xtable) 16 | library(tseries) 17 | library(grDevices) 18 | library(shape) 19 | library(cluster) 20 | 21 | -------------------------------------------------------------------------------- /trends/readme.md: -------------------------------------------------------------------------------- 1 | Trends and Seasonal Decomposition of Homicides in Mexico 2 | ===================================== 3 | 4 | Sources 5 | ------ 6 | __Homicide Data:__ 7 | 8 | [INEGI](http://www.inegi.org.mx/est/contenidos/espanol/proyectos/continuas/vitales/bd/mortalidad/MortalidadGeneral.asp?s=est&c=11144) 9 | 10 | __Population Data:__ 11 | 12 | [Indicadores demográficos básicos 1990-2030](http://www.conapo.gob.mx/index.php?option=com_content&view=article&id=125&Itemid=203) 13 | 14 | -------------------------------------------------------------------------------- 
/guns-executions/data/firearm-executions.csv: -------------------------------------------------------------------------------- 1 | Year,Firearm.Homicides,Executions,Population,Renglones,Reforma,Milenio 2 | 1998,7329,,95790135,,, 3 | 1999,6478,,97114831,,, 4 | 2000,5433,,98438557,,, 5 | 2001,5292,,99715527,1080,, 6 | 2002,5161,,100909374,1230,, 7 | 2003,5064,,101999555,1290,, 8 | 2004,4715,,103001867,1304,, 9 | 2005,5133,,103946866,1776,, 10 | 2006,5705,,104874282,2221,2120, 11 | 2007,4979,,105790725,,2280,2773 12 | 2008,8572,,106682518,,5153,5661 13 | 2009,,,107550697,,6587,8281 14 | -------------------------------------------------------------------------------- /INEGIvsSNSP/readme.md: -------------------------------------------------------------------------------- 1 | INEGI vs SNSP homicide data 2 | ================================================================= 3 | 4 | 5 | 6 | Sources 7 | ------- 8 | __Mexican Homicide Data:__ 9 | 10 | [INEGI](http://www.inegi.org.mx/est/contenidos/espanol/proyectos/continuas/vitales/bd/mortalidad/MortalidadGeneral.asp?s=est&c=11144) 11 | 12 | [ICESI](http://www.icesi.org.mx/documentos/estadisticas/estadisticas/denuncias_homicidio_doloso_1997_2008.xls) 13 | 14 | __Population Data:__ 15 | 16 | [Indicadores demográficos básicos 1990-2030](http://www.conapo.gob.mx/index.php?option=com_content&view=article&id=125&Itemid=203) 17 | 18 | -------------------------------------------------------------------------------- /missing-homicides/data/PAHO-UN-INEGI-ICESI.csv: -------------------------------------------------------------------------------- 1 | Year,PAHO,UN Survey of Crime,ICESI,INEGI 2 | 1990,,,,13.69520201 3 | 1991,,,,14.78792554 4 | 1992,,,,15.79288471 5 | 1993,,,,15.23458386 6 | 1994,,,,17.34212109 7 | 1995,18.4,,,16.78286096 8 | 1996,17,,,15.40748018 9 | 1997,15.8,,17.10767812,14.14614354 10 | 1998,15.9,14.93277311,16.28038211,14.14028699 11 | 1999,14.4,15.13354037,15.06772946,12.45741755 12 | 
2000,12.3,14.11122449,13.76493156,10.81080455 13 | 2001,11.7,13.94185777,14.24853323,10.23511614 14 | 2002,11.3,13.03722513,12.99185544,9.929701873 15 | 2003,11.3,,12.5098585,9.801023152 16 | 2004,10.6,,11.34930884,9.060029951 17 | 2005,10.5,,10.82764727,9.555843656 18 | 2006,11.2,,11.05514124,9.916635234 19 | 2007,,,9.727695883,8.318309568 20 | 2008,,,11.78918555,12.77435165 21 | -------------------------------------------------------------------------------- /Benford/readme.md: -------------------------------------------------------------------------------- 1 | Do homicide data follow Benford's law? 2 | ================================================ 3 | In his original paper, Benford found that death rates followed the distribution that bears his name, so it is reasonable to assume that homicide data should follow Benford's law as well. However, neither the data from the INEGI nor the data from the ICESI follow Benford's law. 4 | 5 | Output 6 | ------ 7 | * Charts of first digits in the homicide data from the INEGI and the ICESI 8 | 9 | Sources 10 | ------ 11 | __Mexican Homicide Data:__ 12 | 13 | [INEGI](http://www.inegi.org.mx/est/contenidos/espanol/proyectos/continuas/vitales/bd/mortalidad/MortalidadGeneral.asp?s=est&c=11144) 14 | 15 | [ICESI](http://www.icesi.org.mx/documentos/estadisticas/estadisticas/denuncias_homicidio_doloso_1997_2008.xls) 16 | 17 | __Death rates:__ 18 | 19 | Original data from Benford's paper: [Mathworld](http://mathworld.wolfram.com/BenfordsLaw.html) 20 | -------------------------------------------------------------------------------- /predictions/data/executions-bymonth.csv: -------------------------------------------------------------------------------- 1 | Year,Month,MonthT,Reforma,Milenio 2 | 2007,1,jan,234,204 3 | 2007,2,feb,104,198 4 | 2007,3,mar,192,275 5 | 2007,4,apr,248,255 6 | 2007,5,may,237,279 7 | 2007,6,jun,247,244 8 | 2007,7,jul,143,217 9 | 2007,8,aug,174,284 10 | 2007,9,sep,142,236 11 | 2007,10,oct,183,202 12 | 2007,11,nov,136,183 13 | 
2007,12,dec,156,196 14 | 2008,1,jan,250,247 15 | 2008,2,feb,249,251 16 | 2008,3,mar,301,378 17 | 2008,4,apr,290,270 18 | 2008,5,may,477,493 19 | 2008,6,jun,331,505 20 | 2008,7,jul,554,509 21 | 2008,8,aug,522,555 22 | 2008,9,sep,496,448 23 | 2008,10,oct,553,669 24 | 2008,11,nov,547,701 25 | 2008,12,dec,661,635 26 | 2009,1,jan,452,480 27 | 2009,2,feb,587,633 28 | 2009,3,mar,462,508 29 | 2009,4,apr,508,546 30 | 2009,5,may,539,510 31 | 2009,6,jun,507,769 32 | 2009,7,jul,465,854 33 | 2009,8,aug,538,748 34 | 2009,9,sep,714,826 35 | 2009,10,oct,558,840 36 | 2009,11,nov,458,682 37 | 2009,12,dec,786,885 38 | 2010,1,jan,764,904 39 | 2010,2,feb,759,799 40 | 2010,3,mar,,1130 41 | -------------------------------------------------------------------------------- /initialize/init.r: -------------------------------------------------------------------------------- 1 | # Project bootstrap: sources the library loader, reads the map settings 2 | # from config/config.yaml and unpacks maps/maps.zip into 'maps'. 3 | source("initialize/load-libraries.r") 4 | 5 | config <- yaml.load_file("config/config.yaml") 6 | map.icesi <- config$maps$map.icesi 7 | map.inegi.ct <- config$maps$map.inegi.ct 8 | map.inegi.st <- config$maps$map.inegi.st 9 | 10 | # Unzip the maps; skipped when the archive is absent so that a missing 11 | # download does not surface as an unzip warning 12 | if (file.exists("maps/maps.zip")) { 13 | unzip("maps/maps.zip", exdir = "maps") 14 | } 15 | 16 | # Returns `f` when all six map files listed below are present under 'maps', 17 | # otherwise prints where to download them. R evaluates arguments lazily, 18 | # so the expression passed as `f` is only evaluated when the maps exist. 19 | testMapsExist <- function(f){ 20 | mapfiles <- c("ESTADOS.shp", "ESTADOS.shx", "ESTADOS.dbf", 21 | "MUNICIPIOS.shp", "MUNICIPIOS.shx", "MUNICIPIOS.dbf") 22 | if(all(file.exists(file.path("maps", mapfiles)))){ 23 | f 24 | } else { 25 | print("get the maps from:") 26 | print("homicide-maps-3.1.1.zip from http://files.diegovalle.net/") 27 | print("or") 28 | print("v3.1.1 from http://mapserver.inegi.org.mx/data/mgm/") 29 | print("Áreas Geoestadísticas Estatales y Zonas Pendientes por Asignar (6.47 Mb)") 30 | print("Áreas Geoestadísticas Municipales y Zonas Pendientes por Asignar (30.6 Mb)") 31 | print("Unzip the files into the 'maps' subdirectory") 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | Copyright (c) 2010 Diego Valle-Jones 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
-------------------------------------------------------------------------------- /accidents-homicides-suicides/output/suicide.csv: -------------------------------------------------------------------------------- 1 | "","Year","Type.of.Death","Tot","pop.mex","rate" 2 | "3",1990,"Suicidio",1810,83971014,2.15550570819593 3 | "7",1991,"Suicidio",2017,85583336,2.35676720991572 4 | "11",1992,"Suicidio",2161,87184832,2.47864215647052 5 | "15",1993,"Suicidio",2226,88752014,2.50811209760265 6 | "19",1994,"Suicidio",2561,90265775,2.83717721362277 7 | "23",1995,"Suicidio",2877,91724528,3.13656560871046 8 | "27",1996,"Suicidio",2993,93130089,3.21378410794818 9 | "31",1997,"Suicidio",3339,94478046,3.53415437910306 10 | "35",1998,"Suicidio",3309,95790135,3.45442670062006 11 | "39",1999,"Suicidio",3320,97114831,3.4186333496271 12 | "43",2000,"Suicidio",3488,98438557,3.54332703190682 13 | "47",2001,"Suicidio",3771,99715527,3.78175808066481 14 | "51",2002,"Suicidio",3856,100909374,3.82125054110434 15 | "55",2003,"Suicidio",4093,101999555,4.0127626046996 16 | "60",2004,"Suicidio",4096,103001867,3.97662694793678 17 | "65",2005,"Suicidio",4323,103946866,4.15885554452406 18 | "70",2006,"Suicidio",4267,104874282,4.06868101371126 19 | "75",2007,"Suicidio",4401,105790725,4.16010004657781 20 | "80",2008,"Suicidio",4578,106682518,4.29123729531768 21 | -------------------------------------------------------------------------------- /accidents-homicides-suicides/output/accident.csv: -------------------------------------------------------------------------------- 1 | "","Year","Type.of.Death","Tot","pop.mex","rate" 2 | "1",1990,"Accidente",34424,83971014,40.995098618197 3 | "5",1991,"Accidente",35257,85583336,41.1961038770445 4 | "9",1992,"Accidente",34818,87184832,39.9358457214209 5 | "13",1993,"Accidente",33773,88752014,38.0532209669067 6 | "17",1994,"Accidente",36575,90265775,40.5192333417621 7 | "21",1995,"Accidente",35008,91724528,38.1664542334849 8 | 
"25",1996,"Accidente",34598,93130089,37.1501846197097 9 | "29",1997,"Accidente",35508,94478046,37.5833344394104 10 | "33",1998,"Accidente",34956,95790135,36.4922755354714 11 | "37",1999,"Accidente",35308,97114831,36.3569597315162 12 | "41",2000,"Accidente",35048,98438557,35.6039351531738 13 | "45",2001,"Accidente",35158,99715527,35.2583003447397 14 | "49",2002,"Accidente",35354,100909374,35.0353972070028 15 | "53",2003,"Accidente",35115,101999555,34.4266207828064 16 | "57",2004,"Accidente",34852,103001867,33.8362798802472 17 | "62",2005,"Accidente",35934,103946866,34.5695848107628 18 | "67",2006,"Accidente",36318,104874282,34.6300344635494 19 | "72",2007,"Accidente",39066,105790725,36.927622908341 20 | "77",2008,"Accidente",37835,106682518,35.4650421730766 21 | -------------------------------------------------------------------------------- /accidents-homicides-suicides/output/homicide.csv: -------------------------------------------------------------------------------- 1 | "","Year","Type.of.Death","Tot","pop.mex","rate" 2 | "2",1990,"Homicidio",11500,83971014,13.6952020133995 3 | "6",1991,"Homicidio",12656,85583336,14.7879255372798 4 | "10",1992,"Homicidio",13769,87184832,15.7928847072849 5 | "14",1993,"Homicidio",13521,88752014,15.2345838596970 6 | "18",1994,"Homicidio",15654,90265775,17.3421210863143 7 | "22",1995,"Homicidio",15394,91724528,16.7828609595026 8 | "26",1996,"Homicidio",14349,93130089,15.4074801753921 9 | "30",1997,"Homicidio",13365,94478046,14.1461435389974 10 | "34",1998,"Homicidio",13545,95790135,14.1402869930186 11 | "38",1999,"Homicidio",12098,97114831,12.4574175493339 12 | "42",2000,"Homicidio",10642,98438557,10.8108045509038 13 | "46",2001,"Homicidio",10206,99715527,10.2351161419425 14 | "50",2002,"Homicidio",10020,100909374,9.92970187289042 15 | "54",2003,"Homicidio",9997,101999555,9.80102315152257 16 | "58",2004,"Homicidio",9332,103001867,9.06002995071924 17 | "63",2005,"Homicidio",9933,103946866,9.5558436557385 18 | 
"68",2006,"Homicidio",10400,104874282,9.91663523379354 19 | "73",2007,"Homicidio",8800,105790725,8.31830956825374 20 | "78",2008,"Homicidio",13628,106682518,12.7743516515049 21 | -------------------------------------------------------------------------------- /drugs/readme.md: -------------------------------------------------------------------------------- 1 | Drug Use and its Relation to the Drug War 2 | ========================================= 3 | Regressions on drug use and the homicide rate to see if they are correlated. A blue line means the p-value < 0.5 and a red line that it is not significant. 4 | 5 | Sources 6 | ------ 7 | 8 | [Encuesta Nacional de Addicciones 2008](http://www.insp.mx/medios/noticias/index.php?art/id:263): 9 | 10 | * Any Drug, Illegal Drugs, and Medical Drugs: 11 | 12 | Cuadro A47 13 | 14 | Incidencia acumulada para el uso de drogas por entidad. 15 | 16 | Población total de 12 a 65 años. México, ENA 2008 17 | 18 | * Marihuana, Cocaine, and Amphetamines: 19 | 20 | Cuadro A53 21 | 22 | Incidencia acumulada para el uso de drogas por entidad. 23 | 24 | Población total de 12 a 65 años. México, ENA 2008 25 | 26 | 27 | [State Abbreviations](http://www.statoids.com/umx.html): Which for some reason abbreviates Baja California as BCN, there's no Norte in California! I changed it to BC. 28 | 29 | Drug Eradication: [2010 International Narcotics Control Strategy Report (INCSR)](http://www.state.gov/p/inl/rls/nrcrpt/2010/vol1/137197.htm) 30 | 31 | Cocaine Price: [WORLD DRUG REPORT 2009](https://docs.google.com/viewer?url=http%3A%2F%2Fwww.unodc.org%2Fdocuments%2Fwdr%2FWDR_2009%2FWDR2009_eng_web.pdf) (p. 
########################################################
#####       Author: Diego Valle Jones
#####       Website: www.diegovalle.net
#####       Date Created: Fri Mar 19 07:53:06 2010
########################################################
#Runs every analysis script; each one saves its charts in the
#output directory of its own folder

#On a slow computer this takes a while, go get a cup of coffee

source("initialize/init.r")

#The scripts are run in exactly this order. Some scripts read files from
#another folder's output directory (e.g. INEGIvsSNSP reads
#accidents-homicides-suicides/output/states.csv and CIEISP/output/cieisp.csv),
#presumably written by the scripts that run before them, so do not reorder.
analysis.scripts <- c("accidents-homicides-suicides/accidents-homicides-suicides.r",
                      "Benford/benford.r",
                      "guns-executions/guns-executions.r",
                      "trends/seasonal-decomposition.r",
                      "predictions/predictions.r",
                      "historic/homicide-historic.r",
                      "missing-homicides/missing-homicides.r",
                      "missing-homicides/massacres.r",
                      "CIEISP/cieisp.r",
                      "CIEISP/michoacan.r",
                      "INEGIvsSNSP/inegi-vs-snsp.r",
                      "INEGIvsSNSP/snsp-vs-cieisp.r",
                      "drugs/druguse.r",
                      "drugs/eradication.r",
                      "most-violent-counties/most-violent.r",
                      "timelines/timelines-mun.r",
                      "timelines/ciudad-juarez.r",
                      "states/homicide-bystate.r")
for(analysis.script in analysis.scripts){
  source(analysis.script)
}

#The next two scripts need the shp files, so they only run if the maps exist
testMapsExist(source("choropleths/county-maps-homicide.r"))
testMapsExist(source("most-violent-counties/cities-mun.r"))
[INEGI](http://www.inegi.org.mx/est/contenidos/espanol/proyectos/continuas/vitales/bd/mortalidad/MortalidadGeneral.asp?s=est&c=11144) for the homicide data 8 | 9 | [CONAPO](http://conapo.gob.mx/index.php?option=com_content&view=article&id=125&Itemid=203) 10 | for population estimates at the national level (1990-2030) 11 | 12 | Codebook 13 | ----------- 14 | Variables used to download the data from the INEGI: 15 | 16 | Mortalidad general 17 | 18 | Consulta de: Defunciones accidentales y violentas   Por: Ent y mun de ocurrencia, Año de ocurrencia y Tipo de defunción   Según: Año de registro 19 | 20 | The first four columns of the database are nameless but correspond to: 21 | 22 | Code - Numeric code for each state and county 23 | 24 | State - Name of each state 25 | 26 | Year - The year in which the violent death _occurred_ 27 | 28 | Type.of.Death - Whether the death was due to suicide, homicide or an accident 29 | 30 | 1990 ... 2008 - The rest of the columns correspond to the year in which the death was _registered_ 31 | 32 | The weird order of the database is because the website of the INEGI is a steaming pile of broccoli and only lets you download the data ordered by the year in which the death was registered.
33 | -------------------------------------------------------------------------------- /historic/readme.md: -------------------------------------------------------------------------------- 1 | Homicide in Mexico 1990-2008 2 | ============================ 3 | 4 | * Chart of homicides rate in Mexico from 1990-2008 and an estimate for 2009 5 | * Chart comparing the US, Mexico, and England and Wales 6 | 7 | Sources 8 | ------ 9 | __Mexican Homicide Data:__ 10 | 11 | [INEGI](http://www.inegi.org.mx/est/contenidos/espanol/proyectos/continuas/vitales/bd/mortalidad/MortalidadGeneral.asp?s=est&c=11144) 12 | 13 | __Brazil:__ 14 | 15 | 1990-1999: [PAHO](http://www.paho.org/english/hcp/hcn/vio/violence-graphs.htm) Source: Minister of Justice, National Secretary of Public Security 16 | 17 | 2000-2007: [Mapa da violência Brasil 2010](http://www.institutosangari.org.br/mapadaviolencia/MapaViolencia2010.pdf) Gráfico 3.1.2. Evolução das Taxas de Homicídio (em 100.000). Brasil, 1997/2007. 18 | 19 | 20 | __US:__ 21 | 22 | 1950-2005: [FBI, Uniform Crime Reports, 1950-2005](http://bjs.ojp.usdoj.gov/content/homicide/tables/totalstab.cfm) 23 | 24 | 2006-2008: Also from the [FBI](http://www.fbi.gov/ucr/cius2008/data/table_01.html) 25 | 26 | __England and Wales:__ 27 | 28 | [Homicides, Firearm Offences and 29 | Intimate Violence 2007/08](http://www.homeoffice.gov.uk/rds/pdfs09/hosb0209.pdf). 
30 | Table 1.01 Offences initially recorded by the police as homicide by current classification: England and Wales, 1955 to 2007/08 31 | 32 | __Population Data:__ 33 | 34 | [Indicadores demográficos básicos 1990-2030](http://www.conapo.gob.mx/index.php?option=com_content&view=article&id=125&Itemid=203) 35 | -------------------------------------------------------------------------------- /predictions/readme.md: -------------------------------------------------------------------------------- 1 | Predictions for the Homicide Rate in 2009 and 2010 2 | ================================================== 3 | A linear regression to predict future homicide rates 4 | 5 | Sources 6 | ------ 7 | __Homicide Data:__ 8 | 9 | [INEGI](http://www.inegi.org.mx/est/contenidos/espanol/proyectos/continuas/vitales/bd/mortalidad/MortalidadGeneral.asp?s=est&c=11144) 10 | 11 | __Population Data:__ 12 | 13 | [Indicadores demográficos básicos 1990-2030](http://www.conapo.gob.mx/index.php?option=com_content&view=article&id=125&Itemid=203) 14 | 15 | __Executions__ 16 | 17 | _Reforma_: 18 | 19 | I used [Engauge](http://digitizer.sourceforge.net/) to scan the Figure "Ejecuciones By Month and Year, Jan. 2007- Feb. 2010" from the JUSTICE IN MEXICO PROJECT [Monthly News Report, Number 53](http://www.justiceinmexico.org/news/pdf/justiceinmexico-march2010news-report032410.pdf) March 2010 20 | 21 | _Milenio_: 22 | 23 | [2007-2009](http://impreso.milenio.com/node/8697054) 24 | 25 | [2010](http://impreso.milenio.com/media/2010/04/01/mex-mp-02G.jpg) 26 | 27 | Notes: 28 | 29 | The data from _Reforma_ looks like it's missing 1600 executions in Chihuahua, supposedly there were only 2000 executions in Chihuahua during 2009, but in this same period there were 2600 murders in Ciudad Juárez alone, normally there used to be about 200 homicides in Juárez. The data from _Milenio_ looks better, but it seems to be missing about 200 executions during the month of november. 
I decided to average the data from _Reforma_ and _Milenio_ to estimate the homicide rate in 2009 and 2010. -------------------------------------------------------------------------------- /states/readme.md: -------------------------------------------------------------------------------- 1 | Homicide in Mexico at the State Level 2 | ===================================== 3 | * Maps and charts with the homicide rate at the state level 4 | * Maps and charts with the change in homicide rate from 2006 to 2008 5 | * If you run merge.bat you get a merged chart of the bar plots and maps 6 | * A small multiples chart with the murder rate of each state from 1990 to 2008 7 | 8 | Sources 9 | ------ 10 | __Homicide Data:__ 11 | 12 | [INEGI](http://www.inegi.org.mx/est/contenidos/espanol/proyectos/continuas/vitales/bd/mortalidad/MortalidadGeneral.asp?s=est&c=11144) 13 | 14 | __Population Data:__ 15 | 16 | [Indicadores demográficos básicos 1990-2030](http://www.conapo.gob.mx/index.php?option=com_content&view=article&id=125&Itemid=203) 17 | 18 | Codebook 19 | -------- 20 | Variables used to download the data from the INEGI: 21 | 22 | Mortalidad general 23 | 24 | Consulta de: Defunciones accidentales y violentas   Por: Ent y mun de ocurrencia, Año de ocurrencia y Sexo   Según: Año de registro 25 | 26 | 27 | 28 | The first four columns of the database are nameless but correspond to: 29 | 30 | Code - Numeric code for each state and county 31 | 32 | County - Name of each county 33 | 34 | Year.of.Murder - The year in which the violent death _occurred_ 35 | 36 | Sex - Sex of the deceased 37 | 38 | 1990 ... 2008 - The rest of the columns correspond to the year in which the murder was _registered_ 39 | 40 | The weird order of the database is because the website of the INEGI is a steaming pile of broccoli and only lets you download the data ordered by the year in which the murder was registered.
41 | 42 | 43 | Notes: 44 | ------ 45 | If you want charts of femicides edit the file "config/config.yaml" and set sex to Female 46 | -------------------------------------------------------------------------------- /guns-executions/readme.md: -------------------------------------------------------------------------------- 1 | Guns and Executions in Mexico 2 | ================================================================= 3 | 4 | 5 | Sources 6 | ------- 7 | 8 | __Population Data:__ 9 | 10 | [Indicadores demográficos básicos 1990-2030](http://www.conapo.gob.mx/index.php?option=com_content&view=article&id=125&Itemid=203) 11 | 12 | __Mexican Homicide Data:__ 13 | 14 | [INEGI](http://www.inegi.org.mx/est/contenidos/espanol/proyectos/continuas/vitales/bd/mortalidad/MortalidadGeneral.asp?s=est&c=11144) 15 | 16 | 17 | __Executions__ 18 | 19 | [Drug Violence in Mexico Data and Analysis from 2001-2009](http://www.justiceinmexico.org/resources/pdf/drug_violence.pdf) by David A. Shirk: Figure 1. Drug-Related Killings in Mexico, 2001-2009. Sources: Data for 2001-2005 from Marcos Pablo Moloeznik, “[Principales efectos de la militarización del 20 | combate al narcotráfico en México](http://renglones.iteso.mx/upload/archivos/Marcos_Moloeznik.pdf)" in Renglones, No. 61, Sept. 2009-Mar. 2010, Guadalajara: Instituto 21 | Tecnológico y de Estudios Superiores de Occidente, A.C., 2009. Data for 2006-2009 compiled from 22 | Reforma in Justice In Mexico Project Narcobarometer Database (www.justiceinmexico.org). 23 | 24 | [2007-2009 - Milenio](http://impreso.milenio.com/node/8697054) 25 | 26 | 27 | __Homicides with Firearm__ 28 | 29 | Dirección General de Información en Salud (DGIS). Base de datos de defunciones 1979-2007. [en línea]: Sistema Nacional de Información en Salud (SINAIS). [México]: Secretaría de Salud. [Consulta: 01 abril 2009]. 
#Compare the monthly homicides reported in the CIEISP forms (SNSP) with
#the INEGI homicide data for Michoacan, 2005-2007, and save the chart
#to CIEISP/output/michoacan.png

#CIEISP 2007
#http://www.pfp.gob.mx/portalWebApp/ShowBinary?nodeId=/BEA%20Repository/368010//archivo

#CIEISP 2006
#http://www.ssp.gob.mx/portalWebApp/ShowBinary?nodeId=/BEA%20Repository/368009//archivo

#CIEISP 2005
#http://www.ssp.gob.mx/portalWebApp/ShowBinary?nodeId=/BEA%20Repository/368008//archivo

#Monthly homicides (January to December) copied from the forms above
m07 <- c(44, 33, 61, 50, 41, 40, 51, 38, 42, 47, 38, 42)
m06 <- c(47, 36, 40, 46, 55, 44, 55, 88, 58, 65, 60, 67)
m05 <- c(33, 37, 40, 35, 38, 32, 39, 22, 34, 37, 39, 41)

source("library/utilities.r")
source("timelines/constants.r")
#NOTE(review): the file is named gue-oax but is filtered for Michoacán,
#confirm that it does contain the Michoacán rows
hom <- read.csv(bzfile("timelines/data/county-month-gue-oax.csv.bz2"))
hom <- cleanHom(hom)
hom$County <- factor(cleanNames(hom, "County"))
hom <- subset(hom, County == "Michoacán" &
                   Year.of.Murder >= 2005 &
                   Year.of.Murder <= 2007)

#Without this check data.frame() would silently recycle the SNSP numbers
#into the INEGI series if the subset came back empty
if(nrow(hom) != 12 * 3){
  stop("Expected 36 monthly INEGI rows for Michoacán (2005-2007), got ",
       nrow(hom))
}

#The first 36 rows are SNSP, the last 36 INEGI
#assumes hom is sorted by year and month -- TODO confirm
mich.hom <- data.frame(tot = c(m05, m06, m07, hom$Total.Murders),
                       type = rep(c("SNSP", "INEGI"), each = 12 * 3),
                       month = rep(1:12, times = 3 * 2),
                       year = rep(rep(2005:2007, each = 12), times = 2))
#Build the date with an explicit separator that matches the format
#(paste() joins with spaces by default and "%Y%m%d" has none)
mich.hom$Date <- as.Date(paste(mich.hom$year, mich.hom$month, "15",
                               sep = "-"),
                         "%Y-%m-%d")

Cairo(file = "CIEISP/output/michoacan.png", width=700, height=400)
print(ggplot(mich.hom, aes(as.Date(Date), tot, group = type,
                           color = type)) +
      geom_line(size = 1.2) +
      scale_x_date() +
      xlab("") + ylab("Monthly number of homicides") +
      opts(title="Differences in homicides in Michoacan") +
      #op.mich comes from timelines/constants.r
      geom_vline(aes(xintercept = op.mich), alpha = .7) +
      annotate("text", x = op.mich, y = 20, hjust = 1.01, vjust = 0,
               label="Joint Operation Michoacan"))
dev.off()
You'll need to register to download them and they have to be version 3.1.1 or they won't work 29 | 30 | * [Áreas Geoestadísticas Estatales y Zonas Pendientes por Asignar (6.47 Mb)](http://mapserver.inegi.org.mx/data/mgm/redirect.cfm?fileX=ESTADOS311) 31 | 32 | * [Áreas Geoestadísticas Municipales y Zonas Pendientes por Asignar (30.6 Mb)](http://mapserver.inegi.org.mx/data/mgm/redirect.cfm?fileX=MUNICIPIOS311) 33 | 34 | 35 | Alternatively you can download a simplified version of the maps from [files.diegovalle.net](http://files.diegovalle.net): homicide-maps-3.1.1.zip 36 | 37 | 38 | Notes: 39 | ------ 40 | If you want choropleths of femicides set the variable sex in the file "config/config.yaml" to Female 41 | -------------------------------------------------------------------------------- /INEGIvsSNSP/inegi-vs-snsp.r: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | ##### Author: Diego Valle Jones 3 | ##### Website: www.diegovalle.net 4 | ##### Date Created: Wed Feb 17 19:25:40 2010 5 | ######################################################## 6 | #Small multiples plot to compare the INEGI and ICESI data 7 | 8 | source("library/utilities.r") 9 | 10 | icesi <- read.csv("INEGIvsSNSP/data/states-icesi.csv") 11 | inegi <- read.csv("accidents-homicides-suicides/output/states.csv") 12 | 13 | icesi$State <- iconv(icesi$State, "windows-1252", "utf-8") 14 | #inegi$State <- iconv(inegi$State, "windows-1252", "utf-8") 15 | 16 | icesi <- melt(icesi, id = "State") 17 | icesi$org <- "SNSP" 18 | 19 | #Remove the years 1990:1996 20 | inegi <- inegi[,-(2:8)] 21 | inegi <- melt(inegi, id = "State") 22 | inegi$org <- "INEGI" 23 | 24 | ii <- rbind(inegi, icesi) 25 | ii$variable <- rep(1997:2008, each=32) 26 | ii$State <- cleanNames(ii, "State") 27 | 28 | 29 | #Population of Mexico 1997-2008 30 | #source: CONAPO 31 | pop <- read.csv("conapo-pop-estimates/conapo-states.csv") 32 |
#Variance of the difference between the INEGI and the SNSP homicide rates
# df - data frame with the columns org ("INEGI" or "SNSP") and rate
#The rates are subtracted position by position
#assumes both organizations have their rows in the same order -- TODO confirm
variat <- function(df){
  rate.inegi <- df$rate[which(df$org == "INEGI")]
  rate.snsp <- df$rate[which(df$org == "SNSP")]
  var(rate.inegi - rate.snsp)
}
#Line chart of area by year for each type of crop, with the years
#2006-2009 shaded as the drug war. The chart is printed and then saved
#as a PNG in drugs/output/
# df       - data frame with the columns years, area and type
# filename - name of the PNG file, without the directory
# ylab     - label of the y axis
# title    - title of the chart
drugPlot <- function(df, filename, ylab, title) {
  print(ggplot(df, aes(years, area, group = type,
                       color = type)) +
        geom_line() +
        geom_rect(xmin = 2006, xmax = 2009,
                  ymin=0, ymax=Inf, alpha = .02, fill = "red",
                  color= "#efefef") +
        #NOTE(review): y = 28000 is hard-coded, the label falls outside
        #the y limits whenever max(df$area) is below 28000
        annotate("text", x = 2007.5, y = 28000, label = "Drug War") +
        ylab(ylab) + xlab("") +
        opts(title = title) +
        scale_y_continuous(formatter = "comma", limits = c(0, max(df$area, na.rm = TRUE))))
  #sep = "" is needed: paste() separates with a space by default, which
  #saved the charts as "drugs/output/ <filename>" (leading space in the name)
  filename <- paste("drugs/output/", filename, sep = "")
  dev.print(png, filename, width=640, height=480)
}
########################################################
#####       Author: Diego Valle Jones
#####       Website: www.diegovalle.net
#####       Date Created: Fri May 21 18:07:05 2010
########################################################
#Compare the data the SNSP gave to the ICESI with that contained
#in the original CIEISP forms
#Saves three charts in INEGIvsSNSP/output/
source("library/utilities.r")

cieisp <- read.csv("CIEISP/output/cieisp.csv")
snsp <- read.csv("INEGIvsSNSP/data/states-icesi.csv",
                 fileEncoding = "windows-1252")
#snsp$State <- iconv(snsp$State, "windows-1252", "utf-8")

#Keep only the yearly totals of the CIEISP data
cieisp <- subset(cieisp, Mes == "Total")
#presumably Entidad, Anio, Mes and the homicide count -- verify against
#the columns of cieisp.csv
cieisp <- cieisp[ ,c(2:4,12)]

snsp$State <- cleanNames(snsp, "State")
snsp <- melt(snsp, id = "State")
#read.csv prefixes the year columns with an X, remove it to get the year
snsp$Anio <- as.numeric(gsub("X", "", snsp$variable))

#Match state and year of both sources
cie.snsp <- merge(snsp, cieisp, by.y=c("Entidad","Anio"),
                  by.x=c("State", "Anio"))
cie.snsp$variable <- NULL;cie.snsp$Mes <- NULL
#The new names depend on the order in which merge() returned the columns
names(cie.snsp) <- c("State", "Anio", "SNSP", "CIEISP")

mcie <- melt(cie.snsp, id = c("State","Anio"))
#dif is the absolute mean difference between rows 1:7 and rows 8:14 of
#each state
#assumes 7 years of SNSP rows followed by 7 years of CIEISP rows for
#every state -- TODO confirm
mcie <- ddply(mcie, .(State), transform,
              dif = abs(mean(value[1:7] - value[8:14])))
#Order the states so the biggest differences come first
mcie$State <- reorder(factor(mcie$State), -mcie$dif)

#Small multiples, one panel per state
print(ggplot(mcie, aes(Anio, value, group = variable,
                       color = variable)) +
      geom_line() +
      opts(title = "Differences in reported homicides according to the SNSP data and the original CIEISP forms") +
      opts(axis.text.x=theme_text(angle=60, hjust=1.2 )) +
      ylab("Number of Homicides") +
      facet_wrap(~State, scale = "free_y"))
dev.print(png, file = "INEGIvsSNSP/output/SNSP-vs-CIEISP.png", width = 960, height = 600)


#The State of Mexico, with the INEGI numbers for 2002-2008 added by hand
mx <- subset(mcie, State == "México")
inegi <- c(1957,1909,1739,2017,1743,1235,1559)
mx <- rbind(mx, data.frame(State = "México",
                           Anio = 2002:2008,
                           variable = "INEGI",
                           value = inegi,
                           dif = 0))

print(ggplot(mx, aes(Anio, value, group = variable,
                     color = variable)) +
      geom_line(size = 2, alpha = .4) +
      opts(title = "Differences in reported homicides according to\nSNSP data, the original CIEISP forms, and the INEGI\nin the State of Mexico") +
      opts(axis.text.x=theme_text(angle=60, hjust=1.2 )) +
      ylab("Number of Homicides") +
      xlab(""))
dev.print(png, file = "INEGIvsSNSP/output/mxSNSP-vs-CIEISP-vs-INEGI.png", width = 640, height = 480)

#Same chart without the CIEISP series
mx <- subset(mx, variable %in% c("INEGI", "SNSP"))
#Rebuild the factor to drop the level that is no longer used
mx$variable <- factor(mx$variable)
print(ggplot(mx, aes(Anio, value, group = variable,
                     color = variable)) +
      geom_line(size = 2, alpha = .4) +
      opts(title = "Differences in reported homicides according to\nSNSP data and the INEGI in the State of Mexico") +
      opts(axis.text.x=theme_text(angle=60, hjust=1.2 )) +
      ylab("Number of Homicides") +
      xlab(""))
dev.print(png, file = "INEGIvsSNSP/output/mxSNSP-vs-INEGI.png", width = 640, height = 480)
gsub("([ab])", "\\1_\\1_", "abc and ABC") 71 | 72 | fix(hom) 73 | > fix(hom) 74 | > fix(hom) 75 | total.hom 76 | fix(hom) 77 | fix(hom) 78 | fix(hom2008) 79 | fix(hom) 80 | fix(hom2008) 81 | fix(hom.diff) 82 | fix(hom.diff) 83 | fix(hom.diff) 84 | fix(hom.diff) 85 | fix(hom.diff) 86 | fix(hom.diff) 87 | q() 88 | fix(hom2008.map) 89 | x <- c(1:10) 90 | x[x>5] 91 | debug(greenReds) 92 | round(x[x>5]) 93 | difference <- hom.diff$Diff 94 | difference[difference<0] 95 | greenReds(hom.diff.map$Diff) 96 | greenReds(hom.diff$Diff) 97 | fix(hom.diff.map) 98 | fix(hom.diff) 99 | fix(hom.diff) 100 | fix(hom.diff.map) 101 | fix(hom2008.map) 102 | fix(hom.diff.map) 103 | fix(hom.diff) 104 | fix(hom.diff.map) 105 | df <- hom.diff 106 | varname <- "County.x" 107 | df 108 | fix(df) 109 | fix(df) 110 | fix(df) 111 | fix(df) 112 | q() 113 | q() 114 | fix(hom2008) 115 | fix(hom2008) 116 | fix(hom.diff) 117 | fix(hom.diff) 118 | fix(hom.diff) 119 | fix(hom.diff) 120 | fix(pop) 121 | fix(pop) 122 | fix(pop) 123 | fix(hom.diff) 124 | fix(hom.diff) 125 | fix(hom.diff) 126 | fix(hom.diff) 127 | fix(hom2008) 128 | x <- 1:15 129 | sort(x) 130 | sort(-x) 131 | abs(sort[-x])[2] 132 | abs(sort[-x]) 133 | abs 134 | abs(x) 135 | abs(order[-x]) 136 | x <- sort[-x]) 137 | x <- sort[-x] 138 | x <- sort(-x) 139 | x 140 | abs(x) 141 | abs((order[-x])) 142 | sort(-x) 143 | -sort(-x) 144 | range(x)[2] 145 | range(x)[1] 146 | x <- 1:15 147 | range(x)[2] 148 | fix(hom.diff) 149 | q() 150 | q() 151 | -------------------------------------------------------------------------------- /timelines/data/military-operations.yaml: -------------------------------------------------------------------------------- 1 | R.C.Assassination: 2 | Rodolfo Carrillo Fuentes Assassination: 2004-09-11 3 | source: http://www.noroeste.com.mx/publicaciones.php?id=409384 4 | 5 | Fox.Troops.NL: 6 | Troops in Nuevo Laredo: 2005-06-13 7 | source: http://www.univision.com/content/content.jhtml?cid=625397 8 | 9 | Op.Michoacan: 10 | 
Operation Michoacan: 2006-12-11 11 | source: http://www.ssp.gob.mx/portalWebApp/ShowBinary?nodeId=/BEA%20Repository/270970//archivo 12 | #p. 21 13 | 14 | Op.Tijuana: 15 | Operation Tijuana: 2007-01-03 16 | source: http://www.ssp.gob.mx/portalWebApp/ShowBinary?nodeId=/BEA%20Repository/270970//archivo 17 | #p. 21 18 | 19 | Op.Guerrero: 20 | Operation Guerrero: 2007-01-15 21 | source: http://www.ssp.gob.mx/portalWebApp/ShowBinary?nodeId=/BEA%20Repository/270970//archivo 22 | #p. 21 23 | 24 | #Sinaloa, Durango, Chihuahua 25 | Op.Triangulo.Dorado.I: 26 | Operation Triangulo Dorado: 2007-01-22 27 | source: http://www.ssp.gob.mx/portalWebApp/ShowBinary?nodeId=/BEA%20Repository/270970//archivo 28 | #p. 23 29 | 30 | Op.Tamaulipas-Nuevo Leon: 31 | Operation Tamaulipas Nuevo-Leon: 2007-02-19 32 | source: http://www.ssp.gob.mx/portalWebApp/ShowBinary?nodeId=/BEA%20Repository/270970//archivo 33 | #p. 23 34 | 35 | Op.Veracruz: 36 | Operation Veracruz: 2007-05-14 37 | source: http://www.ssp.gob.mx/portalWebApp/ShowBinary?nodeId=/BEA%20Repository/270970//archivo 38 | #p. 
24 39 | 40 | #Sinaloa, Durango, Chihuahua 41 | Op.Triangulo.Dorado.II: 42 | Phase II: 2007-05-01 43 | source: http://www.elsiglodetorreon.com.mx/noticia/328548.a-punto-de-reiniciar-la-guerra-contra-el-narc.html 44 | 45 | #Sinaloa, Durango, Chihuahua 46 | Op.Triangulo.Dorado.III: 47 | Phase III: 2008-02-01 48 | source: http://www.elsiglodetorreon.com.mx/noticia/328548.a-punto-de-reiniciar-la-guerra-contra-el-narc.html 49 | 50 | Op.Chihuahua: 51 | Operation Chihuahua: 2008-03-27 52 | source: http://www.juarezpress.com/not_detalle.php?id_n=12641&busca=sedena 53 | 54 | Reinforcements.Cd.Juarez: 55 | 5,000 Reinforcements Sent: 2009-03-01 56 | source: http://eleconomista.com.mx/notas-online/politica/2009/03/01/arriban-militares-ciudad-juarez 57 | 58 | Op.Sonora: 59 | Operation Sonora I: 2008-03-07 60 | source: http://www.elimparcial.com/busqueda/TraerNota.aspx?Numnota=295876 61 | #Other sources say it started 2008-04-05 http://www.zetatijuana.com/html/EdcionesAnteriores/Edicion1726/Principal.html 62 | 63 | Op.Sinaloa: 64 | Operation Culiacan-Navolato: 2008-05-13 65 | source: http://www.tabascohoy.com.mx/nota.php?id_nota=155210 66 | 67 | ABL.Captured: 68 | ABL Captured: 2008-01-21 69 | source: http://www.sedena.gob.mx/index.php?id_art=1169 70 | 71 | EAF.Captured: 72 | EAF Captured: 2008-10-26 73 | source: http://www.elfinanciero.com.mx/ElFinanciero/Portal/cfpages/contentmgr.cfm?docId=152259&docTipo=1&orderby=docid&sortby=ASC 74 | 75 | Calderon.Visit: 76 | First Presidential visit to Cd. Juarez: 2010-02-11 77 | Second Presidential visit to Cd. 
Juarez: 2010-02-18 78 | source1: http://articles.latimes.com/2010/feb/12/world/la-fg-mexico-juarez12-2010feb12 79 | source2: http://www.alertnet.org/thenews/newsdesk/N18195289.htm 80 | 81 | Police.Takeover: 82 | Police takevoer: 2010-04-08 83 | source: http://www.google.com/hostednews/ap/article/ALeqM5gMi5B2USfJStXxfqgWWr2xjRYpOgD9EV8GRO0 84 | -------------------------------------------------------------------------------- /missing-homicides/massacres.r: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | ##### Author: Diego Valle Jones 3 | ##### Website: www.diegovalle.net 4 | ##### Date Created: Sun Apr 04 13:06:20 2010 5 | ######################################################## 6 | #Check if some of the big massacres that occurred in Mexico were recorded in the INEGI homicide database 7 | #Row numbers of df whose County, Month.of.Murder and Year.of.Murder equal the arguments 8 | whichNumber <- function(df, county, month, year){ 9 | which(df$County == county & 10 | df$Month.of.Murder == month & 11 | df$Year.of.Murder == year) 12 | } 13 | #Return the first matching row of df plus up to two rows before and after it (clamped to the rows of df); zero rows when nothing matches 14 | extractCol <- function(df, county, month, year) { 15 | num <- whichNumber(df, county, month, year)[1] 16 | if (is.na(num)) df[0, ] else df[max(1, num - 2):min(nrow(df), num + 2), ] 17 | } 18 | #[Tijuana prison riot](http://news.newamericamedia.org/news/view_article.html?article_id=413e55db3c6d5eac317d63edb8ce03d8): September 2008, 25 dead. In the INEGI homicide database 19 | hom.tj <- read.csv("timelines/data/county-month.csv.bz2") 20 | hom.tj$County <- iconv(hom.tj$County, "windows-1252", 21 | "utf-8") 22 | 23 | tj <- extractCol(hom.tj, "Tijuana", "Septiembre", "2008") 24 | 25 | #[Ensenada](http://articles.latimes.com/1998/sep/19/news/mn-24394): September 19, 1998, 18 dead. In the INEGI homicide database 26 | en <- extractCol(hom.tj, "Ensenada", "Septiembre", "1998") 27 | 28 | #[Reynosa prison riot](http://www.horacerotam.com/Not_interior1.asp?Id=NHCT22047&link=280): October 2008, 21 dead. 
In the INEGI homicide database 29 | hom.ry <- read.csv("timelines/data/county-month-nl-tam.csv.bz2") 30 | ry <- extractCol(hom.ry, "Reynosa", "Octubre", "2008") 31 | 32 | 33 | #[Acteal Massacre](http://zedillo.presidencia.gob.mx/pages/chiapas/docs/crono.html): 45 dead December 22, 1997. Not in the INEGI homicide database 34 | hom.chip <- read.csv("timelines/data/county-month-chiapas.csv.bz2") 35 | hom.chip$County <- iconv(hom.chip$County, "windows-1252", 36 | "utf-8") 37 | acteal <- extractCol(hom.chip, "Chenalhó", "Diciembre", "1997") 38 | 39 | #since no deaths occured in Chenalhó lets check for the whole state of Chiapas 40 | chiapas <- extractCol(hom.chip, "Chiapas", "Diciembre", "1997") 41 | 42 | 43 | #[Aguas Blancas Massacre](http://www.sfgate.com/chronicle/special/mexico/massacre.html): (Warning: Graphic Video) 17 dead June 28, 1995. In the INEGI homicide database 44 | hom.gue <- read.csv("timelines/data/county-month-gue-oax.csv.bz2") 45 | hom.gue$County <- iconv(hom.gue$County, "windows-1252", 46 | "utf-8") 47 | 48 | AB <- extractCol(hom.gue, "Coyuca de Benítez", "Junio", "1995") 49 | 50 | #Cananea-Arizpe 51 | hom.son <- read.csv("timelines/data/county-month.csv.bz2") 52 | cana <- extractCol(hom.son, "Arizpe", "Mayo", "2007") 53 | 54 | #[Decapitated Bodies in Yucatán](http://www2.esmas.com/noticierostelevisa/mexico/009070/hallan-doce-cadaveres-decapitados-yucatan): August 28 2008, 12 dead. In the INEGI homicide database 55 | yuc <- extractCol(hom.tj, "Yucatán", "Agosto", "2008") 56 | 57 | #[24 dead outside Mexico City](http://www.nytimes.com/2008/09/14/world/americas/14mexico.html?_r=1): September 13, 2008, 24 dead. 
In the INEGI homicide database 58 | hom.mx <- read.csv("timelines/data/county-month-mx.csv.bz2") 59 | oco <- extractCol(hom.mx, "Ocoyoacac", "Septiembre", "2008") 60 | 61 | massacres <- rbind(AB, acteal, chiapas, en, cana, yuc, oco, tj, ry) 62 | write.csv(massacres, "missing-homicides/output/massacres.csv") 63 | rm(hom.tj, hom.gue, hom.chip, hom.ry, hom.son) 64 | -------------------------------------------------------------------------------- /CIEISP/readme.md: -------------------------------------------------------------------------------- 1 | Homicides according to the SNSP (CIEISP Format) 2 | ================================================== 3 | This is the original data on which the homicide data from the ICESI is based. The data for 2008 seems really incomplete since it only records 309 homicides for Chihuahua instead of ~2,600. There are quite a few differences between the ICESI data and the CIEISP forms from the CIDE. They did however add a footnote saying: 4 | 5 | >En el caso de Tlaxcala, el formato CIEISP señala cero homicidios dolosos. El dato utilizado fue proporcionado verbalmente por el Secretario General de Gobierno del Estado a ICESI. 6 | 7 | So if the Secretary General of the State Government says it, it must be true! 8 | 9 | The CIEISP forms are also downloadable from the PFP website, and that is what I used to generate the Michoacan chart. 10 | 11 | 12 | Sources 13 | ------- 14 | 15 | [CIEISP 2007](http://www.pfp.gob.mx/portalWebApp/ShowBinary?nodeId=/BEA%20Repository/368010//archivo) 16 | 17 | [CIEISP 2006](http://www.ssp.gob.mx/portalWebApp/ShowBinary?nodeId=/BEA%20Repository/368009//archivo) 18 | 19 | [CIEISP 2005](http://www.ssp.gob.mx/portalWebApp/ShowBinary?nodeId=/BEA%20Repository/368008//archivo) 20 | 21 | Bergman, Marcelo et al. (2009?). Incidencia Delictiva - 2002 [en línea]. Distribuido por: México, D.F.: Banco de Información para la Investigación Aplicada en Ciencias Sociales : Centro de Investigación y Docencia Económicas. 
[Fecha de consulta - 2010-04-19], http://hdl.handle.net//10089/16125 22 | 23 | Bergman, Marcelo et al. (2009?). Incidencia Delictiva - 2003 [en línea]. Distribuido por: México, D.F.: Banco de Información para la Investigación Aplicada en Ciencias Sociales : Centro de Investigación y Docencia Económicas. [Fecha de consulta - 2010-04-19], http://hdl.handle.net/10089/16126 24 | 25 | Bergman, Marcelo et al. (2009?). Incidencia Delictiva - 2004 [en línea]. Distribuido por: México, D.F.: Banco de Información para la Investigación Aplicada en Ciencias Sociales : Centro de Investigación y Docencia Económicas. [Fecha de consulta - 2010-04-19], http://hdl.handle.net/10089/16127 26 | 27 | Bergman, Marcelo et al. (2009?). Incidencia Delictiva - 2005 [en línea]. Distribuido por: México, D.F.: Banco de Información para la Investigación Aplicada en Ciencias Sociales : Centro de Investigación y Docencia Económicas. [Fecha de consulta - 2010-04-19], http://hdl.handle.net/10089/16122 28 | 29 | Bergman, Marcelo et al. (2009?). Incidencia Delictiva - 2006 [en línea]. Distribuido por: México, D.F.: Banco de Información para la Investigación Aplicada en Ciencias Sociales : Centro de Investigación y Docencia Económicas. [Fecha de consulta - 2010-04-19], http://hdl.handle.net/10089/16123 30 | 31 | Bergman, Marcelo et al. (2009?). Incidencia Delictiva - 2009 [en línea]. Distribuido por: México, D.F.: Banco de Información para la Investigación Aplicada en Ciencias Sociales : Centro de Investigación y Docencia Económicas. [Fecha de consulta - 2010-04-19], http://hdl.handle.net/10089/16124 32 | 33 | Bergman, Marcelo et al. (2009?). Incidencia Delictiva - 2008 [en línea]. Distribuido por: México, D.F.: Banco de Información para la Investigación Aplicada en Ciencias Sociales : Centro de Investigación y Docencia Económicas. 
[Fecha de consulta - 2010-04-19], http://hdl.handle.net/10089/16106 34 | 35 | Copyright 36 | ---------- 37 | >Usted puede copiar, distribuir y usar esta obra pero para ello es obligatorio reconocer los derechos de autor citando al autor de la >misma, mencionando que la obra se encuentra depositada en el Centro de Investigación y Docencia Económicas, A.C., utilizar la obra >únicamente para fines lícitos y respetar los demás términos de la Licencia Creative Commons Atribución 2.5 38 | -------------------------------------------------------------------------------- /Benford/benford.r: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | ##### Author: Diego Valle Jones 3 | ##### Website: www.diegovalle.net 4 | ##### Date Created: Sat Feb 20 16:16:23 2010 5 | ######################################################## 6 | #Check to see if the homicide data was manipulated with 7 | #1. Benford's law 8 | 9 | source("library/utilities.r") 10 | 11 | ######################################################## 12 | #Read and clean the data 13 | ######################################################## 14 | hom <- read.csv(bzfile("timelines/data/county-month-gue-oax.csv.bz2")) 15 | hom <- cleanHom(hom) 16 | hom <- addMonths(hom) 17 | hom <- subset(hom, Year.of.Murder >= 1994) 18 | 19 | ######################################################## 20 | #See if the first digits of the monthly number of homicides 21 | #follow a Benford distribution 22 | ######################################################## 23 | dBen <- function(x){ 24 | log(1 + 1/x) / log(10) 25 | } 26 | 27 | firstDigit <- function(x){ 28 | x <- as.numeric(substring(formatC(x, format = 'e'), 1, 1)) 29 | } 30 | 31 | benObsExp <- function(x, name =""){ 32 | n <- length(x) 33 | x <- firstDigit(x) 34 | obs.freq <- tabulate(x, nbins = 9) 35 | obs.freq <- obs.freq / sum(obs.freq) 36 | ben.freq <- dBen(1:9) 37 | name <- paste(name, 
"homicide data (red) vs. Benford's law (black)") 38 | df <- data.frame(obs = obs.freq, ben = ben.freq, digits = 1:9) 39 | ggplot(df, aes(digits, ben)) + geom_line() + 40 | geom_point(aes(digits, obs), color = "red") + 41 | opts(title = name) + ylab("") + 42 | scale_y_continuous(formatter = "percent") 43 | } 44 | 45 | chiBen <- function(x) { 46 | n <- length(x) 47 | x <- firstDigit(x) 48 | obs.freq <- tabulate(x, nbins = 9) 49 | chisq.test(obs.freq, p = dBen(1:9)) 50 | } 51 | 52 | #The original data from Benford's paper 53 | #http://mathworld.wolfram.com/BenfordsLaw.html 54 | death.rate <- (c(27.0,18.6,15.7,9.4,6.7,6.5,7.2,4.8,4.1)/100) * 418 55 | chisq.test(death.rate, p = dBen(1:9)) 56 | 57 | #For the vital statistics data 58 | #Benford's law is scale invariant 59 | inegi <- hom$Total.Murders 60 | print(benObsExp(inegi, "INEGI")) 61 | ggsave("Benford/output/INEGI.png", dpi=72, width = 6, height = 6) 62 | chiBen(inegi) 63 | 64 | #For the police data 65 | icesi <- melt(read.csv("INEGIvsSNSP/data/states-icesi.csv"), 66 | id ="State") 67 | print(benObsExp(icesi$value, "SNSP")) 68 | ggsave("Benford/output/ICESI.png", dpi=72, width = 6, height = 6) 69 | chiBen(icesi$value) 70 | 71 | 72 | #Mean absolute deviation 73 | 74 | #INEGI 75 | y <- tabulate(firstDigit(inegi), nbins=9) 76 | sum(abs((dBen(1:9)) - y / sum(y)))*100 77 | #ICESI 78 | y <- tabulate(firstDigit(icesi$value), nbins=9) 79 | sum(abs((dBen(1:9)) - y / sum(y)))*100 80 | 81 | 82 | #Some regressions to see the size of the difference 83 | 84 | #Null hypothesis 85 | y <- dBen(1:9) 86 | fitBen <- lm(log(y) ~ c(1:9)) 87 | #INEGI (observed first-digit proportions, so the scale matches the Benford probabilities) 88 | y1 <- tabulate(firstDigit(inegi), nbins=9) 89 | fitInegi <- lm(log(y1 / sum(y1)) ~ c(1:9)) 90 | #ICESI (observed first-digit proportions) 91 | y2 <- tabulate(firstDigit(icesi$value), nbins=9) 92 | fitIcesi <- lm(log(y2 / sum(y2)) ~ c(1:9)) 93 | #NOTE(review): anova() is meant for models fitted to the same response, so the two comparisons below need checking; comparing coef(fitInegi) and coef(fitIcesi) with coef(fitBen) may be what was intended 94 | anova(fitInegi, fitBen) 95 | anova(fitIcesi, fitBen) 96 | 97 | 98 | #check if the last digit follows a uniform distribution 99 | #Is this even reasonable? 
I don't think so 100 | lastDigit <- function(v){ 101 | v - 10*floor(v/10) 102 | } 103 | chi.uni <- function(x) { #Chi-squared test of the last digits of x against a uniform distribution over 0-9 104 | v <- tabulate(lastDigit(x) + 1, nbins = 10) #unlike table(), keeps a zero count for digits that never occur, so there are always 10 counts 105 | chisq.test(v, p = rep(1/10, 10)) 106 | } 107 | 108 | chi.uni(inegi) 109 | chi.uni(icesi$value) 110 | plot(as.vector(table(lastDigit(icesi$value)))) 111 | -------------------------------------------------------------------------------- /library/utilities.r: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | ##### Author: Diego Valle Jones 3 | ##### Website: www.diegovalle.net 4 | ##### Date Created: Thu Feb 04 13:35:41 2010 5 | ######################################################## 6 | #Shared functions 7 | 8 | #Group dates into intervals 9 | cutDates <- function(df, dates, hack = 0) { 10 | DateMid <- as.Date(format(df$Date, "%Y%m15"), 11 | "%Y%m%d") + hack 12 | vec <- c(DateMid[1], dates, DateMid[length(DateMid)] + 1000) 13 | as.numeric(as.factor(cut(DateMid, vec))) 14 | } 15 | 16 | #Get rid of the full name of the states (eg: Veracruz de 17 | #Ignacio de la Llave changes to Veracruz 18 | cleanNames <- function(df, varname = "County"){ 19 | df[[varname]] <- gsub("* de .*","", df[[varname]]) 20 | df[[varname]] 21 | } 22 | 23 | monthSeq <- function(st, len){ 24 | #start <- as.Date(st) 25 | #next.mon <- seq(start, length = len, by='1 month') 26 | #next.mon - 1 27 | seq(as.Date(st), length = len, by='1 month') 28 | } 29 | 30 | monthlyPop <- function() { 31 | pop <- read.csv("conapo-pop-estimates/conapo-states.csv") 32 | pop2 <- data.frame(year = rep(1990:2008, each = 12), 33 | month = rep(1:12)) 34 | pop2$Monthly.Pop[pop2$month == 6] <- unlist(pop[33,2:ncol(pop)]) 35 | pop2$Monthly <- na.spline(pop2$Monthly.Pop, na.rm=FALSE) 36 | pop2 37 | } 38 | 39 | addHom <- function(df, pop) { 40 | hom.st <- ddply(df, .(Month.of.Murder, Year.of.Murder), 41 | function(df) sum(df$Total.Murders)) 42 | hom.st <- 
hom.st[order(hom.st$Year.of.Murder, 43 | hom.st$Month.of.Murder),] 44 | pop$murders <- hom.st$V1 45 | pop$rate <- (pop$murders / pop$Monthly) * 100000 * 12 46 | start <- as.Date("1990/01/15") 47 | next.mon <- seq(start, length = 12*19, by='1 month') 48 | period <- next.mon - 1 49 | pop$date <- period 50 | pop 51 | } 52 | 53 | addTrend <- function(df){ 54 | hom.ts <- ts(df$rate, start=1990, freq = 12) 55 | hom.stl <- stl(hom.ts, "per") 56 | cbind(df, data.frame(hom.stl$time.series)) 57 | } 58 | 59 | cleanHom <- function(df) { 60 | df <- subset(df, Code == "#NAME?" & 61 | Year.of.Murder != "Total" & 62 | Year.of.Murder != "No especificado" & 63 | Month.of.Murder != "Total" & 64 | Month.of.Murder != "No especificado" & 65 | County != "Extranjero" 66 | ) 67 | df$Year.of.Murder <- as.numeric(gsub('[[:alpha:]]', '', 68 | df$Year.of.Murder)) 69 | df <- subset(df, Year.of.Murder >= 1990) 70 | df$County <- iconv(df$County, "windows-1252", "utf-8") 71 | col2cvt <- 5:ncol(df) 72 | df[is.na(df)] <- 0 73 | df$Total.Murders <- apply(df[ , col2cvt], 1, sum) 74 | df$Month.of.Murder <- factor(df$Month.of.Murder) 75 | #The months are in a weird order, so 04=Abril, etc. 
76 | levels(df$Month.of.Murder) <- c("04","08","12","01","02","07","06","03","05","11","10","09") 77 | df 78 | } 79 | #Expand df so that every county has one row for each of the 228 months starting in January 1990; county-months missing from df are filled with 0 80 | addMonths <- function(df){ 81 | states <- unique(factor(df$County)) 82 | start <- as.Date("1990/1/15") 83 | next.mon <- seq(start, length=12*19, by='1 month') 84 | period <- next.mon 85 | dates.df <- data.frame(Date = factor(rep(period, 86 | each = length(states))), #one copy of each month per county (was hard-coded to 32) 87 | County = states) 88 | dates <- strptime(as.character(dates.df$Date), "%Y-%m-%d") 89 | dates.df$Month.of.Murder <- dates$mon + 1 90 | dates.df$Year.of.Murder <- dates$year + 1900 91 | df$Month.of.Murder <- as.numeric(as.character(df$Month.of.Murder)) 92 | df <- merge(dates.df, df, 93 | by = c("Month.of.Murder", 94 | "Year.of.Murder", "County"), 95 | all.x = TRUE) 96 | df[is.na(df)] <- 0 97 | df 98 | } 99 | -------------------------------------------------------------------------------- /missing-homicides/stat-yearbooks/wget.bat: -------------------------------------------------------------------------------- 1 | rem Be careful, downloads about ~250 MB 2 | 3 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/ags/aee_ags_09.zip 4 | ping localhost -n 350 > nul 5 | 6 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/bc/aee_bc_09.zip 7 | ping localhost -n 350 > nul 8 | 9 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/bcs/aee_bcs_09.zip 10 | ping localhost -n 350 > nul 11 | 12 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/cam/aee_cam_09.zip 13 | ping localhost -n 350 > nul 14 | 15 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/coa/aee_coa_09.zip 16 | ping localhost -n 350 > nul 17 | 18 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/col/aee_col_09.zip 19 | ping localhost -n 350 > nul 20 | 21 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/chs/aee_chs_09.zip 22 | ping localhost -n 350 > nul 23 | 24 | wget 
http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/chih/aee_chih_09.zip 25 | ping localhost -n 350 > nul 26 | 27 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/df/aee_df_09.zip 28 | ping localhost -n 350 > nul 29 | 30 | 31 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/gto/aee_gto_09.zip 32 | ping localhost -n 350 > nul 33 | 34 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/gro/aee_gro_09.zip 35 | ping localhost -n 350 > nul 36 | 37 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/hgo/aee_hgo_09.zip 38 | ping localhost -n 350 > nul 39 | 40 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/jal/aee_jal_09.zip 41 | ping localhost -n 350 > nul 42 | 43 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/mex/aee_mex_09.zip 44 | ping localhost -n 350 > nul 45 | 46 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/mic/aee_mic_09.zip 47 | ping localhost -n 350 > nul 48 | 49 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/mor/aee_mor_09.zip 50 | ping localhost -n 350 > nul 51 | 52 | 53 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/nln/aee_nln_09.zip 54 | ping localhost -n 350 > nul 55 | 56 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/oax/aee_oax_09.zip 57 | ping localhost -n 350 > nul 58 | 59 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/pue/aee_pue_09.zip 60 | ping localhost -n 350 > nul 61 | 62 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/qro/aee_qro_09.zip 63 | ping localhost -n 350 > nul 64 | 65 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/qtr/aee_qtr_09.zip 66 | ping localhost -n 350 > nul 67 | 68 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/slp/aee_slp_09.zip 69 | ping localhost -n 350 > nul 70 | 71 | 
wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/sin/aee_sin_09.zip 72 | ping localhost -n 350 > nul 73 | 74 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/son/aee_son_09.zip 75 | ping localhost -n 350 > nul 76 | 77 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/tab/aee_tab_09.zip 78 | ping localhost -n 350 > nul 79 | 80 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/tam/aee_tam_09.zip 81 | ping localhost -n 350 > nul 82 | 83 | 84 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/ver/aee_ver_09.zip 85 | ping localhost -n 350 > nul 86 | 87 | wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/09/zac/aee_zac_09.zip 88 | ping localhost -n 350 > nul 89 | 90 | 91 | rem for 2008 just replace the 09 with 08 92 | rem wget http://www.inegi.org.mx/est/contenidos/espanol/soc/int/nav/aee/08/zac/aee_zac_08.zip 93 | 94 | 95 | -------------------------------------------------------------------------------- /most-violent-counties/cities-mun.r: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | ##### Author: Diego Valle Jones 3 | ##### Website: www.diegovalle.net 4 | ##### Date Created: Wed May 05 18:21:40 2010 5 | ######################################################## 6 | #This program does this and that 7 | 8 | allmun <- data.frame() 9 | for(i in c(1995,2000,2005:2008)) { 10 | f <- read.csv(paste("choropleths/output/map", i, ".csv", sep = "")) 11 | allmun <- rbind(allmun,f) 12 | } 13 | 14 | border <- list("Nuevo Laredo" = " 28 027 ", 15 | "Tijuana" = "02 004", 16 | "Reynosa" = " 28 032 ", 17 | "Nogales" = " 26 043 ", 18 | "Matamoros" = " 28 022 ", 19 | "Juárez" = " 08 037 ", 20 | "Piedras Negras" = " 05 025 ", 21 | "Acuña" = " 05 002 ", 22 | "Mexicali" = " 02 002 ") 23 | 24 | north <- list("Culiacán" = " 25 006 ", 25 | "Chihuahua" = " 08 019 ", 26 
| "Durango" = " 10 005 ", 27 | "Mazatlán" = " 25 012 ", 28 | "Ensenada" = " 02 001 ", 29 | "Torreón" = " 05 035 ", 30 | "Saltillo" = " 05 030 ", 31 | "Monterrey" = " 19 039 ") 32 | 33 | south.center <- list("Lázaro Cárdenas" = " 16 052 ", 34 | "Toluca" = " 15 106 ", 35 | "Acapulco de Juárez" = " 12 001 ", 36 | "Cuernavaca" = " 17 007 ", 37 | "Chilpancingo de los Bravo" = " 12 029 ", 38 | "Texcoco" = " 15 099 ", 39 | "Uruapan" = " 16 102 ", 40 | "Morelia" = " 16 053 ", 41 | "Oaxaca de Juárez"= "20067", 42 | "Tuxtla Gutiérrez" = " 07 101 ") 43 | 44 | vacation <- list("Cabos, Los" = " 03 008 ", 45 | "Ensenada" = " 02 001 ", 46 | "Puerto Peñasco" = " 26 048 ", 47 | "Benito Juárez" = " 23 005 ", #Cancún 48 | "Acapulco de Juárez" = " 12 001 ", 49 | "José Azueta" = " 12 038 ", #Zihuatanejo 50 | "Paz, La" = " 03 003 ", 51 | "Mazatlán" = " 25 012 ", 52 | "Manzanillo" = " 06 007 ", 53 | "Ciudad Madero" = " 28 009 ", 54 | "Playas de Rosarito" = " 02 005 ") 55 | #Plot the yearly homicide rate of the municipalities whose codes are listed in cities and save the chart as a png named after title 56 | plotCities <- function(cities, title, df) { 57 | mun.int <- subset(df, gsub("[[:space:]]", "", Code) %in% gsub("[[:space:]]", "", sapply(cities, "[[", 1))) #codes are compared with whitespace stripped because the city lists mix padded (" 28 027 ") and unpadded ("20067") forms 58 | mun.int$County.x <- iconv(mun.int$County.x, "windows-1252", 59 | "utf-8") 60 | mun.int$County.x <- factor(mun.int$County.x) 61 | p <- ggplot(mun.int, aes(Year.of.Murder, rate, group = County.x, 62 | color = County.x)) + 63 | geom_line(size = 1.5) + 64 | geom_point(size = 5) + 65 | coord_cartesian(xlim = c(1991.5,2009)) + 66 | opts(title = title) + 67 | theme_bw() 68 | print(direct.label(p, first.points)) 69 | filename <- paste("most-violent-counties/output/municipalities-", 70 | title, ".png", sep = "") 71 | dev.print(png, filename, width = 960, height = 600) 72 | } 73 | mapply(plotCities, 74 | list(south.center, north, border, vacation), 75 | list("Cities in Southern and Central Mexico", 76 | "Cities in Northen Mexico (excluding border cities)", 77 | "Mexican Cities Bordering the US", 78 | "Vacation Spots in Mexico"), 79 | MoreArgs = list(df = allmun)) 80 | 81 | 82 | mun08 <- subset(allmun, Population >= 
100000 & 83 | Year.of.Murder == 2008) 84 | mun08 <- mun08[order(-mun08$rate),][1:20,] 85 | mun08$County.y <- iconv(mun08$County.y, "windows-1252", 86 | "utf-8") 87 | mun08$County.y <- factor(mun08$County.y) 88 | mun08$County.y <- reorder(mun08$County.y, mun08$rate) 89 | p <- ggplot(mun08, aes(County.y, rate)) + 90 | geom_point() + 91 | geom_segment(aes(xend = County.y, yend=0)) + 92 | coord_flip() + 93 | opts(title = "Most violent municipalities in 2008 (with more than 100,000 people)") + 94 | ylab("Homicide rate") + xlab("") 95 | print(p) 96 | dev.print(png, "most-violent-counties/output/most-violent-2008.png", 97 | width = 500, height = 600) 98 | -------------------------------------------------------------------------------- /timelines/readme.md: -------------------------------------------------------------------------------- 1 | Homicide in Mexico by month 2 | ================================================ 3 | Two scripts to explore how have the different military operations 4 | affected the annualized monthly homicide rate. One for the states 5 | where there have been military operations and the other for Ciudad 6 | Juarez since it is the city with the highest murder rate in the whole world. 7 | 8 | Output 9 | ------ 10 | * Time series of the homicide rate in states with high homicide rates 11 | or states where the military has been sent, with loess lines added 12 | and grouped into before and after military operations. 
13 | * Chart of the murder rate in Ciudad Juarez before and after the military took over 14 | 15 | Data 16 | ----- 17 | Because the INEGI has a limit of ~100,000 cells in the files you can download, the data was divided into 3 files: 18 | 19 | _county-month.csv.bz2_: Michoacan, Chihuahua, Durango, Sonora, Baja California (not Norte), Sinaloa 20 | 21 | _county-month-gue-oax.csv.bz2_: Oaxaca and Guerrero 22 | 23 | _county-month-nl-tam.csv.bz2_: Tamaulipas and Nuevo Leon 24 | 25 | Interesting Municipalities 26 | --------------------------- 27 | These municipalities are worth checking out because of the great increase in the homicide rate they suffered in 2008 (from the choropleths) 28 | 29 | _Guerrero_ 30 | 31 | Bordering the state of Michoacán: Zirándaro, Coyuca de Catalán, Unión de Isidoro Montes de Oca 32 | 33 | _Chihuahua_ 34 | 35 | The municipalities bordering the US excluding Juárez: Janos, Ascensión, Juárez, Guadalupe, Ojinaga, Ahumada, Nuevo Casas Grandes, Coyame del Sotol 36 | 37 | 38 | Codebook 39 | -------- 40 | Variables used to download the data from the INEGI: 41 | 42 | Tipo de defunción : Homicidio 43 | 44 | Consulta de: Defunciones accidentales y violentas   Por: Ent y mun de ocurrencia, Año de ocurrencia y Mes de ocurrencia   Según: Año de registro 45 | 46 | The columns of the database correspond to: 47 | 48 | Code - Numeric code for each state and county 49 | 50 | County - Name of each county 51 | 52 | Year.of.Murder - The year in which the murder _occurred_ 53 | 54 | Month.of.Murder - The month in which the murder _occurred_ 55 | 56 | 1990 ... 2008 - The rest of the columns correspond to the year in which the murder was _registered_ 57 | 58 | The weird order of the database is because the website of the INEGI is a steaming pile of broccoli and only lets you download the data ordered by the year in which the murder was registered. 
59 | 60 | Sources: 61 | -------- 62 | __Homicide Data 1990-2008:__ 63 | 64 | [INEGI](http://www.inegi.org.mx/est/contenidos/espanol/proyectos/continuas/vitales/bd/mortalidad/MortalidadGeneral.asp?s=est&c=11144) 65 | 66 | __Military Deployment Dates:__ 67 | 68 | Unless otherwise noted all deployment dates come from the [SSP](http://www.ssp.gob.mx/portalWebApp/ShowBinary?nodeId=/BEA%20Repository/270970//archivo) [PDF] p. 21-24 69 | 70 | Michoacan: 12/11/2006 71 | 72 | OPERATIVO CONJUNTO TIJUANA: 01/03/2007 73 | 74 | Operativo Conjunto Guerrero: 01/15/2007 75 | 76 | Operativo Conjunto Triangulo Dorado or Sierra Madre: 01/22/2007, [Part II](http://www.elsiglodetorreon.com.mx/noticia/328548.a-punto-de-reiniciar-la-guerra-contra-el-narc.html): 05/01/2007, [Part III](http://www.elsiglodetorreon.com.mx/noticia/328548.a-punto-de-reiniciar-la-guerra-contra-el-narc.html): 02/01/2008 77 | 78 | Operación Conjunta Tamaulipas-Nuevo León: 02/19/2007 79 | 80 | Operativo Veracruz: 05/14/2007 81 | 82 | [Operativo Conjunto Chihuahua](http://www.el-mexicano.com.mx/noticias/nacional/2009/03/02/sitian-militares-ciudad-juarez.aspx): 03/27/2008. 
Keep in mind that there were already 539 soldiers on the ground by the time it was announced 83 | [juarezpress](http://www.eluniversal.com.mx/nacion/166104.html) 84 | 85 | [Operación Conjunta Culiacán-Navolato](http://www.tabascohoy.com.mx/nota.php?id_nota=155210): 05/13/2008 86 | 87 | [Operativo Sonora I](http://www.elimparcial.com/busqueda/TraerNota.aspx?Numnota=295876): 03/07/2008 88 | 89 | [Reinforcements for Ciudad Juarez](http://eleconomista.com.mx/notas-online/politica/2009/03/01/arriban-militares-ciudad-juarez): 03/01/2009 90 | 91 | [Vicente Fox sends troops to Nuevo Laredo](http://www.univision.com/content/content.jhtml?cid=625397) 06/13/2005 92 | 93 | __Other Dates:__ 94 | 95 | [Alfredo Beltrán Leyva, "El Mochomo"](http://www.sedena.gob.mx/index.php?id_art=1169) captured: 01/21/2008 96 | 97 | [Eduardo Arellano Félix, "El Doctor"](http://www.elfinanciero.com.mx/ElFinanciero/Portal/cfpages/contentmgr.cfm?docId=152259&docTipo=1&orderby=docid&sortby=ASC) arrested: 10/26/2008 98 | 99 | __Population Data:__ 100 | 101 | CONAPO [De la población de México 2005-2050](http://www.conapo.gob.mx/00cifras/proy/municipales.xls) 102 | 103 | __Ciudad Juarez Monthly Murder Rates in 2009:__ 104 | 105 | For the first half of the 2009 106 | [puntoporpunto](http://www.puntoporpunto.com/informacion-general/en_juarez_suman_mil_13_asesina.php), 107 | and for the second half of 2009 108 | [larednoticias](http://www.larednoticias.com/detalle.cfm?s=26) 109 | 110 | For 2010[Agencia EFE](http://www.google.com/hostednews/epa/article/ALeqM5gVsNv7FxY-In2bVMa5v0rujdQWtQ) 111 | -------------------------------------------------------------------------------- /timelines/report/report.Rnw: -------------------------------------------------------------------------------- 1 | \documentclass[landscape]{article} 2 | 3 | \title{Structural Change in Homicide Rates} 4 | \author{Diego Valle-Jones} 5 | 6 | \usepackage{longtable} 7 | \usepackage{anysize} 8 | 9 | \begin{document} 10 | 11 | \maketitle 12 | 
13 | The R package strucchange was used to test for stability in the linear 14 | regressions of the homicide rates to see if the different military 15 | operations caused them to increase. 16 | 17 | \begin{verbatim} 18 | breakpoints(rate ~ ndays) 19 | \end{verbatim} 20 | 21 | where rate is the homicide rate, and ndays are the number of days in each month 22 | 23 | In the tables below, lower refers to the lower bound of the confidence interval, and 24 | upper refers to the upper bound of the confidence interval. Keep in mind that the murders are 25 | only available as monthly totals, while the exact date of military 26 | operations is available. 27 | 28 | <>= 29 | setwd("timelines/report") 30 | sanitizeCol <- function(x){ 31 | x <- gsub("^Joint Operation", "J. O.", x) 32 | x <- gsub("^Operation", "O.", x) 33 | x 34 | } 35 | printTable <- function(df){ 36 | df.table <- xtable(df) 37 | print(xtable(df.table), include.rownames = FALSE, 38 | tabular.environment = "longtable", 39 | floating = FALSE, 40 | sanitize.colnames.function = sanitizeCol) 41 | } 42 | @ 43 | 44 | \section{Michoacan} 45 | 46 | Municipalities with more than 100,000 people: 47 | 48 | <>= 49 | printTable(report.ll$mich) 50 | @ 51 | 52 | The cluster of municipalities near the Pacific Coast and the Tierra 53 | Caliente that have a very high homicide rate: 54 | 55 | <>= 56 | printTable(report.ll$mich.int) 57 | @ 58 | 59 | 60 | \section{Guerrero} 61 | 62 | From the charts it looks as if there were two breaks in the homicide rate, 63 | the first one coinciding with the arrival of the army, when there was 64 | a decrease in the homicide rate, especially in Acapulco. And a second 65 | one when Alfredo Beltran Leyva was captured by the army. 
66 | 67 | Municipalities with more than 100,000 people: 68 | 69 | <>= 70 | printTable(report.ll$gue) 71 | @ 72 | 73 | These are the municipalities bordering Michoacan: 74 | 75 | <>= 76 | printTable(report.ll$gue.int) 77 | @ 78 | 79 | 80 | 81 | 82 | \section{Baja California} 83 | I set the minimum segment size as three because the capture of Eduardo 84 | Arellano Felix occurred near the end of the year, so not many months 85 | with a high number of homicides were available for testing. 86 | 87 | Municipalities with more than 100,000 people: 88 | 89 | <>= 90 | printTable(report.ll$bcn) 91 | @ 92 | 93 | 94 | \section{Durango} 95 | 96 | Municipalities with more than 100,000 people: 97 | 98 | <>= 99 | printTable(report.ll$dur) 100 | @ 101 | 102 | These are the municipalities with a high homicide rate: 103 | <>= 104 | printTable(report.ll$dur.int) 105 | @ 106 | 107 | 108 | \section{Sonora} 109 | 110 | Municipalities with more than 100,000 people: 111 | 112 | <>= 113 | printTable(report.ll$son) 114 | @ 115 | 116 | 117 | \section{Sinaloa} 118 | 119 | Municipalities with more than 100,000 people: 120 | 121 | <>= 122 | printTable(report.ll$sin) 123 | @ 124 | 125 | Municipalities with a high homicide rate: 126 | 127 | <>= 128 | printTable(report.ll$sin.int) 129 | @ 130 | 131 | 132 | 133 | \section{Tamaulipas} 134 | 135 | From the charts it looks like there were 2 breaks in the homicide 136 | rate, one corresponding to when ``El Chapo'' sent ``La Barbie'' to take 137 | over the plaza in Nuevo Laredo, and the other to when he realized he 138 | wasn't going to be able to take it over. 
139 | 140 | Municipalities with more than 250,000 people: 141 | 142 | <>= 143 | printTable(report.ll$tam) 144 | @ 145 | 146 | 147 | 148 | \section{Nuevo Leon} 149 | 150 | Municipalities with more than 250,000 people: 151 | 152 | <>= 153 | printTable(report.ll$nl) 154 | @ 155 | 156 | These are the municipalities in the metropolitan area of Monterrey: 157 | 158 | <>= 159 | printTable(report.ll$mont) 160 | @ 161 | 162 | \section{Veracruz} 163 | 164 | Municipalities with more than 250,000 people: 165 | 166 | <>= 167 | printTable(report.ll$ver) 168 | @ 169 | 170 | These are the interesting municipalities: 171 | 172 | <>= 173 | printTable(report.ll$ver.int) 174 | @ 175 | 176 | 177 | 178 | 179 | \section{Chihuahua} 180 | 181 | Municipalities with more than 100,000 people: 182 | 183 | <>= 184 | printTable(report.ll$chi) 185 | @ 186 | 187 | These are the municipalities near the US border: 188 | <>= 189 | printTable(report.ll$chi.bdr) 190 | @ 191 | 192 | These are the municipalities with a very high homicide rate: 193 | 194 | <>= 195 | printTable(report.ll$chi.int) 196 | @ 197 | 198 | 199 | 200 | \end{document} 201 | <>= 202 | setwd("../..") 203 | @ 204 | -------------------------------------------------------------------------------- /CIEISP/cieisp.r: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | ##### Author: Diego Valle Jones 3 | ##### Website: www.diegovalle.net 4 | ##### Date Created: Mon Apr 19 19:30:07 2010 5 | ######################################################## 6 | #Analysis of the original homicide data from SNSP formato CIEISP 7 | 8 | 9 | cieisp <- data.frame() 10 | for(i in 2:8) { 11 | file <- paste("CIEISP/data/BD0", i, ".TXT.bz2", sep = "") 12 | bd <- read.table(file, sep = "\t", header = TRUE) 13 | if (i < 8) 14 | bd$Entidad <- iconv(bd$Entidad, "windows-1252", "utf-8") 15 | if(i == 8) 16 | names(bd) <- names(cieisp) 
17 | cieisp <- rbind(cieisp, bd) 18 | } 19 | write.csv(cieisp, "CIEISP/output/cieisp.csv") 20 | 21 | 22 | tot <- subset(cieisp, Mes == "Total" & 23 | Mes != "Totgeneral" & 24 | Entidad != "Totalestados") 25 | cieisp <- subset(cieisp, Mes != "Total" & 26 | Mes != "Totgeneral" & 27 | Entidad != "Totalestados") 28 | hom <- cieisp[,c("Anio", "Mes","Entidad", "HDAF", "THD")] 29 | hom$Mes <- factor(hom$Mes) 30 | hom$MesN <- hom$Mes 31 | levels(hom$MesN) <- 32 | c("04","08","12","01","02","07","06","03","05","11","10","09") 33 | hom$date <- as.Date(paste(hom$Anio, hom$MesN, "01",sep = "-")) 34 | 35 | print(ggplot(hom, aes(as.Date(date), THD, group = Entidad)) + 36 | geom_line() + 37 | scale_x_date() + 38 | facet_wrap(~ Entidad, scale = "free_y") + 39 | opts(axis.text.x=theme_text(angle=60, hjust=1.2 )) + 40 | opts(title = "Monthly Number of Homicides (based on SNSP data)")) 41 | dev.print(png, "CIEISP/output/Homicides02-08.png", width = 960, 42 | height = 600) 43 | 44 | 45 | print(ggplot(hom, aes(as.Date(date), HDAF / THD, group = Entidad)) + 46 | geom_line() + 47 | scale_x_date() + 48 | scale_y_continuous(formatter = "percent") + 49 | facet_wrap(~ Entidad, scale = "free_y") + 50 | opts(axis.text.x=theme_text(angle=60, hjust=1.2 )) + 51 | opts(title = "Firearm Homicides as a percentage of total homicides(based on SNSP data)")) 52 | dev.print(png, "CIEISP/output/HomicidesGuns02-08.png", width = 960, 53 | height = 600) 54 | 55 | month <- ddply(hom, .(Anio, MesN), function(df) sum(df$THD)) 56 | month$date <- as.Date(paste(month$Anio, month$MesN, "01",sep = "-")) 57 | ggplot(month, aes(date, V1)) + 58 | geom_line() + 59 | scale_x_date() 60 | 61 | 62 | ######################################################## 63 | #Compare the CIEISP data to the INEGI data 64 | ######################################################## 65 | 66 | source("library/utilities.r") 67 | hom2 <- read.csv(bzfile("timelines/data/county-month-gue-oax.csv.bz2")) 68 | hom2 <- cleanHom(hom2) 69 | hom2$County 
<- factor(cleanNames(hom2, "County")) 70 | 71 | 72 | cie.ine <- merge(subset(hom2, Year.of.Murder >=2006 & 73 | Year.of.Murder <= 2008), 74 | hom, 75 | by.x = c("County","Year.of.Murder","Month.of.Murder"), 76 | by.y = c("Entidad", "Anio", "MesN") 77 | ) 78 | 79 | cie.ine <- ddply(cie.ine, .(County), transform, 80 | cor = cor(Total.Murders, THD)) 81 | cie.ine$County <- reorder(factor(cie.ine$County), -cie.ine$cor) 82 | 83 | ggplot(cie.ine, aes(as.Date(date), Total.Murders - THD)) + 84 | geom_line(color = "blue") + 85 | # geom_line(aes(as.Date(date), Total.Murders), color = "red") + 86 | scale_x_date() + 87 | facet_wrap(~ County, scale = "free_y") + 88 | xlab("Year") + ylab("Difference in number of homicides") + 89 | opts(title = "Differences in recorded number of homicides INEGI - SNSP") + 90 | opts(axis.text.x=theme_text(angle=60, hjust=1.2 )) + 91 | geom_hline(yintercept = 0, color = "gray40") 92 | 93 | 94 | mad <- function(Total.Murders, THD){ 95 | var(Total.Murders - THD) 96 | } 97 | 98 | cie.ine <- ddply(cie.ine, .(County), transform, 99 | cor = mad(Total.Murders, THD)) 100 | cie.ine$County <- reorder(factor(cie.ine$County), -cie.ine$cor) 101 | print(ggplot(cie.ine, aes(as.Date(date), THD)) + 102 | geom_line(color = "blue") + 103 | geom_line(aes(as.Date(date), Total.Murders), color = "red") + 104 | scale_x_date() + 105 | facet_wrap(~ County, scale = "free_y") + 106 | xlab("Year") + ylab("Number of homicides") + 107 | opts(title = "INEGI (red) vs. 
SNSP (blue) Monthly Number of Homicides (Jan 2006 - Nov 2008)") + 108 | opts(axis.text.x=theme_text(angle=60, hjust=1.2 ))) 109 | dev.print(png, "CIEISP/output/TwoHomicides02-08.png", width = 960, 110 | height = 600) 111 | 112 | hom2 <- addMonths(hom2) 113 | pop <- monthlyPop() 114 | homrate <- addHom(hom2, pop) 115 | homrate <- addTrend(homrate) 116 | homrate <- subset(homrate, year >= 2002 & year < 2008) 117 | month <- subset(month, Anio < 2008) 118 | month <- month[order(month$date),] 119 | month$date <- monthSeq("2002-01-15", 12*6) 120 | cor(homrate$murders, month$V1) 121 | 122 | ggplot(month, aes(as.Date(date), V1)) + 123 | geom_line(color = "blue") + 124 | geom_line(data = homrate, aes(as.Date(date), murders), 125 | color ="red") + 126 | xlab("Year") + ylab("Number of Homicides") + 127 | opts(title = "INEGI (red) vs. SNSP (blue) monthly number of homicides") + 128 | scale_x_date() 129 | -------------------------------------------------------------------------------- /timelines/ciudad-juarez.r: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | ##### Author: Diego Valle Jones 3 | ##### Website: www.diegovalle.net 4 | ##### Date Created: Wed Feb 03 20:54:12 2010 5 | ######################################################## 6 | #This program plots the murder rate in Ciudad Juarez 7 | #before and after the army took over. 8 | 9 | #The population data is from the CONAPO (2005-2030) 10 | #The murder data for 2007 and 2008 is from the INEGI 11 | #The murder data for 2009 is from: 12 | #http://www.puntoporpunto.com/informacion-general/en_juarez_suman_mil_13_asesina.php 13 | #and 14 | #http://www.larednoticias.com/detalle.cfm?s=26 15 | 16 | #I couldn't get this song out of my head 17 | #Nomberr uan, Nomberr uan, haha :'( 18 | 19 | #Aca en el norte la vida sigue, 20 | #a pesar de reinar la ley del oeste. 
21 | 22 | #I come from the land 23 | #of the macho man 24 | #y las putas y los narcos 25 | #y los cholos y mojados, 26 | #es mi forntera tan violenta 27 | #que la vida te renta. 28 | 29 | #Aca en el norte la vida sigue, 30 | #a pesar de reinar la ley del oeste. 31 | 32 | #You don`t have to be over 21, 33 | #to drink all that you can 34 | #and fuck with who you want. 35 | #Welcome to the town of the lost souls, 36 | #the lonely, the drugs 37 | #and the beautiful, beatiful girls. 38 | 39 | #Aca en el norte la vida sigue, 40 | #a pesar de reinar la ley del mas fuerte. 41 | 42 | #Es mi ciudad, don`t let me down. 43 | #I love my town. 44 | #`Cause Ciudad Juarez is the number one. 45 | #Es mi ciudad, donde hay lealtad. 46 | #Amo ese lugar. 47 | #`Cause ciudad Juarez is the number one. 48 | #Number one, number one, number one. 49 | 50 | #Es mi ciudad, don`t let me down... 51 | 52 | source("timelines/constants.r") 53 | source("library/utilities.r") 54 | 55 | hom <- read.csv(bzfile("timelines/data/county-month.csv.bz2")) 56 | cdjuarez09 <- c(136, 240, 73, 90, 125, 247, 248, 337, 57 | 304, 290, 374, 317, 227, 163, 203, 240, 253, 303) 58 | #265, 127, 253, 180, 253) 59 | cdjuarez0708 <- subset(hom, 60 | Code == "08 037" & 61 | (Year.of.Murder == "2008" | 62 | Year.of.Murder =="2007") & 63 | Month.of.Murder != "Total") 64 | cdjuarez0708$Tot <- apply(cdjuarez0708[ , 5:ncol(cdjuarez0708)], 1, sum, na.rm = T) 65 | 66 | #Estimate the monthly population 67 | pop0709 <- c(1359787, 1384102, 1407849, 1431072) 68 | pop <- data.frame(month=rep(1:12,4), year=rep(2007:2010, each=12)) 69 | pop$Monthly[pop$month == 6] <- pop0709 70 | pop$MonthlyEst <- na.spline(pop$Monthly, na.rm=TRUE) 71 | 72 | 73 | #A sequence of dates starting at the end of the month 74 | start <- as.Date(as.Date("2007/2/1")) 75 | next.mon <- seq(start, length= length(cdjuarez0708) + 76 | length(cdjuarez09), 77 | by='1 month') 78 | date.end <- next.mon - 1 79 | 80 | dates.mid <- seq(as.Date("2007/01/15"), length= 
length(cdjuarez0708) + 81 | length(cdjuarez09), 82 | by='1 month') 83 | 84 | cdj <- data.frame(Murders = c(cdjuarez0708$Tot, cdjuarez09), 85 | DateEnd = date.end, Date = dates.mid) 86 | 87 | #Anualized murder rate 88 | cdj$rate <- (cdj$Murders / pop$MonthlyEst[1:nrow(cdj)]) * 100000 * 12 89 | 90 | cdj$group <- cutDates(cdj, c(op.chi, cdj.rein, calderon, consulate, 91 | police)) 92 | 93 | Cairo(file = "timelines/output/ciudad-juarez.png", width=800, height=400) 94 | print(ggplot(cdj, aes(Date, rate)) + 95 | geom_point(aes(size = Murders), color = "darkred") + 96 | geom_vline(aes(xintercept = op.chi), alpha = .7) + 97 | geom_text(aes(x,y, label = "Joint Operation Chihuahua"), 98 | data = data.frame(x = op.chi, y = 152), 99 | size = 4, hjust = 1.01, vjust = 0) + 100 | geom_vline(aes(xintercept = cdj.rein), alpha = .7) + 101 | geom_text(aes(x,y, label = "Reinforcements sent"), 102 | data = data.frame(x = cdj.rein, y = 252), 103 | size = 4, hjust = 1.01, vjust = 0) + 104 | geom_vline(aes(xintercept = calderon), alpha = .7) + 105 | geom_text(aes(x,y, label = "Presidential visit"), 106 | data = data.frame(x = calderon, y = 55), 107 | size = 4, hjust = 1.01, vjust = 0) + 108 | #geom_vline(aes(xintercept = consulate), alpha = .7) + 109 | #geom_text(aes(x,y, label = "Consulate killings"), 110 | # data = data.frame(x = consulate, y = 35), 111 | # size = 4, hjust = 1.01, vjust = 0) + 112 | geom_vline(aes(xintercept = police), alpha = .7) + 113 | geom_text(aes(x,y, label = "Police handover"), 114 | data = data.frame(x = police, y = 15), 115 | size = 4, hjust = 1.01, vjust = 0) + 116 | geom_smooth(aes(group = group), se = FALSE, method = lm) + 117 | scale_size("Number of\nHomicides") + 118 | ylab("Annualized homicide rate") + xlab("") + 119 | opts(title = "Homicide rates in Ciudad Juarez before and after the army took control")) 120 | dev.off() 121 | 122 | ######################################################## 123 | #Structural Change Tests 124 | 
######################################################## 125 | rate <- ts(cdj$rate, start=2007, freq=12) 126 | ndays <- strptime(cdj$DateEnd, format = "%Y-%m-%d")$mday 127 | 128 | fd <- Fstats(rate ~ ndays) 129 | sctest(rate ~ ndays, type = "Chow", point = 15) 130 | 131 | op.chi 132 | cdj.rein 133 | summary(glm(rate ~ ndays)) 134 | bp.cdj <- breakpoints(rate ~ ndays, h = 4, breaks = 2) 135 | confint(bp.cdj, breaks = 3) 136 | -------------------------------------------------------------------------------- /trends/seasonal-decomposition.r: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | ##### Author: Diego Valle Jones 3 | ##### Website: www.diegovalle.net 4 | ##### Date Created: Sun Mar 28 11:14:38 2010 5 | ######################################################## 6 | #season and trend decomposition of the monthly murder rates in Mexico 7 | 8 | source("library/utilities.r") 9 | source("timelines/constants.r") 10 | 11 | plotReg <- function(df){ 12 | hom.ts <- ts(df$rate, start=1990, freq = 12) 13 | trend = time(hom.ts) 14 | ndays <- strptime(df$date, format = "%Y-%m-%d")$mday 15 | reg <- glm(rate ~ trend + factor(month) + ndays, data = df) 16 | reg2 <- glm(rate ~ trend + factor(year) + factor(month) + 17 | ndays, data = df) 18 | reg3 <- glm(rate ~ trend + ndays, data = df) 19 | print(anova(reg, reg2)) 20 | print(summary(reg)) 21 | print(summary(reg2)) 22 | print(summary(reg3)) 23 | df$fitted <- unlist(reg$fitted.values) 24 | df$fitted <- fitted(reg) 25 | print(ggplot(df, aes(as.Date(date), rate)) + 26 | geom_line() + 27 | geom_line(aes(as.Date(date), fitted, 28 | legend = FALSE), color = "blue") + 29 | scale_x_date(major ="year") + 30 | opts(title = reg$call)) 31 | } 32 | 33 | plotTrend <- function(df, ban){ 34 | start.dw <- op.mich 35 | end.dw <- as.Date("2008-12-31") 36 | print(ggplot(df, aes(as.Date(date), rate)) + 37 | geom_rect(xmin = as.numeric(start.dw), xmax = 
as.numeric(end.dw), 38 | ymin=0, ymax=Inf, alpha = .01, fill = "pink") + 39 | geom_line(size = .2) + 40 | geom_line(aes(as.Date(date), trend), color = "blue") + 41 | geom_vline(aes(xintercept = as.Date("2004-09-13")), color = "gray", 42 | linetype = 2) + 43 | scale_x_date() + 44 | xlab("") + ylab("Annualized Homicide Rate") + 45 | opts(title = "Monthly Homicide Rate and Trend (the Gray Line is the Assault Weapon Ban Expiration Date)") + 46 | annotate("text", x = as.numeric(start.dw), y = 16.9, 47 | label = "Drug War", hjust =-.2)) 48 | } 49 | 50 | plotSeasonal <- function(df){ 51 | months <- factor(format(as.Date(df$date), "%b"))[1:12] 52 | print(ggplot(df[1:12,], aes(1:12, seasonal), group = 1) + 53 | geom_line() + 54 | scale_x_continuous(breaks = 1:12, 55 | labels = months) + 56 | xlab("") + 57 | opts(title = "Seasonal Component of the Homicide Rate") + 58 | geom_hline(yintercept=0, color = "gray70")) 59 | } 60 | 61 | hom <- read.csv(bzfile("timelines/data/county-month-gue-oax.csv.bz2")) 62 | hom <- cleanHom(hom) 63 | hom <- addMonths(hom) 64 | #hom <- subset(hom, Year.of.Murder >= 1994) 65 | 66 | 67 | #I can't see any clearcut paterns at the state level 68 | ggplot(hom, aes(y = Total.Murders, x = Month.of.Murder, 69 | group = Year.of.Murder, color = Year.of.Murder)) + 70 | geom_line() + 71 | facet_wrap(~ County, scales = "free_y") 72 | 73 | #Now I can see them 74 | print(ggplot(hom, aes(as.Date(Date), Total.Murders)) + 75 | geom_line() + 76 | scale_x_date() + 77 | facet_wrap(~ County, scales = "free_y") + 78 | opts(title = "Monthly Number of Homicides")) 79 | dev.print(png, "trends/output/st-murders.png", width = 960, height = 600) 80 | 81 | #Now only since the start of the Drug War 82 | print(ggplot(subset(hom, as.Date(Date) >= as.Date("2006/12/01")), 83 | aes(as.Date(Date), Total.Murders)) + 84 | geom_line() + 85 | scale_x_date() + 86 | facet_wrap(~ County, scales = "free_y")+ 87 | opts(title = "Monthly Number of Homicides Since the Start of the Drug War")) 88 
| dev.print(png, "trends/output/st-drug-war-murders.png", width = 960, height = 600) 89 | 90 | #Let's see what Chiapas looked like during the 1997 Acteal massacre 91 | print(ggplot(subset(hom, as.Date(Date) <= as.Date("1998/06/01") & 92 | as.Date(Date) >= as.Date("1997/01/01") & 93 | County == "Chiapas"), 94 | aes(as.Date(Date), Total.Murders)) + 95 | geom_line() + 96 | scale_x_date() + 97 | facet_wrap(~ County, scales = "free_y")+ 98 | opts(title = "Monthly Number of Homicides in Chiapas (Jan 1997 - Jun 1998)")) 99 | 100 | 101 | #STL decomposition with loess 102 | pop <- monthlyPop() 103 | homrate <- addHom(hom, pop) 104 | homrate <- addTrend(homrate) 105 | homrate <- subset(homrate, year >= 1994) 106 | 107 | plotReg(homrate) 108 | dev.print(png, "trends/output/regression.png", width = 800, 109 | height = 600) 110 | 111 | Cairo(file = "trends/output/trend.png", width = 960, height=600) 112 | plotTrend(homrate) 113 | dev.off() 114 | 115 | plotSeasonal(homrate) 116 | dev.print(png, "trends/output/seasonal.png", width = 450, height = 300) 117 | 118 | 119 | 120 | 121 | 122 | ######################################################## 123 | #Bunch of crappy tests 124 | ######################################################## 125 | #See if the residuals are normal (the series starts at the first year kept by the subset above) 126 | hom.ts <- ts(homrate$rate, start = min(homrate$year), freq = 12) 127 | plot(stl(hom.ts, "per")) 128 | dhom <- diff(hom.ts) 129 | plot(dhom) 130 | shapiro.test(dhom) 131 | hist(dhom) 132 | #12 month lag 133 | lag.plot(dhom, 40) 134 | 135 | 136 | #Arima 137 | #fit.ar <- arima(hom.ts,order=c(1,1,1)) 138 | #tsdiag(fit.ar) 139 | #Box.test(fit.ar$residuals) 140 | #plot(hom.ts, xlim=c(1990,2010), ylim=c(5,19), type = "l") 141 | #hom.pred <- predict(fit.ar, n.ahead = 12) 142 | #lines(hom.pred$pred, col="red") 143 | #lines(hom.pred$pred + 2 * hom.pred$se, col="red", lty=3) 144 | #lines(hom.pred$pred - 2 * hom.pred$se, col="red", lty=3) 145 | 146 | -------------------------------------------------------------------------------- 
/missing-homicides/readme.md: -------------------------------------------------------------------------------- 1 | How reliable are Mexican Homicide Statistics? 2 | ================================================ 3 | 4 | There are 2 main sources of homicide statistics in Mexico, the vital 5 | statistics from the INEGI, and the police data which are available at 6 | the ICESI (a civic association not affiliated with the 7 | government which gets the data from the SNSP [Sistema Nacional de Seguridad Pública]). But as it turns out the data from the ICESI is only an estimate 8 | (even if the secretary of the interior uses the data in his presentations 9 | as if it were the number of homicides), 10 | still, you can download the final numbers from the Statistical Yearbooks of 11 | each state. Since both data sources are available at the state level you can 12 | compare them and see if there are any differences. 13 | 14 | The plot gets thicker: it turns out the data from the ICESI for 2008 are only 15 | estimates (missing over 1,100 homicides in Chihuahua, looks like it was a mid-year 16 | "estimate"). The INEGI releases a series of statistical yearbooks for each state 17 | that contain the final (final being over 90% reported) data according to the local police forces (homicide is a 18 | local crime in Mexico). I didn't include the statistical yearbooks because they're 19 | about 250MB, but in the directory "stat-yearbooks" you'll find a script to download them. 20 | 21 | Even thicker: the data from the ICESI and the statistical yearbooks are not even 22 | homicides, but police reports. So if 18 kids are killed in a massacre at the same 23 | time, and there's only one police report, they are recorded as one. The statistical 24 | yearbooks do have the data available by number of victims, but they combine "homicidios 25 | dolosos" (homicide) with "homicidios culposos" (manslaughter). I wonder, wonder why 26 | they would do this? 
27 | 28 | Data are also available from PAHO and the UN, but only at the national level. 29 | 30 | Since the INEGI database failed to follow Benford's law I also checked to see if some of the big massacres that occurred in Mexico were recorded; not surprisingly, some of them were not in the database. 31 | 32 | Output 33 | ------ 34 | * Chart of homicide rates according to the INEGI, ICESI, PAHO, and the UN Crime Survey. 35 | * Bar plot of the differences in number of homicides, one for the ICESI data and another for the final data from the Statistical Yearbooks. 36 | * Scatter plot of the different homicide data (INEGI vs ICESI). That is, see if the police records (labeled ICESI) match those of the vital statistics system (labeled INEGI). The police records are missing over 1,100 homicides in Chihuahua alone. 37 | * Scatter plot against the proportions. 38 | * A csv file with the number of homicides in the months during which there were massacres 39 | 40 | Sources 41 | ------ 42 | __Mexican Homicide Data:__ 43 | 44 | [INEGI](http://www.inegi.org.mx/est/contenidos/espanol/proyectos/continuas/vitales/bd/mortalidad/MortalidadGeneral.asp?s=est&c=11144) 45 | 46 | [ICESI](http://www.icesi.org.mx/documentos/estadisticas/estadisticas/denuncias_homicidio_doloso_1997_2008.xls) 47 | 48 | [Statistical Yearbooks](http://www.inegi.org.mx/est/contenidos/espanol/sistemas/sisnav/selproy.aspx): The following states were not available as of Feb-17-2009, so I used the estimates that appear in the file from the ICESI: Durango, Tlaxcala, Yucatan and Nayarit. Furthermore, there was a mistake in the statistical yearbook for Querétaro (the values from "homicidios dolosos" [homicide] and "homicidios culposos" [manslaughter] were transposed). 49 | 50 | [PAHO](http://www.paho.org/English/SHA/coredata/tabulator/newTabulator.htm) Pan American Health Organization, Health Analysis and Statistics Unit. Regional Core Health Data Initiative; Technical Health Information System. Washington DC, 2007. 
51 | 52 | UN Crime Survey: Eighth and Seventh United Nations Survey of Crime Trends and Operations of Criminal Justice Systems 53 | 54 | __Population Data:__ 55 | 56 | [Indicadores demográficos básicos 1990-2030](http://www.conapo.gob.mx/index.php?option=com_content&view=article&id=125&Itemid=203) 57 | 58 | 59 | Massacres 60 | ---------- 61 | 62 | [Aguas Blancas Massacre](http://www.sfgate.com/chronicle/special/mexico/massacre.html): (Warning: Graphic Video) 17 dead, June 28, 1995. In the INEGI homicide database 63 | 64 | [Acteal Massacre](http://zedillo.presidencia.gob.mx/pages/chiapas/docs/crono.html): 45 dead, December 22, 1997. __Not in the INEGI homicide database__ 65 | 66 | [Decapitated Bodies in Yucatán](http://www2.esmas.com/noticierostelevisa/mexico/009070/hallan-doce-cadaveres-decapitados-yucatan): August 28, 2008, 12 dead. In the INEGI homicide database 67 | 68 | [Arizpe-Cananea](http://www.oem.com.mx/esto/notas/n279931.htm): May 16, 2007, 22 dead. In the INEGI homicide database 69 | 70 | [Tijuana prison riot](http://news.newamericamedia.org/news/view_article.html?article_id=413e55db3c6d5eac317d63edb8ce03d8): September 2008, 25 dead. In the INEGI homicide database 71 | 72 | [Reynosa prison riot](http://www.horacerotam.com/Not_interior1.asp?Id=NHCT22047&link=280): October 2008, 21 dead. In the INEGI homicide database 73 | 74 | [Ensenada](http://articles.latimes.com/1998/sep/19/news/mn-24394): September 19, 1998, 18 dead. In the INEGI homicide database 75 | 76 | [24 dead outside Mexico City](http://www.nytimes.com/2008/09/14/world/americas/14mexico.html?_r=1): September 13, 2008, 24 dead. 
In the INEGI homicide database 77 | 78 | 79 | CodeBook: 80 | --------- 81 | INEGIvsICESI.csv 82 | 83 | Colums: 84 | State | INEGI | ICESI | Abbrv | Anuario | Stat.Yrbks 85 | 86 | State - Name of the State 87 | 88 | INEGI - Number of homicides according to the INEGI 89 | 90 | ICESI - Number of homicides according to the ICESI (which takes the data from SNSP) 91 | 92 | Anuario - The raw values according to the Statistical Yearbooks 93 | 94 | Stat.Yrbks - The values from the Statistical Yearbooks if available, if not, the data is from the ICESI 95 | 96 | -------------------------------------------------------------------------------- /most-violent-counties/most-violent.r: -------------------------------------------------------------------------------- 1 | ######################################################## ##### Author: Diego Valle Jones ##### Website: www.diegovalle.net ##### Date Created: Sat Feb 27 22:38:37 2010 ######################################################## #The counties with the highest homicide rates for women and men #data source: Estadísticas Vitales INEGI removeCommas <- function(hom, col2cvt) { hom[,col2cvt] <- lapply(hom[,col2cvt], function(x){as.numeric(gsub(",", "", x))}) hom } cleanHom <- function(hom) { names(hom)[1:4] <- c("Code","County","Year.of.Murder","Sex") hom$County <- iconv(hom$County, "windows-1252", "utf-8") hom$Code <- iconv(hom$Code, "windows-1252", "utf-8") hom <- hom[-grep("=CONCATENAR", hom$Code),] hom <- hom[-grep("Total", hom$County),] hom <- hom[-grep("No especificado", hom$County),] hom <- hom[-grep("Total", hom$Year.of.Murder),] hom <- hom[-grep("No especificado", hom$Year.of.Murder),] hom <- hom[-grep("Total", hom$Sex),] hom$X.4 <- NULL hom$Year.of.Murder <- as.numeric(as.numeric(gsub('[[:alpha:]]', '', hom$Year.of.Murder))) hom <- subset(hom, Year.of.Murder >= 1990) col2cvt <- 5:ncol(hom) hom <- removeCommas(hom, col2cvt) hom[is.na(hom)] <- 0 hom$tot <- apply(hom[ , col2cvt], 1, sum) hom$CLAVE <- as.numeric(gsub(" 
", "", hom$Code)) hom } cleanPop <- function(filename, sex) { pop <- read.csv(filename) pop <- na.omit(pop) col2cvt <- 3:ncol(pop) pop[,col2cvt] <- lapply(pop[ ,col2cvt], function(x){as.numeric(gsub(" ", "", x))}) pop$Sex <- c(sex) names(pop)[1:2] <- c("CLAVE", "Mun") popm <- melt(pop, id = c("CLAVE", "Mun", "Sex")) popm$Mun <- iconv(popm$Mun, "windows-1252", "utf-8") popm$variable <- as.numeric(substring(popm$variable, 2)) popm } #Some counties in Oaxaca have changed recently so we have to merge #them by name instead of code joinChangedMun <- function(hom, popm, hom.popm) { changed <- setdiff(hom$CLAVE, popm$CLAVE) hom.ch <- subset(hom, CLAVE %in% changed) hom.popm.ch <- merge(hom.ch, popm, by.x = c("County", "Year.of.Murder", "Sex"), by.y = c("Mun", "variable", "Sex")) hom.popm.ch$CLAVE.x <- NULL hom.popm.ch$Mun <- hom.popm.ch$County names(hom.popm.ch)[25] <- "CLAVE" hom.popm <- rbind(hom.popm, hom.popm.ch) hom.popm$rate <- (hom.popm$tot / hom.popm$value) * 100000 hom.popm } joinHomPop <- function(hom,popm){ hom.popm <- merge(hom, popm, by.x =c("CLAVE", "Year.of.Murder", "Sex"), by.y = c("CLAVE", "variable", "Sex"), all.y = TRUE) hom.popm[is.na(hom.popm$tot), ]$tot <- 0 hom.popm <- joinChangedMun(hom, popm, hom.popm) } getMeans <- function(hom.plot) { ddply(hom.plot, .(CLAVE), function(df) mean(df$rate)) } getMun <- function(df, mostviol, size){ hom.plot <- subset(df, value > size) means <- getMeans(hom.plot) if(mostviol) { means <- means[order(-means$V1), ] } else { means <- means[order(means$V1), ] } high.murder <- unique(means$CLAVE[1:25]) high.murder } plotRate <- function(df, title="", mostviol = TRUE){ size <- 50000 hom.plot <- subset(df, CLAVE %in% getMun(df, mostviol, size)) means <- getMeans(hom.plot) hom.plot <- merge(means, hom.plot, by = "CLAVE") hom.plot <- ddply(hom.plot, .(CLAVE), transform, max = max(rate)) ifelse(mostviol, hom.plot$Mun <- with(hom.plot, reorder(factor(Mun), -max)), hom.plot$Mun <- with(hom.plot, reorder(factor(Mun), max))) 
if(9015 %in% hom.plot$CLAVE){ hom.plot$Mun <- factor(hom.plot$Mun, levels = c(levels(hom.plot$Mun), "Benito Juarez (Cancun)", "Cuauhtémoc DF")) #hom.plot[hom.plot$CLAVE == 23005, ]$Mun = "Benito Juarez (Cancun)" hom.plot[hom.plot$CLAVE == 9015, ]$Mun = "Cuauhtémoc DF" } ggplot(hom.plot, aes(Year.of.Murder, rate)) + geom_line() + geom_point(aes(size = tot)) + facet_wrap(~ Mun) + theme_bw() + geom_hline(aes(yintercept = V1), linetype = 2, color="gray70") + scale_x_continuous(breaks = c(2005,2008)) + # labels = c("05","06","07", "08")) + opts(title = title) + ylab("Homicide rate") + xlab("") + opts(axis.text.x=theme_text(angle=60, hjust=1.2 )) + scale_size("Number of\nHomicides") } hom <- read.csv(bzfile("states/data/homicide-mun-2008.csv.bz2"), skip=4) hom <- cleanHom(hom) #for men popm <- cleanPop("most-violent-counties/data/poblacionh.csv.bz2", "Hombre") hom.popm <- joinHomPop(hom, popm) #for women popf <- cleanPop("most-violent-counties/data/poblacionm.csv.bz2", "Mujer") hom.popf <- joinHomPop(hom, popf) print(plotRate(hom.popm, "Most violent municipalities for men (with more than 50,000 men)", TRUE)) dev.print(png, file="most-violent-counties/output/Most violent municipalities for men.png", width=800, height=600) print(plotRate(hom.popf, "Most violent municipalities for women (with more than 50,000 women)", TRUE)) dev.print(png, file="most-violent-counties/output/Most violent municipalities for women.png", width=800, height=600) print(plotRate(hom.popm, "Least violent municipalities for men (with more than 50,000 men)", FALSE)) dev.print(png, file="most-violent-counties/output/Least violent municipalities for men.png", width=800, height=600) print(plotRate(hom.popf, "Least violent municipalities for women (with more than 50,000 women)", FALSE)) dev.print(png,file="most-violent-counties/output/Least violent municipalities for women.png", width=800, height=600) juar <- subset(hom, CLAVE == "8037" & Sex == "Mujer") ggplot(juar, aes(Year.of.Murder, tot)) + 
geom_line() #According to the movie "on the edge" there were 437 femicides #(I assume from 1993 to 2006, when the movie was released) #http://political.detritus.net/juarez/ sum(subset(juar, Year.of.Murder > 2006 & Year.of.Murder < 2008)$tot) #108 - 2008 #19 - 2007 -------------------------------------------------------------------------------- /predictions/predictions.r: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | ##### Author: Diego Valle Jones 3 | ##### Website: www.diegovalle.net 4 | ##### Date Created: Fri Apr 02 20:35:30 2010 5 | ######################################################## 6 | #What will the homicide rate be in 2009 and 2010 7 | 8 | source("library/utilities.r") 9 | 10 | 11 | 12 | saveplotRegM <- function(hexe){ 13 | print(ggplot(hexe, aes(Executions, murders, label = tmon)) + 14 | geom_text(hjust=-.1) + 15 | geom_point() + 16 | stat_smooth(method = lm)) 17 | dev.print(png, "predictions/output/exe-hom.png", 18 | width = 450, height = 300) 19 | } 20 | 21 | regM <- function(df, executions, saveplot = FALSE){ 22 | #The murder rate for january and february was low cause it was low 23 | #in Mexico City, which isn't beset by the drug war, so we exlude it. 
24 | #Alos exclude December 2008 because it will be off by 25% or so 25 | h07.08 <- df[208:227,] 26 | 27 | #The data for november looks wrong, so I'm using the average 28 | #exe[35, "Milenio"] <- (exe[34, "Milenio"] + exe[36, "Milenio"])/2 29 | hexe <- merge(h07.08, executions, by.x=c("year", "month"), 30 | by.y=c("Year", "Month")) 31 | hexe <- hexe[order(hexe$year, hexe$month),] 32 | hexe$tmon <- paste(factor(format(as.Date(hexe$date), "%b")), 33 | hexe$year) 34 | reg <- lm(murders ~ Executions, data = hexe) 35 | print(summary(reg)) 36 | if(saveplot == TRUE) saveplotRegM(hexe) 37 | reg 38 | } 39 | 40 | murderRate <- function(vec, pop = 107550697){ 41 | (sum(vec)) / pop * 100000 42 | } 43 | 44 | predict09 <- function(reg) { 45 | e2009 <- subset(exe, Year == 2009) 46 | 47 | pre09 <- data.frame(predict(reg, 48 | data.frame(Executions = 49 | e2009$Executions), 50 | interval = "confidence")) 51 | res <- sapply(pre09, murderRate) 52 | res 53 | } 54 | 55 | 56 | 57 | predictChart <- function(exe, homrate) { 58 | pre10 <- data.frame(predict(reg, 59 | data.frame(Executions = 60 | exe$Executions[25:39]), 61 | interval = "confidence")) 62 | pre10$date <- monthSeq("2009/01/15", 15) 63 | 64 | pop <- c(homrate$Monthly.Pop, rep(NA,11), 65 | 107550697, rep(NA,11), 108396211) 66 | 67 | pre10$pop <- na.spline(pop, na.rm=FALSE)[235:249] 68 | pre10 <- rbind(pre10, data.frame(fit = homrate[228, "murders"], 69 | lwr = homrate[228, "murders"], 70 | upr = homrate[228, "murders"], 71 | date = homrate[228, "date"], 72 | pop = homrate[228, "Monthly"])) 73 | pre10[1:3] <- sapply(pre10[1:3], 74 | function(x) x / pre10$pop * 100000 * 12) 75 | pre10 76 | } 77 | 78 | exeRate <- function(df, population){ 79 | df$date <- monthSeq("2007/01/15", 39) 80 | df$pop <- population 81 | df$rate <- df$Executions / df$pop * 100000 * 12 82 | df 83 | } 84 | 85 | 86 | homRate2010 <- function(pre10, homrate){ 87 | rate08.09 <- data.frame(rate = 88 | c(homrate$rate[214:228], pre10$fit[1:15]), 89 | date = 1:30) 90 
| reg10 <- lm(rate ~ date, data = rate08.09) 91 | x <- predict(reg10, data.frame(date = 25:36), 92 | interval = "confidence") 93 | pre10[13:14,1:3] 94 | all10 <- rbind(x[3:12,], pre10[13:14,1:3]) 95 | apply(all10,2,mean) 96 | } 97 | 98 | plotHomEx <- function(pre10, exe, homrate) { 99 | label09 <- paste("homicide rate\nin 2009 ~",round(k2009.rate[[1]],1)) 100 | label10 <- paste("homicide rate\nin 2010 ~",round(k2010.rate[[1]],1)) 101 | ggplot(pre10, aes(as.Date(date), fit)) + 102 | scale_x_date() + 103 | geom_line(linetype = 2, color = "darkred") + 104 | geom_line(data = homrate[204:228,], aes(as.Date(date), rate), 105 | color = "darkred") + 106 | geom_ribbon(aes(ymax = upr, ymin = lwr), alpha = .2, 107 | fill ="darkred") + 108 | xlab("") + ylab("Annualized Homicide Rate") + 109 | annotate("text", x = as.numeric(as.Date("2007-07-01")), y = 27, 110 | label = "homicide rate\nin 2007 = 8.3") + 111 | annotate("text", x = as.numeric(as.Date("2008-07-01")), y = 27, 112 | label = "homicide rate\nin 2008 = 12.8") + 113 | annotate("text", x = as.numeric(as.Date("2009-07-01")), y = 27, 114 | label = label09) + 115 | annotate("text", x = as.numeric(as.Date("2010-05-15")), y = 27, 116 | label = label10) + 117 | annotate("text", x = as.numeric(as.Date("2010-04-15")), y = 22.5, 118 | label = "homicide\nrate", hjust =0, color = "darkred") + 119 | annotate("text", x = as.numeric(as.Date("2010-04-15")), y = 12, 120 | label = "execution\nrate", hjust =0, color ="darkgreen") + 121 | geom_line(data = exe, aes(as.Date(date), rate), color = "darkgreen") + 122 | scale_x_date(limits = c(as.Date("2006-11-01"), 123 | as.Date("2010-08-01"))) + 124 | scale_y_continuous(limits = c(1.5, 28)) + 125 | opts(title = "Monthly Homicide and Execution Rates") 126 | } 127 | 128 | savePlot <- function(p) { 129 | Cairo(file = "predictions/output/estimate.png", w = 640, h = 480) 130 | print(p) 131 | dev.off() 132 | } 133 | 134 | 135 | #Let's explore the differences in executions reported by the 136 | 
#newspapers Milenio and Reforma
exe.st <- read.csv("predictions/data/executions-bystate.csv")
exe.st$Universal <- NULL
exe.st <- melt(exe.st, id = "State")
#Re-encode first and reorder second: iconv() coerces a factor to a plain
#character vector, so calling it after reorder() discarded the ordering by
#value
exe.st$State <- iconv(exe.st$State, "windows-1252", "utf-8")
exe.st$State <- with(exe.st, reorder(factor(State), value))
print(ggplot(exe.st, aes(value, State, group = variable,
                         color = variable, shape = variable)) +
      geom_point() +
      opts(title = "Differences in Reported Number of Executions in 2009"))
dev.print(png, "predictions/output/diff-execut2009.png",
          width = 500, height = 600)

#Prepare the data
hom <- read.csv(bzfile("timelines/data/county-month-gue-oax.csv.bz2"))
hom <- cleanHom(hom)
hom <- addMonths(hom)
pop <- monthlyPop()
homrate <- addHom(hom, pop)
homrate <- addTrend(homrate)

#The predictions
exe <- read.csv("predictions/data/executions-bymonth.csv")
#Explicit print(): nothing is auto-printed when this file is source()d
print(ggplot(exe, aes(1:39, Milenio)) +
      geom_line(color = "blue") +
      geom_line(aes(1:39, Reforma), color = "red"))
exe$diff <- exe$Reforma - exe$Milenio
#I couldn't find the Reforma data for March, so here is an estimate:
#Milenio's figure plus the mean Reforma - Milenio difference of rows 32:38
exe$Reforma[39] <- exe$Milenio[39] + mean(exe$diff[32:38])
exe$Executions <- (exe$Reforma + exe$Milenio) /2

reg <- regM(homrate, exe, saveplot = TRUE)
#plot(reg)
#Residual diagnostics; printed explicitly for the same reason as above
print(durbinWatsonTest(reg))
print(adf.test(residuals(reg)))

k2009.rate <- predict09(reg)
print(round(k2009.rate,1))
pre10 <- predictChart(exe, homrate)
k2010.rate <- homRate2010(pre10, homrate)
print(round(k2010.rate,1))
exe <- exeRate(exe, c(homrate$Monthly[205:228], pre10$pop[1:15]))

savePlot(plotHomEx(pre10, exe, homrate))

# --------------------------------------------------------------------------------
# /missing-homicides/missing-homicides.r:
# --------------------------------------------------------------------------------
########################################################
##### Author: Diego Valle Jones
##### Website: www.diegovalle.net
##### Date Created: Fri Feb 05 20:34:20 2010
########################################################
#1. Time series of the different agencies that collect
#homicide data in Mexico
#2. Scatter plot of the INEGI homicide data vs the ICESI.
#Chihuahua is a big outlier
#3. Bar plot of the differences
#4. Scatter plot against the proportions

#Write plot `p` to `filename` through a Cairo device.
#  p:             plot object, drawn with print()
#  filename:      path of the output file
#  width, height: size passed on to Cairo()
savePlotAA <- function(p, filename, width = 960, height = 600){
  Cairo(file = filename, width=width, height=height)
  #Close the device even when drawing fails so it does not capture the
  #plots that come afterwards
  tryCatch(print(p), error = function(e) { dev.off(); stop(e) })
  dev.off()
}

########################################################
# Line plot of PAHO, UN, INEGI and ICESI homicide rates
########################################################
homts <- read.csv("missing-homicides/data/PAHO-UN-INEGI-ICESI.csv")
homts <- subset(homts, Year >= 1994)
names(homts)[4] <- "SNSP"
p <- ggplot(melt(homts, id="Year"), aes(Year, value, group = variable,
                                        color = variable)) +
  geom_line(alpha = .7, size = 1.2) +
  ylab("Homicide rate") +
  opts(title = "Different estimates of the \'homicide\' rate")
#Full argument name instead of relying on partial matching of `file`
savePlotAA(p, filename = "missing-homicides/output/PAHO-UN-INEGI-SNSP.png",
           width = 640, height = 480)

#FADE IN:
#LOWLY GOVERNMENT OFFICIAL Fernando is on the phone with a POWERFUL
#MEXICAN POLITICIAN named Felipe

# POWERFUL MEXICAN POLITICIAN

#(into phone)
#What the heck are you doing!
40 | 41 | 42 | 43 | # FERNANDO 44 | 45 | #(into phone) 46 | #I'm deleting over 1,100 murders from the police database, boss 47 | 48 | 49 | 50 | # POWERFUL MEXICAN POLITICIAN 51 | 52 | #(into phone) 53 | #Amigo: 2010 will be the 100th anniversary of the Mexican Revolution and the 200th anniversary of the Independence War, perhaps we should mark the occasion in a special way 54 | 55 | 56 | # FERNANDO 57 | 58 | #(into phone) 59 | #Yes Jefe, in 2010 I will delete 2,100 murders from the database 60 | #to mark the occasion 61 | 62 | 63 | # POWERFUL MEXICAN POLITICIAN 64 | 65 | #(into phone) 66 | #Ja, ja, ja, pinche bola de pendejos, nadie se va a dar cuenta de 67 | #lo que hicimos. 68 | #Oye, ya nos dijo a que estado le vamos a dar en la madre a la 69 | #siguiente. 70 | #(English Subtitles) 71 | #Please invest in Mexico, it is a safe and honest country 72 | 73 | #FADE TO: 74 | 75 | #EXT. INSIDE THE LOBBY OF A GOVERNMENT BUILDING - DAY 76 | 77 | #FERNANDO breaks into song backed up by ten "edacanes" 78 | 79 | #I am the very model of a modern Mexican politician 80 | #With many cheerful facts about the murder rate. 
#I'm quitting because of my party's political calculus;
#I know the president of my party is an animalculous:
#In short, in lying, cheating, and stealing
#I am the very model of a modern Mexican politician

# FADE OUT

# THE END


########################################################
# Scatter Plot
########################################################

ivsi <- read.csv("missing-homicides/data/INEGIvsICESI.csv")

#Get rid of the full name of the states (eg: "Veracruz de Ignacio de la
#Llave" changes to "Veracruz").
#  df:      data frame
#  varname: name of the column holding the names
#Returns the cleaned column as a character vector (not the data frame)
cleanNames <- function(df, varname = "County"){
  #Everything from the first " de " onwards is dropped. The pattern used to
  #start with a dangling "*", which has nothing to repeat
  gsub(" de .*", "", df[[varname]])
}

ivsi$State <- cleanNames(ivsi, "State")
ivsi$State <- iconv(ivsi$State, "windows-1252", "utf-8")
ivsi$Abbrv <- iconv(ivsi$Abbrv, "windows-1252", "utf-8")
print(ggplot(ivsi, aes(INEGI, ICESI,
                       label = paste(State," (",
                         as.character(INEGI-ICESI), ")", sep = ""))) +
      geom_text(aes(size = sqrt(abs(INEGI-ICESI))), hjust=-.1) +
      geom_point() +
      geom_abline(slope=1, linetype=2, color="blue") +
      opts(title = "Differences in \'homicide\' reporting rates (INEGI - SNSP)") + ylab("SNSP") +
      scale_x_continuous(limits = c(0, 4000)) +
      opts(legend.position = "none") +
      #The label had been mangled into "missing-homicides/data ara equal ->"
      annotate("text", 1400, 1400, label = "data are equal ->",
               color ="blue", hjust = 1))
dev.print(png, file = "missing-homicides/output/scatter-inegi-snsp.png", width = 600, height = 480)




########################################################
# Bar Plot
########################################################

#Overlaid horizontal bars comparing INEGI with one other source.
#  df: melted data frame with columns State, variable and value, where
#      variable has two levels: INEGI and the source it is compared with
#Each state is labelled with the signed difference INEGI - other source,
#placed at the end of the longer of its two bars.
drawBars <- function(df) {
  values <- cast(df)
  #Use the columns of `values` itself. The comparison used to read the
  #global ivsi[, 3], which is a different column when the yearbook data is
  #plotted and is not guaranteed to be in the row order produced by cast()
  labels <- data.frame(State = values$State,
                       variable = c("INEGI"),
                       value = pmax(values$INEGI, values[,3]),
                       missing = values[,2] - values[,3])
  labels$missing <- ifelse(labels$missing < 0,
                           as.character(labels$missing),
                           as.character(paste("+",
                                              labels$missing, sep="")))
  df$State <- with(df, reorder(factor(State), value))
  ggplot(df, aes(x=State, y=value, group = variable,
                 fill = variable)) +
    opts(title = "Differences in reported \'homicides\' (2008)") +
    geom_bar(stat = "identity", position = "identity",
             alpha = .5) +
    ylab("Number of Homicides") +
    geom_text(data = labels, aes(label = missing), hjust=-.1,
              color = "gray40") +
    scale_y_continuous(limits = c(0, 3000)) +
    coord_flip()
}
#ICESI data
names(ivsi)[3] <- "SNSP"
print(drawBars(melt(ivsi[ , c(1:2,3)], id="State")))
dev.print(png, file = "missing-homicides/output/INEGIvsSNSP.png", width = 480, height = 600)

#data from the statistical yearbooks
print(drawBars(melt(ivsi[ , c(1:2,6)], id="State")))
dev.print(png, file = "missing-homicides/output/INEGIvsYearbook.png", width = 480, height = 600)


########################################################
# Percentage Difference
########################################################
#Difference relative to the mean of both sources. The denominator needs its
#own parentheses: x / (a + b) / 2 is (x / (a + b)) / 2, a quarter of the
#value computed for the regression below
print(ggplot(ivsi, aes(INEGI, abs(INEGI - Stat.Yrbks) /
                       ((Stat.Yrbks + INEGI) / 2), label = Abbrv)) +
      geom_text(alpha = .6)+
      #Not significant
      geom_smooth(method=lm, se = FALSE, color ="red") +
      scale_y_continuous(formatter = "percent") +
      xlab("Number of Homicides according to INEGI") +
      ylab("Percentage difference") +
      opts(title = "Statistical Yearbook and INEGI"))
dev.print(png, file = "missing-homicides/output/INEGIvsStatYrbksprop.png", width = 480, height = 480)

#print() so the summary is shown when the file is source()d
print(with(ivsi, {
  dep <- abs((INEGI - Stat.Yrbks) /
             ((Stat.Yrbks + INEGI) / 2) )
  summary(lm(INEGI ~ dep, data = ivsi))
}))

#How much did the number of homicide change.
#Compare the data from
#the SNSP with that of the statistical yearbooks
print(ggplot(ivsi, aes(SNSP,
                       Stat.Yrbks / SNSP, label = Abbrv)) +
      geom_text(alpha = .6)+
      scale_y_continuous(formatter = "percent") +
      xlab("Number of Homicides according to ICESI") +
      ylab("Percentage extra according to the Statistical Yearbooks") +
      opts(title="SNSP and Statistical Yearbooks"))


########################################################
#Dot plot
########################################################

mivsi <- melt(ivsi[ , c(1:3,5)], id="State")
mivsi <- na.omit(mivsi)
mivsi$State <- with(mivsi, reorder(factor(State), value))
print(ggplot(mivsi, aes(value, State, group = variable,
                        fill = variable)) +
      opts(title = "Differences in reported \'homicides\'") +
      xlab("Number of Homicides") +
      geom_point(aes(color = variable, shape = variable),
                 size = 3, alpha = .5))
dev.print(png, file = "missing-homicides/output/INEGIvsSNSPvsYear.png", width = 480, height = 600)
# --------------------------------------------------------------------------------
# /guns-executions/guns-executions.r:
# --------------------------------------------------------------------------------
########################################################
##### Author: Diego Valle Jones
##### Website: www.diegovalle.net
##### Date Created: Wed Mar 24 11:44:12 2010
########################################################
#Compare the homicide rate with the execution rate and the homicide
#with firearm rate

source("library/utilities.r")

########################################################
#Homicides, Homicides with Firearm, and Executions
########################################################
homr <- read.csv("accidents-homicides-suicides/output/homicide.csv")
exe <- read.csv("guns-executions/data/firearm-executions.csv")

#Average the data from Reforma and Milenio
exe$Executions[4:8] <- exe$Renglones[4:8]
exe$Executions[9] <- (exe$Reforma[9] + exe$Renglones[9]) / 2
exe$Executions[10:12] <- (exe$Reforma[10:12] + exe$Milenio[10:12]) / 2
exe <- exe[,1:4]

exe$Homicides <- c(homr$Tot[9:19], NA)
exer <- exe
#Columns 2, 3 and 5 become rates per 100,000
exer[,c(2,3,5)] <- sapply(exer[,c(2,3,5)],
                          function(x) x / exer$Population * 100000)
mexer <- melt(exer[,c(1:3,5)], id = "Year")
mexer$variable <- factor(factor(mexer$variable),
                         levels = c("Homicides",
                                    "Executions",
                                    "Firearm.Homicides"))

p <- ggplot(mexer, aes(Year, value, group = variable,
                       color = variable)) +
  geom_line() + ylab("Rate") +
  opts(title = "") +
  ylim(c(0, max(mexer$value, na.rm = TRUE)))
mid.points <- dl.indep(data.frame(d[4,],hjust=-0.2,vjust=-0.8))
print(direct.label(p, mid.points))
dev.print(png, "guns-executions/output/homicides-executions.png",
          width = 500, height = 400)

#murder with firearm as a percentage of total homicides,
#the proportion of murders with firearm has risen mostly because it
#has decreased *more slowly* than the overall homicide rate
#`limits` belongs to the y scale; it used to sit outside the closing
#parenthesis and was handed to print() instead
print(ggplot(exe, aes(Year, Firearm.Homicides / Homicides)) +
      geom_line() +
      scale_y_continuous(formatter = "percent",
                         limits = c(0,.63)))
dev.print(png, "guns-executions/output/percent-by-firearm.png",
          width = 500, height = 400)



############################################################
#Small Multiples of Homicide and Homicide with Firearm Rates
#############################################################
fir.state <- read.csv("guns-executions/data/firearm-hom-statetot.csv")
state <- read.csv("accidents-homicides-suicides/output/states.csv")
pop <- read.csv("conapo-pop-estimates/conapo-states.csv")

fir.state$State <- iconv(fir.state$State,
                         "windows-1252",
                         "utf-8")

#state$State <- iconv(state$State,
#                     "windows-1252",
#                     "utf-8")

pop$State <- iconv(pop$State,
                   "windows-1252",
                   "utf-8")

#pop$State fir.state$State state$State
state <- merge(fir.state, state[,c(1,10:20)], by = "State")
state <- merge(state, pop[ ,c(1,10:20)], by = "State")

#Column positions after the two merges: firearm homicides, all homicides
#and population, eleven yearly columns each
popclmns <- 24:34
firclmns <- 2:12
homclmns <- 13:23
state[firclmns] <- state[firclmns] / state[, popclmns] * 100000
state[homclmns] <- state[homclmns] / state[, popclmns] *100000

mstate <- melt(state[,1:23], id ="State")
mstate$type <- factor(rep(c("Firearm\nHomicides", "Homicides"),
                          each = 32*11))
mstate$variable <- rep(1998:2008, each = 32)

mstate$type <- factor(mstate$type, levels = rev(levels(mstate$type)))

#Is the data cointegrated?
#unitRoot <- function(df){
#  f <- subset(df, type == "Firearm\nHomicides")$value
#  h <- subset(df, type == "Homicides")$value
#  reg <- lm(h ~ f)
#  ht <- adf.test(residuals(reg))
#  ht
#}
#FFFFFFFFFFFFFFFCCCCCCCCC they are cointegrated
#dlply(hom, .(County), unitRoot)

#Simple error correction model for one state; with so few samples per state
#there's not much info.
#  df: rows of mstate for a single state (columns State, type, value)
#Prints the state, the model summary and plots the error-correction term
coint <- function(df){
  f <- subset(df, type == "Firearm\nHomicides")$value
  h <- subset(df, type == "Homicides")$value
  coint.res <- residuals(lm(h ~ f))
  d_h <- diff(h)
  d_f <- diff(f)
  diff.dat <- data.frame(embed(cbind(d_h, d_f), 2))
  colnames(diff.dat) <- c("d_h", "d_f", "d_h.1", "d_f.1")
  #Row i of diff.dat holds the change between observations i + 1 and i + 2,
  #so the error-correction term is the residual of observation i + 1. The
  #old code dropped the fixed positions 7 and 8, which only matched the
  #number of rows by accident and scrambled the alignment
  n <- length(coint.res)
  coint.res <- coint.res[2:(n - 1)]
  reg <- lm(d_h ~ coint.res + d_h.1 + d_f.1, data = diff.dat)
  print(df$State)
  print(summary(reg))
  #type = "l" is an argument of plot(); it used to be passed to print()
  plot(coint.res, type = "l")
}
#Doesn't work :(
dlply(mstate, .(State), coint)

#Correlation between the firearm homicide and the homicide series of the
#rows in df (one state when used with ddply)
correl <- function(df){
  f <- subset(df, type == "Firearm\nHomicides")$value
  h <- subset(df, type == "Homicides")$value
  cor(f, h)[1]
}

#mstate <- merge(mstate, ddply(mstate, .(State), correl), by = "State")
mstate$State <- cleanNames(mstate, "State")
mstate <- subset(mstate, State %in% c("Chihuahua", "Sinaloa", "Durango", "Sonora", "Guerrero", "Baja California","Michoacán", "Tamaulipas"))
#mstate$State <- paste(mstate$State,"-", round(mstate$V1,2))

#mstate$State <- with(mstate, reorder(factor(State), dif))
scale_color <- scale_colour
print(ggplot(mstate, aes(variable, value,
                         color = type, group = type)) +
      geom_line() +
      facet_wrap(~ State) +
      ylab("Rate") + xlab("Year") +
      scale_x_continuous(breaks = c(2000, 2004, 2007),
                         labels = c("00","04", "07")) +
      opts(title = "Homicides and Homicides with Firearm"))
dev.print(png, "guns-executions/output/homicides-firearm-st.png",
          width = 600, height = 400)


###############################################################
##Now homicides committed with a firearm as a proportion of
#all homicides
##############################################################
pstate <- state[2:12] / state [13:23]
pstate$State <- state$State
mpstate <- melt(pstate, id = "State")
mpstate$variable <- rep(1998:2008, each = 32)
mpstate$State <- factor(cleanNames(mpstate, "State"))

#Pick the two years by value so the ordering matches the chart title
#(2004 to 2008); value[5] is 2002 now that the series starts in 1998
mpstate <- ddply(mpstate, .(State), transform,
                 dif = value[variable == 2004] - value[variable == 2008])

mpstate$State <- reorder(mpstate$State, mpstate$dif)
print(ggplot(mpstate, aes(variable, value)) +
      geom_line() +
      facet_wrap(~ State) +
      ylab("Proportion") + xlab("Year") +
      scale_x_continuous(breaks = c(2000, 2006, 2008),
                         labels = c("00","06", "08"),
                         limits = c(2004,2008)) +
      scale_y_continuous(formatter = "percent") +
      stat_smooth(method = lm, se = FALSE) +
      opts(title = "Proportion of Homicides commited with a Firearm (ordered by difference in proportions from 2004 to 2008)"))
dev.print(png, "guns-executions/output/homicides-firearm-st-p2005.png",
          width = 960, height = 600)


mpstate <- subset(mpstate, State %in% c("Chihuahua", "Sinaloa", "Durango", "Sonora", "Guerrero", "Baja California","Michoacán", "Tamaulipas"))
#Slope used to order the facets; the coefficient is named "df$value".
#NOTE(review): this regresses the year on the proportion. A time trend
#would be value ~ variable -- confirm which one is intended
m <- function(df){
  lm(df$variable ~ df$value)$coef[2]
}
mpstate <- merge(mpstate, ddply(mpstate, .(State), m),
                 by = c("State"))
mpstate$State <- reorder(mpstate$State, mpstate$"df$value")
print(ggplot(mpstate, aes(variable, value)) +
      geom_line() +
      facet_wrap(~ State) +
      ylab("Proportion") + xlab("Year") +
      scale_x_continuous(breaks = c(2000, 2004, 2007),
                         labels = c("00","04", "07")) +
      scale_y_continuous(formatter = "percent") +
      stat_smooth(method = lm, se = FALSE) +
      opts(title = "Proportion of Homicides commited with a Firearm"))
dev.print(png, "guns-executions/output/homicides-firearm-st-p.png",
          width = 600, height = 400)

#Guns traced to the US
#c(3090, 5260, 1950, 3060, 6700)
#2004:2008

#Total guns seized in Mexico
#30000 #hmmm, according to PGR it was 28,000 over two years
#Guns submitted for tracking
#7200

#Assault by rifle shotgun and larger firearm discharge
asweap <- c(105, 80, 54, 50, 61, 48, 54, 55, 42, 41, 104)
#pop9808 <- c(95790135, 97114831, 98438557, 99715527, 100909374, 101999555, 103001867, 103946866, 104874282, 105790725, 106682518)
print(qplot(1998:2008, asweap, geom="line") +
      geom_line() +
      # geom_point(aes(size = asweap)) +
      xlab("Year") + ylab("Number of Deaths") +
      opts(title="Number of deaths in Mexico by\nassault by rifle, shotgun and larger firearm discharge"))
dev.print(png, "guns-executions/output/long-guns.png",
          width = 500, height = 400)

# --------------------------------------------------------------------------------
# /choropleths/county-maps-homicide.r:
# --------------------------------------------------------------------------------
########################################################
##### Author: Diego Valle Jones
##### Website: www.diegovalle.net
##### Date Created: Sun Jan 24 19:33:22 2010
########################################################
#Choropleths of the homicide rate by county according to the INEGI
#data source: Estadísticas Vitales INEGI


#Clean data file with all homicides *registered* by county and sex.
#  filename: bzip2-compressed csv; the first four lines are skipped
#  sex:      "Total", "Mujer" or "Hombre"; any other value keeps all rows
#Returns the data with numeric count columns, a `tot` column holding their
#row sums and a numeric CLAVE code, for years of murder from 1990 onwards
cleanHomicide <- function(filename, sex) {
  df <- read.csv(bzfile(filename), skip = 4)
  names(df)[1:4] <- c("Code","County","Year.of.Murder","Sex")
  #Logical filters instead of df[-grep(...), ]: when nothing matches,
  #grep() returns integer(0) and the negative index drops every row
  df <- df[!grepl("=CONCATENAR", df$Code),]
  #df <- df[!grepl("FUENTE: INEGI. Estadísticas de mortalidad.", df$Code),]
  df <- df[!grepl("Total", df$County),]
  df <- df[!grepl("No especificado", df$County),]
  df <- df[!grepl("Total", df$Year.of.Murder),]
  df <- df[!grepl("No especificado", df$Year.of.Murder),]
  if (sex == "Total") {
    df <- df[grepl("Total", df$Sex),]
  } else if (sex == "Mujer") {
    df <- df[grepl("Mujer", df$Sex),]
  } else if (sex %in% c("Hombre", "Dfbre")) {
    #"Dfbre" was "Hombre" hit by a search and replace; it is still accepted
    #as an argument but can never have matched the data
    df <- df[grepl("Hombre", df$Sex),]
  }

  df$X.4 <- NULL
  df$Year.of.Murder <- as.numeric(gsub('[[:alpha:]]', '',
                                       df$Year.of.Murder))
  df <- subset(df, Year.of.Murder >= 1990)
  col2cvt <- 5:ncol(df)
  #Counts come with thousands separators; df[col2cvt] stays a data frame
  #even when there is a single count column
  df[col2cvt] <- lapply(df[col2cvt],
                        function(x){
                          as.numeric(gsub(",", "", x))})
  df[is.na(df)] <- 0
  df$tot <- rowSums(df[col2cvt])
  df$CLAVE <- as.numeric(gsub(" ", "", df$Code))
  df
}

#population from the CONAPO
#Returns a long data frame with columns Clave, County, Year and Population
cleanPopCONAPO <- function(filename) {
  pop <- read.csv(bzfile(filename))
  pop <- na.omit(pop)
  col2cvt <- 3:ncol(pop)
  pop[,col2cvt] <- lapply(pop[ ,col2cvt],
                          function(x){
                            as.numeric(gsub(" ", "", x))})
  popm <- melt(pop, id = c("Clave", "Entidad.federativa.o.municipio"))
  #Drop the first character of the former column names (presumably the "X"
  #that read.csv puts in front of numeric headers -- confirm). Year stays a
  #character vector
  popm$variable <- substring(popm$variable, 2)
  names(popm) <- c("Clave", "County", "Year", "Population")
  popm
}

#population from the inegi
#  filename: csv with columns Code, County, Total, Hombres and Mujeres
#  year:     value stored in the Year column
#  type:     "Total", "Mujer" or "Hombre"; selects the population column
cleanPopINEGI <- function(filename, year, type = "Total") {
  pop <- read.csv(filename)
  pop <- na.omit(pop)
  pop <- subset(pop, County != "Total" &
                Code != "#NAME?")
  pop$Clave <- as.numeric(gsub(" ", "", pop$Code))
  pop$Code <- NULL
  if (type == "Total") {
    pop$Hombres <- NULL
    pop$Mujeres <- NULL
  } else if (type == "Mujer"){
    pop$Total <- NULL
    pop$Hombres <- NULL
  } else if (type %in% c("Hombre", "Dfbre")) {
    #Without this branch all three columns were kept for men
    pop$Total <- NULL
    pop$Mujeres <- NULL
  }
  pop$Year <- year
  #NOTE(review): assumes the remaining population column is the second one
  names(pop)[2] <- "Population"
  pop
}

#Plot a map of the murder rate
#  vector: one rate per polygon of mexico.ct.shp, in polygon order
#  title:  main title
#  breaks: values shown on the colour legend
#  text:   not used
#Reads the globals maxh, mexico.ct.shp and mexico.st.shp
drawMap <- function(vector, title, breaks, text = NA) {
  plotvar<- unlist(vector)
  nclr <- 9
  plotclr <- brewer.pal(nclr,"Reds")
  fillRed <- colorRampPalette(plotclr)
  #Rates at or above maxh share the darkest colour
  plotvar[plotvar >= maxh] <- maxh -1
  colcode <- fillRed(maxh)[round(plotvar) + 1]
  plot(mexico.ct.shp, col = colcode, lty = 0, border = "gray")
  plot(mexico.st.shp, add = TRUE, lwd=1, border = "gray30")
  title(main = title)
  colorlegend(posy = c(0.05,0.9), posx = c(0.9,0.92),
              col = fillRed(maxh),
              zlim=c(0, maxh), zval = breaks,
              main = "homicides per\n100,000")
  par(bg='white')
}

#As always it's a pain to make sure the counties line up
#with the correct data
#Returns one row per polygon of mexico.ct.shp, in polygon order, with the
#rate set to 0 for counties without data in `year`
mergeMap <- function(df, year){
  hom.popmX <- subset(df, Year.of.Murder == year)
  mun.complete<-data.frame(CLAVE = mexico.ct.shp$CLAVE,
                           CVE_ENT = mexico.ct.shp$CVE_ENT,
                           CVE_MUN = mexico.ct.shp$CVE_MUN)
  hom.popmX$CLAVE <- gsub(" ", "", hom.popmX$Code)
  map<-merge(mun.complete, hom.popmX, by = "CLAVE", all.x = TRUE)
  #merge() sorts its result and repeats a polygon when a code occurs twice;
  #drawMap() colours polygons by position, so restore the polygon order
  map <- map[match(as.character(mexico.ct.shp$CLAVE),
                   as.character(map$CLAVE)), ]
  map$rate[is.na(map$rate)] <- 0
  map
}

#Write the merged data for `year` to a csv file and the map to a png file
#under choropleths/output/. Reads the global config$titles.ch
savePlot <- function(df, year, text, breaks){
  name <- config$titles.ch
  map <- mergeMap(df, year)
  write.csv(map, paste("choropleths/output/map",
                       year, ".csv", sep = ""))
  filename <- paste("choropleths/output/", name, ", ",
                    as.character(year), ".png", sep ="")
  title <- paste(name, ", ", as.character(year), sep ="")
  Cairo(file = filename,
        width=960, height=600, type="png", bg="white")
  #drawMap() draws with base graphics, so there is nothing to print();
  #close the device even if drawing fails
  tryCatch(drawMap(map$rate, title, breaks, text),
           error = function(e) { dev.off(); stop(e) })
  dev.off()
}

#read the file with population data from 2006-2008
#read the files with population data from the censuses
readPop <- function(type){
  if(type=="Total") {
    popm <- cleanPopCONAPO("timelines/data/pop.csv.bz2")
  } else{
    popm <- cleanPopCONAPO("choropleths/data/pop-w.csv.bz2")
  }
  pop90 <- cleanPopINEGI("choropleths/data/inegi1990.csv", 1990, type)
  pop95 <- cleanPopINEGI("choropleths/data/inegi1995.csv", 1995, type)
  pop00 <- cleanPopINEGI("choropleths/data/inegi2000.csv", 2000, type)

  rbind(popm, pop90, pop95, pop00)
}

#Merge homicides with population and compute the rate per 100,000.
#  hom:  output of cleanHomicide()
#  popm: output of readPop()
#Counties whose code is missing from the population data are matched by
#name instead and appended
mergeHomPop <- function(hom, popm){
  hom.popm <- merge(hom, popm, by.x = c("CLAVE", "Year.of.Murder"),
                    by.y = c("Clave", "Year"))
  hom.popm$rate<- (hom.popm$tot / hom.popm$Population) *
    100000
  hom.popm <- hom.popm[order(-hom.popm$rate),]
  hom.popm$CLAVE <- as.character(hom.popm$CLAVE)
  #The municpalities in Oaxaca have changed since the CONAPO
  #released its population database, we have to merge them
  #by name. Hopefully their boundaries haven't changed much
  changed <- setdiff(hom$CLAVE, popm$Clave)
  hom.ch <- subset(hom, CLAVE %in% changed)
  hom.popm.ch <- merge(hom.ch, popm, by.x = c("County",
                                       "Year.of.Murder"),
                       by.y = c("County", "Year"))
  hom.popm.ch$CLAVE <- NULL
  hom.popm.ch$rate<- (hom.popm.ch$tot / hom.popm.ch$Population) *
    100000
  hom.popm.ch$County.y <- hom.popm.ch$County
  #Rename by name rather than by position (1 and 25) so the column names
  #still line up for rbind() when the number of count columns changes
  names(hom.popm.ch)[names(hom.popm.ch) == "County"] <- "County.x"
  names(hom.popm.ch)[names(hom.popm.ch) == "Clave"] <- "CLAVE"
  hom.popm.ch$Code <- as.character(hom.popm.ch$CLAVE)
  rbind(hom.popm, hom.popm.ch)
}

#One caption per map; seven maps are drawn by the mapply() call at the end
#of this file (1990, 1995, 2000 and 2005 to 2008)
ftext <- rep(NA, 7)
mtext <- c(NA,
           #1995
           "1. Back in 1995 the southwest was the most violent area in Mexico.
Michoacan had a higher homicide rate in 1995 than in 2006
2. The Golden Triangle has always had a high homicide rate. Most
cartel leaders have come from Badiraguato (dark red). This has
probably been the most violent municipality over the last 20 years.",

           #2000
           "Violence has significantly decreased in the southwest. The urban
areas bordering the US are more violent.",

           #2005
           "1. The Sinaloa Cartel is battling the Gulf Cartel and the Zetas for
control of Nuevo Laredo. The fiercest fighting takes place in 2005
and the first half of 2006.
2. A mysterious new cartel preaching Muscular Christianity and
chopping heads off starts operating in Michoacan.",

           #2006
           "1. The Sinaloa Cartel fails to defeat the Zetas in Nuevo Laredo.
2. In the home state of the President of Mexico, Michoacan, a new
cartel called La Familia officially splinters off from the Zetas
sparking a surge in homicides. Starting December 11, soon after a
contentious election, the President sends in the army.",

           #2007
           "There seems to be some success in fighting La Familia in Michoacan
and the Beltran Leyvas in Guerrero. Although the number and violence
involved in executions has increased, Mexico has the lowest homicide
rate on record.",

           #2008
           "1. Arturo Beltran Leyva is captured. The remaining Beltran Leyvas
accuse the leader of the Sinaloa Cartel of tipping off the government,
sparking an inter-cartel war in Guerrero and Michoacan.
2. The Sinaloa Cartel starts a turf war to defeat the Juarez Cartel.
3. Eduardo Arellano Felix of the Tijuana Cartel is captured.")

#Notes for later years. They used to be bare string literals, i.e.
#statements that are evaluated and thrown away
#2009
#"The war between the sinaloa and juarez cartels intensifies"

#2010
#"Though it hasn't completly defeated the Juarez Cartel, the Sinaloa
#Cartel is now firmly established in Juarez. Seeing the writing on the
#wall the Gulf Cartel allies itself with the Sinaloa Cartel and La
#Familia to take out the Zetas, sparking a surge in violence in Nuevo
#León and Tamaulipas"

#2011?
#"After having defeated the Zetas and the Juarez Cartel, the Sinaloa
#Cartel goes after the only remaining cartel in Tijuana"
#Not needed after all.
#Easier in Inkscape
#One (empty) caption per map drawn below
mtext <- rep(NA, 7)


#For memory reasons these are global variables
#County map
mexico.ct.shp <- readShapePoly(map.inegi.ct,
                               IDvar = "CLAVE",
                               proj4string = CRS("+proj=aea"))
#State map
mexico.st.shp <- readShapePoly(map.inegi.st,
                               proj4string = CRS("+proj=aea"))


#Settings that depend on config$sex: filter value, title, legend breaks,
#the rate at which the colour scale saturates (maxh) and the captions
if(config$sex == "Female"){
  type <- "Mujer"
  config$titles.ch <- config$choropleths$ftitle.ch
  breaks <- c(0,5,10,Inf)
  maxh <- 17
  text <- ftext
} else {
  type <- "Total"
  config$titles.ch <- config$choropleths$mtitle.ch
  breaks <- c(0,10,20,40,60,80,Inf)
  maxh <- 100
  text <- mtext
}

#################################################################
#Read the files with the data and population, then merge them
################################################################
hom <- cleanHomicide("states/data/homicide-mun-2008.csv.bz2", type)
popm <- readPop(type)
hom.popm <- mergeHomPop(hom, popm)

########################################################
#Draw choropleths of Mexico
########################################################
years <- c(1990, 1995, 2000, 2005:2008)
#mapply() pairs years and captions element by element; recycle the
#captions to the number of years so the two lengths always agree
text <- rep(text, length.out = length(years))
invisible(mapply(savePlot, years, text,
                 MoreArgs = list(breaks = breaks, df = hom.popm)))
#draw.circle(0,0,100,col="#ffccff50")
#text(20,20,"1")
# --------------------------------------------------------------------------------
# /timelines/data/homicide-county-month.r~:
# --------------------------------------------------------------------------------
########################################################
##### Author: Diego Valle Jones #####
##### Website: www.diegovalle.net #####
##### Date: 2010-Jan-22 #####
########################################################

#######################################################
#Time series of the monthly homicide rate
by county in 9 | #the Mexican states with the highest homicide rates, 10 | #plus Michoacan which had the biggest decrease in 11 | #homicides from 2006-2008 12 | ####################################################### 13 | library(ggplot2) 14 | library(Cairo) 15 | 16 | source("constants.r") 17 | 18 | #;;;;Apply an artificial correction to the data 19 | #;;;;to prove that Global Warming is happening 20 | #;;;;Oh wait, wrong file 21 | 22 | #Draw a multiple lines plot of each narco-state 23 | cleanHom <- function(df, state) { 24 | df <- df[grep(state, df$Code), ] 25 | df <- subset(df, Year.of.Murder != "Total" & 26 | Year.of.Murder != "No especificado" & 27 | Month.of.Murder != "Total" & 28 | Month.of.Murder != "No especificado" & 29 | County != "No especificado") 30 | df$Year.of.Murder <- as.numeric(gsub('[[:alpha:]]', '', 31 | df$Year.of.Murder)) 32 | col2cvt <- 5:ncol(df) 33 | df[is.na(df)] <- 0 34 | df$Total.Murders <- apply(df[ , col2cvt], 1, sum) 35 | df$Month.of.Murder <- factor(df$Month.of.Murder) 36 | #The months are in a weird order, so 04=Abril, etc. 37 | levels(df$Month.of.Murder) <- c("04","08","12","01","02","07","06","03","05","11","10","09") 38 | 39 | df$Date <- as.Date(paste(df$Month.of.Murder,"/", 40 | "01", "/", 41 | df$Year.of.Murder, sep =""), "%m/%d/%Y") 42 | #Make sure we code the dates as the last day of the month 43 | df$Date <- as.Date(format(df$Date + 31, "%Y%m01"), "%Y%m%d") - 1 44 | 45 | #The data for the last month of 2008 isn't complete 46 | df <- subset(df, Date < as.Date("12/01/2008", "%m/%d/%Y")) 47 | 48 | #Remove the space that separates the state code from the 49 | #county code so we can merge the homicide data with the 50 | #population data 51 | df$Code <- as.numeric(gsub("[ ]", "", df$Code )) 52 | df 53 | } 54 | 55 | mergeHomPop <- function(df, pop, cutoff) { 56 | df.pop <- merge(df, pop, by.x=c("Code", "Year.of.Murder"), 57 | by.y=c("Code", "Year")) 58 | #Only big counties! 
59 | #Subseting by size doesn't work because populations change 60 | #over time, so 61 | #a county that started at 90,000 will be missing half the 62 | #the observations 63 | counties100 <- subset(df.pop, Population > cutoff) 64 | states <- unique(factor(counties100$County.x)) 65 | df.pop <- subset(df.pop, County.x %in% states) 66 | 67 | df.pop$rate <- (df.pop$Total.Murders / df.pop$Population * 100000) * 12 68 | #An NA means there were no murders, so we have to change it to 0 69 | #and since the INEGI in all its wisdom decided to simply delete 70 | #the rows with no monthly homicides we have to recreate the 71 | #database to include them 72 | start <- as.Date("2006/2/01") 73 | next.mon <- seq(start, length=35, by='1 month') 74 | period <- next.mon - 1 75 | dates.df <- data.frame(Date = rep(period, 76 | each = length(states)), 77 | County.x = rep(states, 78 | length(states) * 79 | length(period)) 80 | ) 81 | df.pop <- merge(dates.df, df.pop, 82 | by = c("Date", "County.x"), 83 | all.x = TRUE, all.y = TRUE) 84 | df.pop$rate[is.na(df.pop$rate)] <- 0 85 | df.pop$Total.Murders[is.na(df.pop$Total.Murders)] <- 0 86 | df.pop 87 | } 88 | 89 | getData <- function(df, pop, state, cutoff){ 90 | hom.clean <- cleanHom(df, state) 91 | mergeHomPop(hom.clean, pop, cutoff) 92 | } 93 | 94 | cleanPop <- function(filename) { 95 | pop <- read.csv(bzfile(filename)) 96 | pop <- na.omit(pop) 97 | col2cvt <- 3:ncol(pop) 98 | pop[,col2cvt] <- lapply(pop[ ,col2cvt], 99 | function(x){as.numeric(gsub(" ", "", x))}) 100 | popm <- melt(pop, id = c("Clave", "Entidad.federativa.o.municipio")) 101 | #remove the space before the county code 102 | popm$variable <- as.numeric(substring(popm$variable, 2)) 103 | names(popm) <- c("Code", "County", "Year","Population") 104 | popm 105 | } 106 | 107 | drawPlot <- function(df.pop) { 108 | ggplot(df.pop, aes(Date, rate)) + 109 | geom_point(aes(size=Total.Murders), color="darkred") + 110 | facet_wrap(~ County.x, as.table = FALSE, ncol = 1, 111 | 
scale="free_y") + 112 | scale_x_date() + 113 | #geom_smooth(se = FALSE) + 114 | xlab("") + ylab("Homicide rate") 115 | } 116 | 117 | #Vertical line at an operation date. xintercept is passed as a fixed parameter: a lazily evaluated aes(xintercept = op.date) presumably failed because op.date only exists inside this function 118 | addvline <- function(op.date) { 119 | geom_vline(xintercept = as.numeric(op.date), alpha=.4) 120 | } 121 | #Add the operation name as a text label anchored at (date, -10) 122 | addtext <- function(p, date, opname){ 123 | p + geom_text(aes(x,y), label = opname, 124 | data = data.frame(x = date, y = -10), 125 | size = 3, hjust = 1, vjust = 0) 126 | # geom_vline(aes(xintercept = date), alpha=.4) 127 | } 128 | 129 | hom <- read.csv(bzfile("county-month.csv.bz2")) 130 | pop <- cleanPop("pop.csv.bz2") 131 | 132 | #the county must be this big to enter the chart 133 | popsize <- 100000 134 | 135 | ######################################################## 136 | #Finally, the plots 137 | ######################################################## 138 | 139 | #Baja Califronia Norte! as the ICESI would say, hahahaha 140 | bcn.df <- getData(hom, pop, baja.california, popsize) 141 | bcn.df$group <- ifelse(bcn.df$Date < op.tij, 1, 0) 142 | 143 | p <- drawPlot(bcn.df) + geom_vline(aes(xintercept = op.tij), alpha=.4) 144 | addtext(p, op.tij, "Joint Operation Tijuana") + geom_smooth(aes(group = group), se = FALSE) 145 | dev.print(png, file="output/Baja California.png", width=600, height=600) 146 | 147 | #Sonora 148 | son.df <- getData(hom, pop, sonora, popsize) 149 | p <- drawPlot(son.df) 150 | p + geom_smooth(se = FALSE) 151 | dev.print(png, file = "output/Sonora.png", width=600, height=600) 152 | 153 | #Chihuahua 154 | chi.df <- getData(hom, pop, chihuahua, popsize) 155 | chi.df$group <- 1 156 | chi.df$group[chi.df$Date < op.tria.dor] <- 0 157 | chi.df$group[chi.df$Date >= op.chi] <- 2 158 | p <- drawPlot(chi.df) + geom_vline(aes(xintercept = op.chi), alpha=.4) 159 | p <- addtext(p, op.chi, "Joint Operation Chihuahua") 160 | p <- addtext(p, op.tria.dor, "Joint Operation Triangulo Dorado") 161 | p + geom_vline(aes(xintercept = op.tria.dor), alpha=.4) + geom_smooth(aes(group =
group), se = FALSE) 162 | dev.print(png, file = "output/Chihuahua.png", width=600, height=600) 163 | 164 | #Michoacán 165 | mich.df <- getData(hom, pop, michoacan, popsize) 166 | mich.df$group <- ifelse(mich.df$Date < op.mich, 1, 0) 167 | p <- drawPlot(mich.df) + geom_vline(aes(xintercept = op.mich), alpha=.4) 168 | addtext(p, op.mich, "Joint Operation Michoacan") + geom_smooth(aes(group = group), se = FALSE) 169 | dev.print(png, file = "output/Michoacan.png", width=600, height=600) 170 | 171 | #Sinaloa 172 | sin.df <- getData(hom, pop, sinaloa, popsize) 173 | sin.df$group <- 1 174 | sin.df$group[sin.df$Date < op.tria.dor] <- 0 175 | sin.df$group[sin.df$Date >= op.sin] <- 2 176 | p <- drawPlot(sin.df) + geom_vline(aes(xintercept = op.sin), alpha=.4) 177 | p <- addtext(p, op.sin, "Joint Operation Culiacan-Navolato") 178 | p <- addtext(p, op.tria.dor, "Joint Operation Triangulo Dorado") 179 | p + geom_vline(aes(xintercept = op.tria.dor), alpha=.4) + geom_smooth(aes(group = group), se = FALSE) 180 | dev.print(png, file = "output/Sinaloa.png", width=600, height=600) 181 | 182 | #Durango 183 | dur.df <- getData(hom, pop, durango, popsize) 184 | dur.df$group <- ifelse(dur.df$Date < op.tria.dor, 1, 0) 185 | p <- drawPlot(dur.df) + geom_vline(aes(xintercept = op.tria.dor), alpha=.4) 186 | addtext(p, op.tria.dor, "Joint Operation Triangulo Dorado") + geom_smooth(aes(group = group), se = FALSE) 187 | dev.print(png, file = "output/Durango.png", width=600, height=600) 188 | 189 | #The data for Oaxaca and Guerrero are in another file 190 | hom <- read.csv(bzfile("county-month-gue-oax.csv.bz2")) 191 | 192 | #Guerrero 193 | gue.df <- getData(hom, pop, guerrero, popsize) 194 | gue.df$group <- ifelse(gue.df$Date < op.gue, 1, 0) 195 | p <- drawPlot(gue.df) + geom_vline(aes(xintercept = op.gue), alpha=.4) 196 | addtext(p, op.gue, "Joint Operation Guerrero") + geom_smooth(aes(group = group), se = FALSE) 197 | dev.print(png, file = "output/Guerrero.png", width=600, height=600) 198 |
199 | #The data for Nuevo Leon and Tamaulipas are in yet another file 200 | hom <- read.csv(bzfile("county-month-nl-tam.csv.bz2")) 201 | 202 | #Tamaulipas 203 | tam.df <- getData(hom, pop, tamaulipas, popsize) 204 | tam.df$group <- ifelse(tam.df$Date < op.gue, 1, 0) 205 | p <- drawPlot(tam.df) 206 | p + geom_smooth(se = FALSE) 207 | dev.print(png, file = "output/Tamaulipas.png", width=600, height=600) 208 | 209 | #Nuevo Leon 210 | #Tamaulipas 211 | nl.df <- getData(hom, pop, nuevo.leon, popsize) 212 | nl.df$group <- ifelse(nl.df$Date < op.gue, 1, 0) 213 | p <- drawPlot(nl.df) 214 | p + geom_smooth(se = FALSE) 215 | dev.print(png, file = "output/Nuevo-Leon.png", width=600, height=600) 216 | 217 | ######################################################## 218 | #Variance estimates to see if there is cheating going on 219 | ######################################################## 220 | df <- hom[grep("02 004", hom$Code), ] 221 | df <- subset(df, Year.of.Murder != "Total" & 222 | Year.of.Murder != "No especificado" & 223 | Month.of.Murder != "Total" & 224 | Month.of.Murder != "No especificado" & 225 | County != "No especificado") 226 | df$Year.of.Murder <- as.numeric(gsub('[[:alpha:]]', '', 227 | df$Year.of.Murder)) 228 | col2cvt <- 5:ncol(df) 229 | df[is.na(df)] <- 0 230 | df$Total.Murders <- apply(df[ , col2cvt], 1, sum) 231 | df$Month.of.Murder <- factor(df$Month.of.Murder) 232 | #The months are in a weird order, so 04=Abril, etc. 
233 | levels(df$Month.of.Murder) <- c("04","08","12","01","02","07","06","03","05","11","10","09") 234 | 235 | df$Date <- as.Date(paste(df$Month.of.Murder,"/", 236 | "01", "/", 237 | df$Year.of.Murder, sep =""), "%m/%d/%Y") 238 | df$Month <- as.numeric(as.character(df$Month.of.Murder)) 239 | #The data for the last month of 2008 isn't complete 240 | df <- subset(df, Year.of.Murder == 2008) 241 | lm_df <- function(df) { 242 | df <- df[order(df$Month),] 243 | lm(Month ~ Total.Murders, data = df) 244 | } 245 | dmodels <- dlply(df, .(County), lm_df) 246 | dcoefs <- ldply(dmodels, function(x) summary(x)$r.squared) 247 | dcoefs[order(dcoefs$V1),] 248 | ggplot(df, aes(Date, Total.Murders, group = County)) + geom_point() + facet_wrap(~ County, scales = "free_y") + geom_smooth() 249 | 250 | ######################################################## 251 | #Is the government hiding murders in Tijuana? 252 | ######################################################## 253 | #Two police agencies give different estimates for the number 254 | #of homicides in Tijuana in 2009 255 | #Data from: 256 | #http://www.la-ch.com/index.php?view=article&catid=42:general&id=113:desaparecen-267-muertos-de-cifras-oficiales&format=pdf&option=com_content&Itemid=62 257 | sspe <- c(30, 33, 48, 42, 21, 33, 28, 29, 32, 87, 123, 71) #577 258 | pgjep2 <- c(44, 37, 56, 61, 21, 38, 36, 33, 73, 147, 212, 86) #844 259 | #The INEGI vital statistics also give a different estimate of the number of homicides 260 | inegip1 <- c(39, 35, 47, 54, 20, 36, 33, 29, 51, 139, 191, 59) #733 261 | 262 | #http://www.johndcook.com/blog/2010/01/25/estimating-reporting-rates/#more-4309 263 | #The other town has n2 burglaries with a probability p2 of being reported. If the expected number of reported burglaries are equal, then n1p1 = n2p2 = r. The variance in the burglary reports from the two towns will be r(1 – p1) and r(1 – p2). If p1 is less than p2 there will be more variance in the data from the first city. 
264 | #I only use the numbers from jan-oct because the inegi takes its time to record the statistics 265 | var(sspe[1:10]) 266 | var(pgjep2[1:10]) 267 | var(inegip1[1:10]) 268 | 269 | #The inegi commands more respect than the Tijuana police, so we would expect its variance to be lower 270 | var(inegip1[1:10]) < var(pgjep2[1:10]) 271 | 272 | #np1 np1(1-p1) = v1 n=v1/p1(1-p1) 273 | #np2 np2(1-p2) = v2 n=v2/p2(1-p2) 274 | 275 | p1 <- seq(.01,.99, by = .01);p2 <- p1 +.1; 276 | plot(100*p1*(1-p1)) 277 | plot(100*p2*(1-p2)) 278 | -------------------------------------------------------------------------------- /states/homicide-bystate.r~: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | ##### Author: Diego Valle Jones 3 | ##### Website: www.diegovalle.net 4 | ##### Date Created: Sat Jan 23 21:10:55 2010 5 | ######################################################## 6 | #1. Bar chart of the homicide rate in 2008 7 | #2. Map of the homicide rate 8 | #3. Bar chart of the difference in homicide rate 2008-2006 9 | #4. Map of the same 10 | #5.
Small multiples of the evolution of the murder rate 1990-2008 11 | 12 | #location of the ICESI map 13 | source("maps-locations.r") 14 | source("library/utilities.r") 15 | 16 | ############################################# 17 | #String Constants 18 | kyears <- 1990:2008 19 | #############################################3 20 | 21 | 22 | cleanHom <- function(type="Total") { 23 | hom <- read.csv(bzfile("states/data/homicide-mun-2008.csv.bz2"), skip=4) 24 | names(hom)[1:4] <- c("Code", "County", "Year.of.Murder", "Sex") 25 | hom <- hom[grep("=CONCATENAR", hom$Code),] 26 | hom <- hom[-grep("Extranjero", hom$County),] 27 | hom <- hom[grep(type, hom$Sex),] 28 | hom$Year.of.Murder <- as.numeric(as.numeric(gsub('[[:alpha:]]', '', 29 | hom$Year.of.Murder))) 30 | hom <- subset(hom, Year.of.Murder >= 1990) 31 | #Get rid of the commas in the numbers: 155,000 to 155000 32 | col2cvt <- 5:ncol(hom) 33 | hom[ ,col2cvt] <- lapply(hom[ ,col2cvt], 34 | function(x){as.numeric(gsub(",", "", x))}) 35 | hom[is.na(hom)] <- 0 36 | hom$Tot <- apply(hom[ , col2cvt], 1, sum) 37 | hom$Code <- as.numeric(gsub(".*,([[:digit:]]+).", "\\1", hom$Code)) 38 | hom 39 | } 40 | 41 | cleanPop <- function(type = "Total") { 42 | if(type == "Mujer") 43 | pop <- read.csv("conapo-pop-estimates/conapo-states-f.csv") 44 | else 45 | pop <- read.csv("conapo-pop-estimates/conapo-states.csv") 46 | pop$Code <- c(1:33) 47 | pop 48 | } 49 | 50 | mergeHomPopYear <- function(hom, pop, year = 2008) { 51 | hom2008 <- merge(subset(hom, Year.of.Murder == year), 52 | pop[ ,c(1,year-1990+2,ncol(pop))], 53 | by="Code", all.x = TRUE) 54 | names(hom2008)[ncol(hom) + 2] <- "popyear" 55 | #The per 100,000 murder rate 56 | hom2008$Rate <- hom2008$Tot / hom2008$popyear * 100000 57 | hom2008$County <- cleanNames(hom2008) 58 | hom2008$County <- factor(hom2008$County) 59 | hom2008 60 | } 61 | 62 | redScale <- function(rate) { 63 | #the second highest value (Sinaloa) 64 | obs <- round(range(rate)[2]) + 1 65 | if(obs > 70) { 66 | obs <- 
round(-sort(-rate)[2]) + 1 67 | } 68 | index <- round(rate) + 1 69 | clr.inc <- colorRampPalette(brewer.pal(8, "Reds")) 70 | vec <- clr.inc(obs)[index] 71 | #special color for Chihuahua since it resembles a war zone, 72 | #otherwise the colors for the rest of the country would be 73 | #too light 74 | vec[rate > 70] <- "#410101" 75 | vec 76 | } 77 | 78 | barPlot <- function(hom2008, year="") { 79 | hom2008$color <- redScale(hom2008$Rate) 80 | hom2008$County <- reorder(hom2008$County, hom2008$Rate) 81 | xmax <- range(hom2008$Rate)[2] + 5 82 | hom.mean <- wtd.mean(hom2008$Rate, hom2008$popyear) 83 | ggplot(data = hom2008, aes(County, Rate)) + 84 | geom_bar(stat = "identity", aes(fill = color)) + 85 | scale_y_continuous(limits = c(0, xmax)) + 86 | coord_flip() + 87 | labs(x = "", y = "Homicides per 100,000") + 88 | opts(title = paste(config$title.barplot, year)) + 89 | opts(legend.position = "none") + 90 | scale_fill_identity(aes(breaks = color)) + 91 | geom_text(aes(label=round(Rate, digits = 1)), hjust = -.05, 92 | color = "gray50") + 93 | geom_hline(yintercept = hom.mean, alpha=.1, linetype=2) 94 | } 95 | 96 | #We need to order the variables by name to match them with the map 97 | mapOrder <- function(df, varname = "County.x"){ 98 | df$County <- iconv(df[[varname]], "", "ASCII") 99 | df$County <- cleanNames(df) 100 | #Why doesnt this work for Michoac!An, I cheated and used the state 101 | #number as the no.match value. rrrrrrrgh!!!!!!!!!!!!!! 
102 | df$Code <- pmatch(df$County, mexico.shp$NAME, 16) 103 | df.merge <- merge(data.frame(mexico.shp$NAME, Code = 1:32), 104 | df, by="Code", all.x = TRUE) 105 | df.merge 106 | } 107 | #Draw the state map filled with the given colors, with an optional legend 108 | plotMap <- function(mexico.shp, colors, plotclr, legend="", title="") { 109 | plot(mexico.shp, col = colors, border="black", lwd=2) 110 | title(main = title) 111 | if (legend !="") { 112 | legend("topright", legend = legend, 113 | fill = plotclr, cex=0.8, bty="n") 114 | } 115 | par(bg = "white") 116 | } 117 | 118 | #return a data frame with the change in homicide rates 119 | getDiff <- function(hom, pop, year1, year2) { 120 | if(year1>year2){ 121 | temp <- year2 122 | year2 <- year1 123 | year1 <- temp 124 | } 125 | hom2008 <- merge(subset(hom, Year.of.Murder == year2), 126 | pop[ ,c(1,year2-1990+2,ncol(pop))], 127 | by="Code", all.x=T) 128 | names(hom2008)[ncol(hom)+2] <- "popyear2" 129 | hom2008$Rate2008 <- hom2008$Tot / hom2008[[ncol(hom)+2]] * 100000 130 | hom2006 <- merge(subset(hom, Year.of.Murder == year1), 131 | pop[ ,c(1,year1-1990+2,ncol(pop))], 132 | by="Code", all.x=T) 133 | names(hom2006)[ncol(hom)+2] <- "popyear1" 134 | hom2006$Rate2006 <- hom2006$Tot / hom2006[[ncol(hom)+2]] * 100000 135 | hom.diff <- merge(hom2008,hom2006, by ="Code") 136 | hom.diff$Diff <- hom.diff$Rate2008 - hom.diff$Rate2006 137 | hom.diff$County.x <- factor(hom.diff$County.x) 138 | hom.diff 139 | } 140 | 141 | #orange (increase) / green (decrease) color scale for the difference barplot 142 | greenReds <- function(difference){ 143 | clr.inc <- colorRampPalette(brewer.pal(5, "Oranges")) 144 | clr.dec <- colorRampPalette(brewer.pal(5, "Greens")) 145 | #I (heart) R 146 | colors <- difference 147 | obs <- max(abs(round(difference))) + 1 #size both palettes by the largest change in either direction so a big decrease cannot index past the palette 148 | index <- abs(round(difference[difference >= 0])) + 1 149 | colors[difference >= 0] <- clr.inc(obs)[index] 150 | 151 | index <- abs(round(difference[difference < 0])) + 1 152 | colors[difference < 0] <- clr.dec(obs)[index] 153 | 154 | colors 155 | } 156 | 157 | #Based on code from: 158
| #http://learnr.wordpress.com/2009/06/01/ggplot2-positioning-of-barplot-category-labels/ 159 | barDiff <- function(hom.diff, year1="", year2="") { 160 | hom.diff$color <- greenReds(hom.diff$Diff) 161 | hom.diff$hjust <- ifelse(hom.diff$Diff > 0, 1.1, -.1) 162 | hom.diff$text.pos <- ifelse(hom.diff$Diff > 0, -.05, 1) 163 | hom.diff$County.x <- cleanNames(hom.diff, "County.x") 164 | hom.diff$County.x <- factor(hom.diff$County.x) 165 | hom.diff$County.x <- reorder(hom.diff$County.x, hom.diff$Diff) 166 | xmin <- range(hom.diff$Diff)[1] - 2 167 | xmax <- range(hom.diff$Diff)[2] + 2 168 | hom.mean08 <- wtd.mean(hom.diff$Rate2008, hom.diff$popyear2) 169 | hom.mean06 <- wtd.mean(hom.diff$Rate2006, hom.diff$popyear1) 170 | ggplot(hom.diff, aes(x=County.x, y=Diff, label=County.x, 171 | hjust = hjust)) + 172 | geom_text(aes(y = 0, size=3)) + 173 | geom_bar(stat = "identity",aes(fill = color)) + 174 | scale_y_continuous(limits = c(xmin, xmax)) + 175 | coord_flip() + 176 | labs(x = "", y = "Change in Rate per 100,000") + 177 | scale_x_discrete(breaks = NA) + 178 | opts(legend.position = "none") + 179 | opts(title = paste(config$title.bardiff, 180 | year1, "-", year2, ")", sep = "")) + 181 | scale_fill_identity(aes(breaks = color)) + 182 | geom_text(aes(label=round(Diff, digits = 1), hjust = text.pos), 183 | color="gray50") + 184 | geom_hline(yintercept = hom.mean08 - hom.mean06, alpha=.1, , linetype=2) 185 | } 186 | 187 | #################################################### 188 | #Small Multiples Plot of Murders by State 189 | #################################################### 190 | totalHom <- function(hom){ 191 | total.hom <- ddply(hom, .(Year.of.Murder), function(df) sum(df$Tot)) 192 | total.hom$pop <- unlist(pop[33, 2:(ncol(pop)-1)]) 193 | total.hom$Rate <- (total.hom$V1 / total.hom$pop) * 100000 194 | total.hom 195 | } 196 | 197 | mergeHomPopS <- function(hom){ 198 | mpop <- melt(subset(pop, State != "Nacional"), id=c("Code", "State")) 199 | mpop$variable <- 
as.numeric(substring(mpop$variable, 2)) 200 | mpop$Year.of.Murder <- mpop$variable 201 | hom.mpop <- merge(hom, mpop, by=c("Code","Year.of.Murder"), 202 | all.y = TRUE) 203 | if(any(is.na(hom.mpop$Tot))) 204 | hom.mpop[is.na(hom.mpop$Tot), ]$Tot <- 0 205 | hom.mpop$Rate <- hom.mpop$Tot / hom.mpop$value * 100000 206 | hom.mpop$State <- cleanNames(hom.mpop, "State") 207 | hom.mpop$State <- factor(hom.mpop$State) 208 | hom.mpop 209 | } 210 | 211 | cluster <- function(hom.mpop, nclusters){ 212 | #k-means clustering to order the plot 213 | t <- cast(hom.mpop[,c(26:27,29)], State ~ variable, value = "Rate") 214 | t[is.na(t)] <- 0 215 | cl <- kmeans(t[,2:ncol(t)], nclusters) 216 | t$Cluster <- cl$cluster 217 | t <- merge(ddply(t, .(Cluster), function(df) mean(df$"2008")), 218 | t, by = "Cluster") 219 | t <- t[,c(1,2,3)] 220 | hom.mpop <- merge(hom.mpop, t, by = "State") 221 | hom.mpop$State <- reorder(hom.mpop$State, -hom.mpop$V1) 222 | hom.mpop 223 | } 224 | 225 | smallMultiples <- function(hom, pop, nclusters = 8){ 226 | total.hom <- totalHom(hom) 227 | hom.mpop <- mergeHomPopS(hom) 228 | hom.mpop <- cluster(hom.mpop, nclusters) 229 | p <- ggplot(hom.mpop, aes(Year.of.Murder, Rate)) + 230 | geom_line(aes(color = factor(Cluster)), size = 1) 231 | p + facet_wrap(~ State, as.table = TRUE, 232 | scale="free_y") + 233 | labs(x = "", y = "Homicide Rate") + 234 | opts(title = config$title.sm) + 235 | scale_x_continuous(breaks = c(1990, 2000, 2008), 236 | labels = c("90", "00", "08")) + 237 | theme_bw() + 238 | geom_line(data = total.hom, aes(Year.of.Murder, Rate), 239 | color="gray70", linetype = 2, size =.5) + 240 | opts(legend.position = "none") 241 | } 242 | 243 | 244 | if(config$sex == "Women") { 245 | type <- "Mujer" 246 | config$title.sm <- config$states$ftitle.sm 247 | config$title.bardiff <- config$states$ftitle.bardiff 248 | config$title.barplot <- config$states$ftitle.barplot 249 | } else { 250 | type <- "Total" 251 | config$title.sm <- config$states$mtitle.sm 252 | 
config$title.bardiff <- config$states$mtitle.bardiff 253 | config$title.barplot <- config$states$mtitle.barplot 254 | } 255 | 256 | ########################################################## 257 | #Read the data 258 | ########################################################## 259 | hom <- cleanHom(type) 260 | pop <- cleanPop(type) 261 | 262 | ######################################################## 263 | #Barplot with the homicide rate in 2008 264 | ######################################################## 265 | year <- config$year 266 | hom.year <- mergeHomPopYear(hom, pop, year) 267 | print(barPlot(hom.year, year)) 268 | dev.print(png, file = "states/output/2008-homicide-bars.png", 269 | width = 480, height = 480) 270 | 271 | ######################################################## 272 | #Map with the homicide rate in 2008 273 | ######################################################## 274 | mexico.shp <- readShapePoly(map.icesi, 275 | IDvar = "NAME", 276 | proj4string = CRS("+proj=longlat")) 277 | 278 | hom.year.map <- mapOrder(hom.year, "County") 279 | 280 | Cairo(file="states/output/2008-homicide-map.png", width=480, height=480) 281 | print(plotMap(mexico.shp, redScale(hom.year.map$Rate))) 282 | dev.off() 283 | 284 | ################################################################### 285 | #Bar plot of the change in homicide rate from the start of the 286 | #drug war at the end of 2006 till 2008 287 | ################################################################### 288 | year1 <- config$year1 289 | year2 <- config$year2 290 | hom.diff <- getDiff(hom, pop, year1, year2) 291 | print(barDiff(hom.diff, year1, year2)) 292 | dev.print(png, file="states/output/2006-2008-change-homicide.png", 293 | width=480, height=480) 294 | 295 | ######################################################## 296 | #Map of the change in homicide rates 297 | ######################################################## 298 | hom.diff.map <- mapOrder(hom.diff, "County.x") 299 | 
Cairo(file="states/output/2006-2008-change-homicide-map.png", width=480, height=480) 300 | print(plotMap(mexico.shp, greenReds(hom.diff.map$Diff))) 301 | dev.off() 302 | 303 | ######################################################## 304 | #Small Multiples of each state 305 | ######################################################## 306 | #This is how you get anti-aliasing in R 307 | Cairo(file="states/output/1990-2008-homicide-small-multiples-w.png", 308 | type="png", width=960, height=600) 309 | #If it's for women you might want to switch the number of 310 | #kmeans clusters to 4 311 | print(smallMultiples(hom, pop, 8)) 312 | dev.off() 313 | 314 | 315 | #The graph for Chihuahua looks similar to the hockey stick 316 | #of global temperatures 317 | #Coincidence? or is global warming to blame 318 | 319 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | Analysis of the Drug War in Mexico 2 | ========================================================= 3 | Homicides in Mexico have increased greatly since the government sent in the military to fight drug cartels in December 2006. This series of scripts explores various statistics related to the increase in violence. 4 | 5 | Summary 6 | -------- 7 | 8 | * From 2007 to 2008 the homicide rate increased 65% 9 | * Most of the increase was concentrated in the states the drug cartels are based in or which they are disputing 10 | * In Mexico there are two sources of homicide data: Police records (provided by the SNSP), and the vital statistics system (provided by the INEGI). Not surprisingly both series fail to follow Benford's law. A closer look at the data reveals that: 11 | * The police records in 2008 are missing more than a thousand homicides in Chihuahua!
Just to give you some perspective, in Ciudad Juarez, Chihuahua's biggest city, there were more than 1,600 homicides, more than the 1,414 reported in the whole state according to police records. I strongly recommend against using data obtained from police records if you need the latest homicide numbers. 12 | * The Acteal massacre of 45 Tzotzil Indians in Chiapas is missing from the vital statistics database. According to the INEGI there were only 2 deaths during December 1997 in the municipality of Chenalhó. 13 | * The cultivation of marijuana and opium poppy increased and drug eradication decreased after the drug war started. The street price of cocaine in the US increased as a consequence of an increase in seizures by Mexican officials. 14 | * The percentage of people who had ever consumed cocaine doubled in Mexico 15 | * While the proportion of homicides involving a firearm increased by 10% for all of Mexico since 2004, there's not much evidence the expiration of the assault weapons ban was the cause of the rise in violence in the states where the drug cartels are based. 16 | * While the first joint army operations in 2006 and 2007 lowered the homicide rates in Michoacán and Guerrero, later military operations in Chihuahua, Durango, Veracruz and Nuevo León have coincided with increases in homicides and attempts by the Sinaloa cartel to take over drug trafficking routes from rival cartels. 17 | * After the army took control of Ciudad Juarez it became the most violent city in the world. 18 | * Based on execution rates I predict the homicide rate for 2009 will be 15.5 (95% CI 14.8-16.1) and 19.5 (95% CI 18.4-20.7) for 2010.
19 | 20 | The Story in Charts 21 | -------------------- 22 | 23 | _While the drug war met with some success at first, the homicide rate increased 65% between 2007 and 2008_ 24 | 25 | ![homicide rate in Mexico 1994-2009](http://imgur.com/UVhij.png) 26 | 27 | ------------------------------------------------------------------------------------------------------------------------ 28 | 29 | _As you can see Mexico is pretty violent, but not as violent as Brazil—at least in 2008. See the last chart for an estimate for 2010_ 30 | 31 | ![International Comparison](http://imgur.com/1pjDU.png) 32 | 33 | -------------------------------------------------------------------------------------------------------------------------- 34 | 35 | _Most of the violence is concentrated in the states home to the drug cartels._ 36 | 37 | ![Choropleth of homicide rates in 2008](http://imgur.com/oCAiV.png) 38 | 39 | ------------------------------------------------------------------------------------------------------------------------- 40 | 41 | _Here's what it looks like at the municipality level. 
Keep in mind that the big municipalities tend to draw more attention because of their size, but they also tend to have low population densities_ 42 | 43 | ![Choropleth of homicide rates by municipality](http://imgur.com/RvNHM.png) 44 | 45 | ------------------------------------------------------------------------------------------------------------------------ 46 | 47 | _There have been some big changes in homicide rates since the goverment declared war on the drug cartels at the end of 2006_ 48 | 49 | ![Change in homicide rates](http://imgur.com/Lt6NR.png) 50 | 51 | ----------------------------------------------------------------------------------------------------------------------------- 52 | 53 | _The top row is composed of the states that are home to drug trafficking organizations, though I would have preferred it if the kmeans clustering algorithm had classified Sonora instead of Nayarit with the narco-states. There were some big decreases in the homicide rates of the State of Mexico, Morelos, Oaxaca and Michoacan._ 54 | 55 | ![Homicide by State](http://imgur.com/0iTgo.png) 56 | 57 | ----------------------------------------------------------------------------------------------------------------------------------- 58 | 59 | _Benford’s Law was used as the expected distribution for the first digit of reported homicide rates. Both homicide series failed the test_ 60 | 61 | ![Benford's law](http://imgur.com/1AIZV.png) 62 | 63 | 64 | ------------------------------------------------------------------------------------------------------------------------ 65 | 66 | _In Mexico there are two ways to measure the number of homicides: 1) Police Data (SNSP) and 2) Vital Statistics (INEGI). The bar plot compares them to see if the police records (labeled SNSP and filled with blue) match those of the vital statistics system (labeled INEGI and filled with red). The police records are missing 1,153 homicides in Chihuahua alone! 
Just to give you some perspective, in Ciudad Juarez, Chihuahua's biggest city, there were more than [1,600 homicides](http://www.reuters.com/article/idUSN08340024), more than the 1,414 reported in the whole state according to police records._ 67 | 68 | _read the file readme.md in the directory [missing-homicides](http://github.com/diegovalle/Homicide-MX-Drug-War/tree/master/missing-homicides/) to see why this happened_ 69 | 70 | ![Who's missing homicides?](http://imgur.com/OQgS2.png "Chihuahua is missing 1153 homicides") 71 | 72 | ------------------------------------------------------------------------------------------------------------------------- 73 | 74 | _More generally there are some big differences in the reported homicides. But not enough to make them useless_ 75 | 76 | ![Differences in homicide rates](http://imgur.com/kDPlZ.png) 77 | 78 | ------------------------------------------------------------------------------------------------------------------------------------ 79 | __With the exception of Michoacan and Guerrero, when the army moved in, there was an increase in homicides__ 80 | 81 | _Chihuahua didn't fare well_ 82 | 83 | ![Chihuahua](http://imgur.com/lW1j2.png) 84 | 85 | _Was Operation Sonora a success?_ 86 | 87 | ![Sonora](http://imgur.com/Ht4Gu.png) 88 | 89 | _Durango also didn't fare well_ 90 | 91 | ![Durango](http://imgur.com/UGxLP.png) 92 | 93 | _A similar thing happened in the rest of the states where the government sent in the army_ 94 | 95 | ------------------------------------------------------------------------------------------------------------------------------------ 96 | 97 | _Annualized monthly homicide rates in Ciudad Juarez. The city turned into the most violent in the world after the army took control of it. 
Even rushing in reinforcements didn't return the murder rate to its former levels and after three months the bloodshed became even greater._ 98 | 99 | ![Ciudad Juarez](http://imgur.com/XmlSf.png) 100 | 101 | ------------------------------------------------------------------------------------------------------------- 102 | 103 | Things will only get worse until a new equilibrium is reached by the drug trafficking organizations 104 | 105 | ![Homicide Rate](http://imgur.com/WCWmI.png) 106 | 107 | 108 | 109 | Requirements 110 | ------------ 111 | 112 | * Imagemagick if you want to merge the state level homicide rate maps with the bar plots 113 | 114 | 115 | 116 | To Run 117 | ------- 118 | 119 | Just run the script "run-all.r" and it will create all the charts in their respective directories. If you want to get data for femicides edit the file "config/config.yaml" and change the sex to Female 120 | 121 | 122 | Contents 123 | -------- 124 | In the directories you'll find: 125 | 126 | * accidents-homicides-suicides: Estimates the homicide rate for all of Mexico based on accidental and violent death data from the INEGI. As a bonus it includes the suicide and accident rates. 127 | * Benford: See if the homicide data from the INEGI and ICESI follow Benford's law 128 | * conapo-pop-estimates: Estimates of the population of Mexico, done by the CONAPO, at the state level for the period 1990-2030 129 | * choropleths: Choropleths of the murder rate by county in Mexico for the years 1990, 1995, 2000, and 2006-2008 130 | * drugs: Some regressions to see if drug consumption is correlated with the homicide rate.
(It is, except for marijuana) 131 | * timelines: Pretty timelines of the effect on the murder rate of sending the army to fight the drug cartels 132 | * historic: Plot of the Mexican homicide rate from 1990 to 2009, a 133 | comparison with that of the US, Brazil, and England and Wales 134 | * missing-homicides: Plots of the different homicide data to figure out how trustworthy the Mexican statistics are. 135 | * most-violent-counties: A small multiple plot of the least and most violent municipalities for men and women 136 | * states: Pretty plots and choropleths of the homicide rate at the state level 137 | * trends: Seasonal and trend decomposition of the monthly homicide rates since 1990 138 | * predictions: My predictions on what the homicide rate will be like in 2009 and 2010 139 | * CIEISP: The original data from the CIEISP format the SNSP uses to collect homicide data from each state police agency 140 | * INEGIvsSNSP: A comparison of the homicide data from the police and vital statistics from 1997 to 2008 141 | * guns-executions: Statistics on homicides, homicides with firearm and executions 142 | 143 | Each directory contains its own readme so you may want to look at them 144 | 145 | Data Sources 146 | ------------ 147 | __Homicide data:__ 148 | 149 | Website of the INEGI: [INEGI](http://www.inegi.org.mx/est/contenidos/espanol/proyectos/continuas/vitales/bd/mortalidad/MortalidadGeneral.asp?s=est&c=11144) for the 150 | murder rate according to vital statistics 151 | 152 | Website of the ICESI: [ICESI](http://www.icesi.org.mx) for the murder rate according to the Mexican police (which turns out not to be a murder rate, only a poor estimate of the final tally) 153 | 154 | Website of the INEGI: [Statistical Yearbooks](http://www.inegi.org.mx/est/contenidos/espanol/sistemas/sisnav/selproy.aspx) that contain the final (over 90% reported) number of reports filed by the police for the crime of murder.
155 | 156 | __Population:__ 157 | 158 | [census data](http://www.inegi.org.mx/inegi/default.aspx?c=9260&s=est) for the years 1990, 1995, 2000 at the county level. 159 | 160 | [CONAPO](http://conapo.gob.mx/index.php?option=com_content&view=article&id=125&Itemid=203) 161 | for population estimates at the national level (1990-2030), state level (1990-2030) and county level(2005-2030). 162 | 163 | __Check the readme files in each subdirectory for detailed information.__ 164 | 165 | 166 | Output 167 | ------- 168 | historic: 169 | 170 | * A png chart of the homicide rate in Mexico 171 | * A png chart of the homicide rate in Mexico, the US, and England and Wales 172 | 173 | timelines: 174 | 175 | * Time series divided into before and after military operations for high crime states or states where the military has been sent 176 | * Chart of the murder rate in Ciudad Juarez before and after the military took over since it is the most violent city in the world 177 | * A latex file in the "report" directory with the confidence intervals of the breakpoints 178 | 179 | most-violent-counties: 180 | 181 | * Small multiples chart of the most and least violent counties for men and women 182 | 183 | choropleths: 184 | 185 | * Choropleths of Mexican Homicide Rates 1990, 1995, 2000, and 2006-2008 186 | 187 | accidents-homicides-suicides: 188 | 189 | * csv files with the rates of accident, homicides and suicides 190 | 191 | states: 192 | 193 | * Maps and charts with the homicide rate at the state level 194 | * Maps and charts with the change in homicide rate from 2006 to 2008 195 | * If you run merge.bat you get a merged chart of the bar plots and maps 196 | * A small multiples chart with the murder rate of each state from 1994 to 2008 197 | 198 | Benford: 199 | 200 | * Do the homicide data follow Benford's law? 
201 | 202 | missing-homicides 203 | 204 | * Why is there such a big difference between the two sources (police records and vital statistics) of homicide data 205 | 206 | drugs: 207 | 208 | * Correlations between drug use and homicides 209 | 210 | CIEISP 211 | 212 | * The original data from the SNSP 213 | 214 | guns-executions 215 | 216 | * Statistics on homicides, homicides with firearm and executions 217 | 218 | predictions 219 | 220 | * What will the homicide rates be in 2009 and 2010 221 | 222 | Notes 223 | ----- 224 | Where possible I tried using the official mid-year population estimates from the CONAPO, but for some of the choropleths (1990, 1995 and 2000) I used census data at the county level. 225 | 226 | The state map of Mexico was downloaded from the [GADM database of Global Administrative Areas](http://www.gadm.org/Mexico) and simplified with 227 | [MapShaper](http://mapshaper.com/test/demo.html) 228 | 229 | Author 230 | ----- 231 | [Diego Valle](http://www.diegovalle.net) 232 | -------------------------------------------------------------------------------- /states/homicide-bystate.r: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | ##### Author: Diego Valle Jones 3 | ##### Website: www.diegovalle.net 4 | ##### Date Created: Sat Jan 23 21:10:55 2010 5 | ######################################################## 6 | #1. Bar chart of the homicide rate in 2008 7 | #2. Map of the homicide rate 8 | #3. Bar chart of the difference in homicide rate 2008-2006 9 | #4. Map of the same 10 | #5. 
Small multiples of the evolution of the murder rate 1990-2008 11 | 12 | source("library/utilities.r") 13 | 14 | ############################################# 15 | #Constants 16 | kyears.start <- 1990 17 | kyear.zoom <- 1994 #The data before 1994 is iffy 18 | kyears <- kyears.start:2008 19 | ############################################# 20 | #cleanHom: read the compressed homicide csv and return one row per state and year of murder for the rows whose Sex column matches `type`, 21 | #with Year.of.Murder and Code converted to numbers and the row sum of the count columns stored in Tot 22 | cleanHom <- function(type="Total") { 23 | hom <- read.csv(bzfile("states/data/homicide-mun-2008.csv.bz2"), skip=4) 24 | names(hom)[1:4] <- c("Code", "County", "Year.of.Murder", "Sex") 25 | hom$County <- iconv(hom$County, "windows-1252", "utf-8") 26 | hom$Code <- iconv(hom$Code, "windows-1252", "utf-8") 27 | hom <- hom[grep("=CONCATENAR", hom$Code),] #keep only the rows whose Code cell holds a =CONCATENAR formula 28 | hom <- hom[!grepl("Extranjero", hom$County),] #grepl instead of -grep: a negative index built from an empty grep() would drop every row 29 | hom <- hom[grep(type, hom$Sex),] 30 | hom$Year.of.Murder <- as.numeric(gsub('[[:alpha:]]', '', 31 | hom$Year.of.Murder)) 32 | hom <- subset(hom, Year.of.Murder >= kyears.start) 33 | #Get rid of the commas in the numbers: 155,000 to 155000 34 | col2cvt <- 5:ncol(hom) 35 | hom[ ,col2cvt] <- lapply(hom[ ,col2cvt], 36 | function(x){as.numeric(gsub(",", "", x))}) 37 | hom[is.na(hom)] <- 0 38 | hom$Tot <- apply(hom[ , col2cvt], 1, sum) 39 | hom$Code <- as.numeric(gsub(".*,([[:digit:]]+).", "\\1", hom$Code)) #keep only the digits captured after a comma in the formula 40 | hom 41 | } 42 | #cleanPop: read the CONAPO state population estimates (the female-only file when type is "Mujer"), add a Code column numbering the rows in file order and convert State to utf-8 43 | cleanPop <- function(type = "Total") { 44 | if(type == "Mujer") 45 | pop <- read.csv("conapo-pop-estimates/conapo-states-f.csv") 46 | else 47 | pop <- read.csv("conapo-pop-estimates/conapo-states.csv") 48 | pop$Code <- seq_len(nrow(pop)) #number the rows instead of hard-coding 1:33 49 | pop$State <- iconv(pop$State, "windows-1252", "utf-8") 50 | pop 51 | } 52 | #mergeHomPopYear: attach the population of `year` to that year's homicide rows (matched by Code) and add Rate, the homicides per 100,000 inhabitants 53 | mergeHomPopYear <- function(hom, pop, year = 2008) { 54 | hom2008 <- merge(subset(hom, Year.of.Murder == year), 55 | pop[ ,c(1,year-kyears.start+2,ncol(pop))], 56 | by="Code", all.x = TRUE) 57 | names(hom2008)[ncol(hom) + 2] <- "popyear" 58 | #The per 100,000 murder rate 59 | hom2008$Rate <- hom2008$Tot / hom2008$popyear * 100000 60 | hom2008$County <- cleanNames(hom2008) 61 | 
hom2008$County <- factor(hom2008$County) 62 | hom2008 63 | } 64 | #redScale: give every rate a shade of red, one palette step per rounded unit of rate; rates above 70 get a fixed, darker color 65 | redScale <- function(rate) { 66 | #number of palette steps: the highest value, or the second highest value (Sinaloa) when the highest would need more than 70 steps 67 | obs <- round(range(rate)[2]) + 1 68 | if(obs > 70) { 69 | obs <- round(-sort(-rate)[2]) + 1 70 | } 71 | index <- round(rate) + 1 72 | clr.inc <- colorRampPalette(brewer.pal(8, "Reds")) 73 | vec <- clr.inc(obs)[index] 74 | #special color for Chihuahua since it resembles a war zone, 75 | #otherwise the colors for the rest of the country would be 76 | #too light 77 | vec[rate > 70] <- "#410101" 78 | vec 79 | } 80 | #barPlot: horizontal bar chart of Rate by County, sorted by rate and filled with redScale(); the dashed line marks the population-weighted mean rate 81 | barPlot <- function(hom2008, year="") { 82 | hom2008$color <- redScale(hom2008$Rate) 83 | hom2008$County <- reorder(hom2008$County, hom2008$Rate) 84 | xmax <- range(hom2008$Rate)[2] + 5 85 | hom.mean <- wtd.mean(hom2008$Rate, hom2008$popyear) 86 | ggplot(data = hom2008, aes(County, Rate)) + 87 | geom_bar(stat = "identity", aes(fill = color)) + 88 | scale_y_continuous(limits = c(0, xmax)) + 89 | coord_flip() + 90 | labs(x = "", y = "Homicides per 100,000") + 91 | opts(title = paste(config$title.barplot, year)) + 92 | opts(legend.position = "none") + 93 | scale_fill_identity(aes(breaks = color)) + 94 | geom_text(aes(label=round(Rate, digits = 1)), hjust = -.05, 95 | color = "gray50") + 96 | geom_hline(yintercept = hom.mean, alpha=.1, linetype=2) 97 | } 98 | #mapOrder: clean the names in column `varname`, match them against the names in the global mexico.shp and return the rows merged onto the map's names by Code 99 | #We need to order the variables by name to match them with the map 100 | mapOrder <- function(df, varname = "County.x"){ 101 | #df2 <- df 102 | df$County <- cleanNames(df, varname) 103 | #df$County <- iconv(df$County, "windows-1252", "utf-8") 104 | df$Code <- pmatch(df$County, (iconv(mexico.shp$NAME_1,"windows-1252","utf-8"))) 105 | df.merge <- merge(data.frame(iconv(mexico.shp$NAME_1,"windows-1252","utf-8"), Code = seq_along(mexico.shp$NAME_1)), 106 | df, by="Code", all.x = TRUE) 107 | df.merge 108 | } 109 | #plotMap: draw the map with one fill color per polygon and a title; when `legend` is given, add a legend filled with `plotclr` 110 | plotMap <- function(mexico.shp, colors, plotclr, legend="", title="") { 111 | plot(mexico.shp, col = colors, border="black", lwd=2) 112 | title(main = title) 113 | 
if (!identical(legend, "")) { #identical() also copes with a vector of legend labels, which legend != "" does not 114 | legend("topright", legend = legend, 115 | fill = plotclr, cex=0.8, bty="n") 116 | } 117 | par(bg = "white") 118 | } 119 | #getDiff: merge the homicide rows of the two years with their populations (the years are swapped if given in reverse order) and 120 | #return a data frame with the change in homicide rates: Diff is the later year's rate minus the earlier year's rate 121 | getDiff <- function(hom, pop, year1, year2) { 122 | if(year1>year2){ 123 | temp <- year2 124 | year2 <- year1 125 | year1 <- temp 126 | } 127 | hom2008 <- merge(subset(hom, Year.of.Murder == year2), 128 | pop[ ,c(1,year2-kyears.start+2,ncol(pop))], 129 | by="Code", all.x=TRUE) 130 | names(hom2008)[ncol(hom)+2] <- "popyear2" 131 | hom2008$Rate2008 <- hom2008$Tot / hom2008[[ncol(hom)+2]] * 100000 132 | hom2006 <- merge(subset(hom, Year.of.Murder == year1), 133 | pop[ ,c(1,year1-kyears.start+2,ncol(pop))], 134 | by="Code", all.x=TRUE) 135 | names(hom2006)[ncol(hom)+2] <- "popyear1" 136 | hom2006$Rate2006 <- hom2006$Tot / hom2006[[ncol(hom)+2]] * 100000 137 | hom.diff <- merge(hom2008,hom2006, by ="Code") 138 | hom.diff$Diff <- hom.diff$Rate2008 - hom.diff$Rate2006 139 | hom.diff$County.x <- factor(hom.diff$County.x) 140 | hom.diff 141 | } 142 | #greenReds: one color per difference, from the "Oranges" palette for increases (>= 0) and from the "Blues" palette for decreases 143 | #orange-blue scale for the difference barplot (the function name is historical) 144 | greenReds <- function(difference){ 145 | clr.inc <- colorRampPalette(brewer.pal(5, "Oranges")) 146 | clr.dec <- colorRampPalette(brewer.pal(5, "Blues")) 147 | #I (heart) R 148 | colors <- difference 149 | obs <- max(abs(round(difference))) + 1 #size both palettes by the largest change in either direction; sizing by the largest increase alone left bigger decreases without a color 150 | index <- abs(round(difference[difference >= 0])) + 1 151 | colors[difference >= 0] <- clr.inc(obs)[index] 152 | 153 | index <- abs(round(difference[difference < 0])) + 1 154 | colors[difference < 0] <- clr.dec(obs)[index] 155 | 156 | colors 157 | } 158 | #barDiff: horizontal bar chart of Diff by state with the state names written at the zero line; the dashed line is the difference of the two population-weighted mean rates 159 | #Based on code from: 160 | #http://learnr.wordpress.com/2009/06/01/ggplot2-positioning-of-barplot-category-labels/ 161 | barDiff <- function(hom.diff, year1="", year2="") { 162 | hom.diff$color <- greenReds(hom.diff$Diff) 163 | hom.diff$hjust <- ifelse(hom.diff$Diff > 0, 1.1, -.1) 164 | hom.diff$text.pos <- ifelse(hom.diff$Diff > 0, -.05, 1) 165 | 
hom.diff$County.x <- cleanNames(hom.diff, "County.x") 166 | hom.diff$County.x <- factor(hom.diff$County.x) 167 | hom.diff$County.x <- reorder(hom.diff$County.x, hom.diff$Diff) 168 | xmin <- range(hom.diff$Diff)[1] - 2 169 | xmax <- range(hom.diff$Diff)[2] + 2 170 | hom.mean08 <- wtd.mean(hom.diff$Rate2008, hom.diff$popyear2) 171 | hom.mean06 <- wtd.mean(hom.diff$Rate2006, hom.diff$popyear1) 172 | ggplot(hom.diff, aes(x=County.x, y=Diff, label=County.x, 173 | hjust = hjust)) + 174 | geom_text(aes(y = 0, size=3)) + 175 | geom_bar(stat = "identity",aes(fill = color)) + 176 | scale_y_continuous(limits = c(xmin, xmax)) + 177 | coord_flip() + 178 | labs(x = "", y = "Change in Rate per 100,000") + 179 | scale_x_discrete(breaks = NA) + 180 | opts(legend.position = "none") + 181 | opts(title = paste(config$title.bardiff, 182 | year1, "-", year2, ")", sep = "")) + 183 | scale_fill_identity(aes(breaks = color)) + 184 | geom_text(aes(label=round(Diff, digits = 1), hjust = text.pos), 185 | color="gray50") + 186 | geom_hline(yintercept = hom.mean08 - hom.mean06, alpha=.1, 187 | linetype=2) 188 | } 189 | 190 | #################################################### 191 | #Small Multiples Plot of Murders by State 192 | #################################################### 193 | totalHom <- function(hom){ #yearly sum of Tot over all rows, divided by row 33 of the global `pop` (presumably the "Nacional" row) to get a rate per 100,000 194 | total.hom <- ddply(hom, .(Year.of.Murder), function(df) sum(df$Tot)) 195 | total.hom$pop <- unlist(pop[33, 2:(ncol(pop)-1)]) 196 | total.hom$Rate <- (total.hom$V1 / total.hom$pop) * 100000 197 | total.hom 198 | } 199 | #mergeHomPopS: reshape the global `pop` (without the "Nacional" row) to one row per state and year, merge the homicides onto it (missing totals count as 0) and add the rate per 100,000 200 | mergeHomPopS <- function(hom){ 201 | mpop <- melt(subset(pop, State != "Nacional"), id=c("Code", "State")) 202 | mpop$variable <- as.numeric(substring(mpop$variable, 2)) 203 | mpop$Year.of.Murder <- mpop$variable 204 | hom.mpop <- merge(hom, mpop, by=c("Code","Year.of.Murder"), 205 | all.y = TRUE) 206 | if(any(is.na(hom.mpop$Tot))) 207 | hom.mpop[is.na(hom.mpop$Tot), ]$Tot <- 0 208 | hom.mpop$Rate <- hom.mpop$Tot / hom.mpop$value * 100000 209 | 
hom.mpop$State <- cleanNames(hom.mpop, "State") 210 | hom.mpop$State <- factor(hom.mpop$State) 211 | hom.mpop 212 | } 213 | #cluster: group the states with k-means on their yearly rates and reorder the State factor by the cluster's mean 2008 rate (cubed) plus the state's last rate; kmeans() starts from random centers, so the grouping can change between runs 214 | cluster <- function(hom.mpop, nclusters){ 215 | #k-means clustering to order the facets 216 | t <- cast(hom.mpop[,c("State", "variable", "Rate")], State ~ variable, value = "Rate") #select by name; the positions 26, 27 and 29 only held for one particular input width 217 | t[is.na(t)] <- 0 218 | cl <- kmeans(t[,2:ncol(t)], nclusters) 219 | t$Cluster <- cl$cluster 220 | t <- merge(ddply(t, .(Cluster), function(df) mean(df$"2008")), 221 | t, by = "Cluster") 222 | t <- t[,c(1,2,3)] 223 | hom.mpop <- merge(hom.mpop, t, by = "State") 224 | hom.mpop <- ddply(hom.mpop, .(State), transform, 225 | max = Rate[length(Rate)]) 226 | hom.mpop$order <- hom.mpop$V1^3 + hom.mpop$max 227 | hom.mpop$State <- reorder(hom.mpop$State, -hom.mpop$order) 228 | hom.mpop 229 | } 230 | #smallMultiples: one facet per state with its yearly homicide rate, colored by cluster, plus the national rate as a dashed gray line. NOTE: totalHom() and mergeHomPopS() read the global `pop`, not the `pop` argument 231 | smallMultiples <- function(hom, pop, nclusters = 8){ 232 | total.hom <- totalHom(hom) 233 | hom.mpop <- mergeHomPopS(hom) 234 | hom.mpop <- cluster(hom.mpop, nclusters) 235 | p <- ggplot(hom.mpop, aes(Year.of.Murder, Rate)) + 236 | geom_line(aes(color = factor(Cluster)), size = 1) 237 | p + facet_wrap(~ State, as.table = TRUE, 238 | scale="free_y") + 239 | labs(x = "", y = "Homicide Rate") + 240 | opts(title = config$title.sm) + 241 | scale_x_continuous(breaks = seq(kyears.start, 2008, by = 4)) + 242 | theme_bw() + 243 | geom_line(data = total.hom, aes(Year.of.Murder, Rate), 244 | color="gray70", linetype = 2, size =.5) + 245 | opts(legend.position = "none") + 246 | opts(axis.text.x=theme_text(angle=60, hjust=1.2 )) + 247 | coord_cartesian(xlim = c(kyear.zoom, 2008)) 248 | 249 | } 250 | 251 | #Pick the sex filter, the chart titles and the number of clusters from the configuration 252 | if(config$sex == "Female") { 253 | type <- "Mujer" 254 | config$title.sm <- config$states$ftitle.sm 255 | config$title.bardiff <- config$states$ftitle.bardiff 256 | config$title.barplot <- config$states$ftitle.barplot 257 | nclust <- 4 258 | } else { 259 | type <- "Total" 260 | config$title.sm <- config$states$mtitle.sm 261 | config$title.bardiff <- config$states$mtitle.bardiff 262 | 
config$title.barplot <- config$states$mtitle.barplot 263 | nclust <- 8 264 | } 265 | 266 | ########################################################## 267 | #Read the data 268 | ########################################################## 269 | hom <- cleanHom(type) 270 | pop <- cleanPop(type) 271 | 272 | ######################################################## 273 | #Barplot with the homicide rate in 2008 (the year comes from the configuration; the output file name always says 2008) 274 | ######################################################## 275 | year <- config$states$year 276 | hom.year <- mergeHomPopYear(hom, pop, year) 277 | print(barPlot(hom.year, year)) 278 | dev.print(png, file = "states/output/2008-homicide-bars.png", 279 | width = 480, height = 480) 280 | 281 | ######################################################## 282 | #Map with the homicide rate in 2008 283 | ######################################################## 284 | load("maps/map_mx.RData") #load mexico.shp 285 | 286 | hom.year.map <- mapOrder(hom.year, "County") 287 | 288 | Cairo(file="states/output/2008-homicide-map.png", width=480, height=480) 289 | print(plotMap(mexico.shp, redScale(hom.year.map$Rate))) 290 | dev.off() 291 | 292 | ################################################################### 293 | #Bar plot of the change in homicide rate from the start of the 294 | #drug war at the end of 2006 till 2008 295 | ################################################################### 296 | year1 <- config$states$year1 297 | year2 <- config$states$year2 298 | hom.diff <- getDiff(hom, pop, year1, year2) 299 | print(barDiff(hom.diff, year1, year2)) 300 | dev.print(png, file="states/output/2006-2008-change-homicide.png", 301 | width=480, height=480) 302 | 303 | ######################################################## 304 | #Map of the change in homicide rates 305 | ######################################################## 306 | hom.diff.map <- mapOrder(hom.diff, "County.x") 307 | Cairo(file="states/output/2006-2008-change-homicide-map.png", width=480, height=480) 308 | 
print(plotMap(mexico.shp, greenReds(hom.diff.map$Diff))) 309 | dev.off() 310 | 311 | ######################################################## 312 | #Small Multiples of each state 313 | ######################################################## 314 | #This is how you get anti-aliasing in R 315 | Cairo(file="states/output/homicide-small-multiples.png", 316 | type="png", width=960, height=600) 317 | print(smallMultiples(hom, pop, nclust)) 318 | dev.off() 319 | 320 | #The graph for Chihuahua looks similar to the hockey stick 321 | #of global temperatures 322 | #Coincidence? or is global warming to blame 323 | 324 | -------------------------------------------------------------------------------- /timelines/timelines-mun.r: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | ##### Author: Diego Valle Jones ##### 3 | ##### Website: www.diegovalle.net ##### 4 | ##### Date: 2010-Jan-22 ##### 5 | ######################################################## 6 | 7 | ####################################################### 8 | #Time series of the monthly homicide rate by county in 9 | #the Mexican states where joint military operations 10 | #took place from 2006-2008 11 | ####################################################### 12 | 13 | source("timelines/constants.r") 14 | source("library/utilities.r") 15 | 16 | #;;;;Apply an artificial correction to the data 17 | #;;;;to prove that Global Warming is happening 18 | #;;;;Oh wait, wrong file 19 | #cleanHom: keep the rows of `df` whose Code matches the pattern `state`, drop the "Total" and "No especificado" rows, add Total.Murders and an end-of-month Date, drop December 2008 and turn Code into a number 20 | cleanHom <- function(df, state) { 21 | df <- df[grep(state, df$Code), ] 22 | df <- subset(df, Year.of.Murder != "Total" & 23 | Year.of.Murder != "No especificado" & 24 | Month.of.Murder != "Total" & 25 | Month.of.Murder != "No especificado" & 26 | County != "No especificado") 27 | df$County <- iconv(df$County, "windows-1252", "utf-8") 28 | df$Year.of.Murder <- as.numeric(gsub('[[:alpha:]]', '', 29 | df$Year.of.Murder)) 30 | col2cvt <- 5:ncol(df) 31 | 
df[is.na(df)] <- 0 32 | df$Total.Murders <- apply(df[ , col2cvt], 1, sum) 33 | df$Month.of.Murder <- factor(df$Month.of.Murder) 34 | #The months are in alphabetical order, so 04=Abril, etc. (this needs all twelve Spanish month names to be present in the subset) 35 | levels(df$Month.of.Murder) <- c("04","08","12","01","02","07","06","03","05","11","10","09") 36 | 37 | df$Date <- as.Date(paste(df$Month.of.Murder,"/", 38 | "01", "/", 39 | df$Year.of.Murder, sep =""), "%m/%d/%Y") 40 | #Make sure we code the dates as the last day of the month 41 | df$Date <- as.Date(format(df$Date + 31, "%Y%m01"), "%Y%m%d") - 1 42 | 43 | 44 | 45 | #The data for the last month of 2008 isn't complete 46 | df <- subset(df, Date < as.Date("12/01/2008", "%m/%d/%Y")) 47 | 48 | #Remove the space that separates the state code from the 49 | #county code so we can merge the homicide data with the 50 | #population data 51 | df$Code <- as.numeric(gsub("[ ]", "", df$Code )) 52 | df 53 | } 54 | #mergeHomPop: merge the monthly homicides with the monthly population, keep the counties that ever exceed `cutoff` (or only those listed in `counties`), compute the annualized rate and add a zero row for every county-month without homicides 55 | mergeHomPop <- function(df, pop, cutoff, counties = NULL) { 56 | df$Month.of.Murder <- as.numeric(as.character(df$Month.of.Murder)) 57 | df.pop <- merge(df, pop, by.x=c("Code", "Year.of.Murder", 58 | "Month.of.Murder"), 59 | by.y=c("Code", "Year", "Month"), all.x=TRUE) 60 | #Only big counties! 61 | 
#Subsetting by size doesn't work because populations change 62 | #over time, so 63 | #a county that started at 90,000 will be missing half of 64 | #the observations 65 | counties100 <- subset(df.pop, Population > cutoff) 66 | states <- unique(factor(counties100$County.x)) 67 | df.pop <- subset(df.pop, County.x %in% states) 68 | if(!is.null(counties)){ 69 | states <- factor(counties) 70 | df.pop <- subset(df.pop, County.x %in% counties) 71 | } 72 | 73 | df.pop$rate <- (df.pop$Total.Murders / df.pop$value * 100000) * 12 74 | #since the INEGI in all its wisdom decided to simply delete 75 | #the rows with no monthly homicides we have to recreate the 76 | #database to include them 77 | start <- as.Date("2005/02/01") 78 | next.mon <- seq(start, length.out=47, by='1 month') 79 | period <- next.mon - 1 80 | dates.df <- data.frame(Date = rep(period, 81 | each = length(states)), 82 | County.x = rep(states, length(period)) 83 | ) 84 | dates.df$DateMid <- as.Date(format(dates.df$Date, "%Y%m15"), 85 | "%Y%m%d") 86 | df.pop <- merge(dates.df, df.pop, 87 | by = c("Date", "County.x"), 88 | all.x = TRUE) 89 | #An NA means there were no murders, so we have to change it to 0 90 | df.pop$rate[is.na(df.pop$rate)] <- 0 91 | df.pop$Total.Murders[is.na(df.pop$Total.Murders)] <- 0 92 | df.pop 93 | } 94 | #getData: clean the homicide data of one state and merge it with the population data 95 | getData <- function(df, pop, state, cutoff, counties = NULL){ 96 | hom.clean <- cleanHom(df, state) 97 | mergeHomPop(hom.clean, pop, cutoff, counties) 98 | } 99 | #addMonths: use each yearly population as the June value, fill in the other months of 2005-2008 with a spline per Code and merge the monthly estimates (column value, with Month) back onto `pop` 100 | addMonths <- function(pop){ 101 | allmonths <- seq(2005, 2008.9999, by = 1/12) 102 | pop2 <- data.frame(time=allmonths, Year=floor(allmonths), 103 | month = 1:12) 104 | pop2 <- merge(pop, pop2, by = "Year", all.y = TRUE) 105 | pop2$Monthly.Pop[pop2$month == 6] <- 106 | pop2[pop2$month == 6,]$Population 107 | 108 | estimates <- ddply(pop2, .(Code), 109 | function(df) na.spline(df$Monthly.Pop, na.rm=FALSE)) 110 | estimates <- melt(estimates, id="Code") 111 | estimates$Month <- rep(1:12, each=length(unique(estimates$Code))) #one block of rows per month; count the codes instead of hard-coding 2454 112 | 
estimates$Year <- rep(2005:2008, each=length(unique(estimates$Code))*12) 113 | pop <- merge(pop, estimates, by = c("Year", "Code")) 114 | pop 115 | } 116 | #cleanPop: read the compressed population csv, drop incomplete rows, strip the spaces inside the numbers, reshape to one row per county and year and add the monthly estimates 117 | cleanPop <- function(filename) { 118 | pop <- read.csv(bzfile(filename)) 119 | pop <- na.omit(pop) 120 | col2cvt <- 3:ncol(pop) 121 | pop[,col2cvt] <- lapply(pop[ ,col2cvt], 122 | function(x){as.numeric(gsub(" ", "", x))}) 123 | popm <- melt(pop, id = c("Clave", "Entidad.federativa.o.municipio")) 124 | #drop the first character of the column name (presumably the X that read.csv puts before a year) to get the numeric year 125 | popm$variable <- as.numeric(substring(popm$variable, 2)) 126 | names(popm) <- c("Code", "County", "Year","Population") 127 | popm$County <- iconv(popm$County, "windows-1252", "utf-8") 128 | addMonths(popm) 129 | } 130 | #drawTS: one facet per county with the monthly annualized rate (point size = number of homicides), a smoother per `group` and a labeled vertical line at the date of every operation 131 | #http://stackoverflow.com/questions/2270201/how-to-get-geom-vline-and-facet-wrap-from-ggplot2-to-work-inside-a-function 132 | drawTS <- function(df.pop, operations, title, method) { 133 | date.df <- data.frame(d = as.Date(unlist(operations), 134 | origin = "1970-01-01"), 135 | t = names(operations)) 136 | df.pop$County.x <- reorder(factor(df.pop$County.x), -df.pop$rate) 137 | ggplot(df.pop, aes(DateMid, rate)) + 138 | geom_point(aes(size=Total.Murders), color="darkred", alpha =.9) + 139 | scale_x_date() + 140 | geom_smooth(aes(group = group), se = FALSE, method = method) + 141 | xlab("") + ylab("Annualized Homicide Rate") + 142 | geom_text(aes(x = d, label = t, y = -9), 143 | data = date.df, 144 | size = 3, hjust = 1, vjust = 0) + 145 | geom_vline(aes(xintercept = d), data = date.df, 146 | alpha = .4) + 147 | facet_wrap(~ County.x, ncol = 1, 148 | scale="free_y") + 149 | scale_size("Number of\nHomicides") + 150 | opts(title = title) 151 | #theme_bw() 152 | #opts(legend.position = "none") 153 | } 154 | #createPlot: label every row with a group from cutDates() (defined outside this file; presumably one group per period delimited by the operation dates) and draw the time series 155 | createPlot <- function(df.pop, operations, title = "", method, hack) { 156 | df.pop$group <- cutDates(df.pop, unlist(operations), hack) 157 | drawTS(df.pop, operations, title, method) 158 | } 159 | #breaks: fit breakpoints to one county's monthly rate, regressed on the number of days in each month, and return the confidence intervals for `brks` breaks as a data frame; `ll` is not used 160 | breaks <- function(df, brks, h, ll){ 161 | ndays <- 
strptime(df$Date, format = "%Y-%m-%d")$mday 162 | rate <- ts(df$rate, start=2005, frequency=12) 163 | #fd <- Fstats(rate ~ 1) 164 | bp.mun <- breakpoints(rate ~ ndays, h) 165 | x <- confint(bp.mun, breaks = brks) 166 | data.frame(x$confint) 167 | } 168 | #convertToDate: turn a count of months since January 2005 into the 15th of that month, e.g. 0 -> 2005-01-15 and 12 -> 2006-01-15; the date is the (invisible) value of the assignment 169 | convertToDate <- function(x){ 170 | d <- as.Date(paste((x %% 12) + 1,"/", 171 | "15", "/", 172 | floor(x / 12) + 2005, sep =""), "%m/%d/%Y") 173 | #as.Date(format(d + 31, "%Y%m01"), "%Y%m%d") - 1 174 | #format(d, format = "%b") 175 | } 176 | #convertDateToChar: format columns 2 to 4 as month-year text and the remaining columns as day-month-year text 177 | convertDateToChar <- function(df){ 178 | dateToChar <- function(x){ 179 | #abbreviated month and two-digit year (the unused as.character() call that stood here was removed) 180 | format(as.Date(x), format = "%b-%y") 181 | } 182 | dateToFullChar <- function(x){ 183 | #day, abbreviated month and two-digit year (the unused as.character() call that stood here was removed) 184 | format(as.Date(x), format = "%d-%b-%y") 185 | } 186 | df[,2:4] <- sapply(df[,2:4], dateToChar) 187 | df[,5:ncol(df)] <- sapply(df[,5:ncol(df)], dateToFullChar) 188 | df 189 | } 190 | #addOps: append the values in `ll` as extra columns, one column per element 191 | addOps <- function(df, ll){ 192 | cbind(df, t(unlist(ll))) 193 | } 194 | #joinBreaksOps: convert the breakpoint columns 2 to 4 to dates, append the operation dates, name the first four columns and format every date as text 195 | joinBreaksOps <- function(df, ll){ 196 | df[2:4] <- sapply(df[2:4], convertToDate) 197 | df <- addOps(df, ll) 198 | names(df)[1:4] <- c("Municipality", "Lower", "Breakpoints", "Upper") 199 | convertDateToChar(df) 200 | } 201 | #findbreaks: run breaks() for every county in `df` and return the table of breakpoints and operation dates 202 | findbreaks <- function(df, brks = 1, h = .15, ll){ 203 | breakpoints <- ddply(df, .(County.x), breaks, brks, h, ll) 204 | joinBreaksOps(breakpoints, ll) 205 | } 206 | #savePlot: draw the time series chart into a png file with Cairo. NOTE(review): this shares its name with grDevices::savePlot 207 | savePlot <- function(df, ll, title = "", width = 700, height = 600, 208 | file, method = lm, hack = 0) { 209 | Cairo(width, height, file=file, type="png", bg="white") 210 | print(createPlot(df, ll, title, method, hack)) 211 | dev.off() 212 | } 213 | 214 | 215 | hom <- read.csv(bzfile("timelines/data/county-month.csv.bz2")) 216 | pop <- cleanPop("timelines/data/pop.csv.bz2") 217 | 218 | #the county must be this big to enter the chart 219 | popsize <- 100000 220 | 221 | ######################################################## 222 | #Finally, the plots 223 | 
######################################################## 224 | report.ll <- list() 225 | #Every section below builds the data of one state, saves its chart and stores the table returned by findbreaks() in report.ll 226 | #Baja California Norte! as the ICESI would say, hahahaha 227 | bcn.df <- getData(hom, pop, baja.california, popsize) 228 | ll.bcn <- list("Joint Operation Tijuana" = op.tij, 229 | "E.A.F. Captured" = doctor) 230 | #This is a horrible hack. stat_smooth dies when it tries to 231 | #do an lm with n = 1 232 | savePlot(bcn.df, ll.bcn, 233 | "Baja California - Homicide Rates and Military Operations", 234 | file = "timelines/output/Baja California.png", 235 | hack = 15) 236 | report.ll$bcn <- findbreaks(bcn.df, h = 3, ll = ll.bcn) 237 | 238 | 239 | 240 | #Sonora 241 | son.df <- getData(hom, pop, sonora, popsize) 242 | ll.son <- list("Operation Sonora I" = op.son) 243 | savePlot(son.df, ll.son, 244 | "Sonora - Homicide Rates and Military Operations", 245 | file = "timelines/output/Sonora.png") 246 | report.ll$son <- findbreaks(son.df, 1, ll = ll.son) 247 | 248 | #Chihuahua 249 | chi.df <- getData(hom, pop, chihuahua, popsize) 250 | ll.chi <- list("Joint Operation Triangulo Dorado" = op.tria.dor, 251 | "Joint Operation Chihuahua" = op.chi) 252 | savePlot(chi.df, ll.chi, 253 | "Chihuahua - Homicide Rates and Military Operations", 254 | file = "timelines/output/Chihuahua.png", height=700) 255 | report.ll$chi <- findbreaks(chi.df, 1, ll = ll.chi) 256 | 257 | #Interesting municipalities in Chihuahua (bordering the US); a cutoff of 0 plus an explicit list selects exactly these municipalities 258 | muni <- c("Janos", "Ascensión", 259 | "Guadalupe", 260 | "Ojinaga", "Praxedis G. Guerrero", 261 | "Ahumada", 262 | "Nuevo Casas Grandes", 263 | "Coyame del Sotol") 264 | chi.bdr.df <- getData(hom, pop, chihuahua, 0, muni) 265 | savePlot(chi.bdr.df, ll.chi, 266 | "Chihuahua - Municipalities Near the US Border (excluding C. 
Juarez)", 267 | file = "timelines/output/Chihuahua-border.png", height=700) 268 | report.ll$chi.bdr <- findbreaks(chi.bdr.df, 1, ll = ll.chi) 269 | 270 | #Now just the ones with a high murder rate (and Creel 'cause of the name) 271 | muni <- c("Coronado", "Matamoros", "Balleza", "Nonoava", 272 | "Valle de Zaragoza", "Hidalgo del Parral", 273 | "San Francisco de Borja", "Namiquipa", "Ocampo", 274 | "Guazapares","Bocoyna") 275 | chi.int.df <- getData(hom, pop, chihuahua, 0, muni) 276 | savePlot(chi.int.df, ll.chi, 277 | "Chihuahua - Some Municipalities with a High Homicide Rate", 278 | file = "timelines/output/Chihuahua-interstng.png", height=700) 279 | report.ll$chi.int <- findbreaks(chi.int.df, 1, ll = ll.chi) 280 | 281 | 282 | #Michoacán (I hate trying to get emacs and R to understand utf!) 283 | mich.df <- getData(hom, pop, michoacan, popsize) 284 | ll.mich <- list("Joint Operation Michoacan" = op.mich, 285 | "A.B.L. Captured" = bel.ley) 286 | savePlot(mich.df, ll.mich, 287 | "Michoacan - Homicide Rates and Military Operations", 288 | file = "timelines/output/Michoacan.png", height=700) 289 | report.ll$mich <- findbreaks(mich.df, 2, ll = ll.mich) 290 | 291 | #Interesting municipalities in Michoacán (Pacific coast and bordering Guerrero) 292 | muni <- c("Aquila", "Chinicuila", "Coalcomán de Vázquez Pallares", 293 | "Tepalcatepec", 294 | "Aguililla", "Tumbiscatío", "Arteaga", "Apatzingán", 295 | "Churumuco", "Huetamo", "Carácuaro", "Turicato", 296 | "Tacámbaro") 297 | mich.int.df <- getData(hom, pop, michoacan, 0, muni) 298 | savePlot(mich.int.df, ll.mich, 299 | "Michoacan - Municipalities near the Pacific and Guerrero", 300 | file = "timelines/output/Michoacan-interstng.png", height=900) 301 | report.ll$mich.int <- findbreaks(mich.int.df, 2, ll = ll.mich) 302 | 303 | 304 | #Sinaloa ("Sinadroga") 305 | sin.df <- getData(hom, pop, sinaloa, popsize) 306 | ll.sin <- list("Joint Operation Triangulo Dorado" = op.tria.dor, 307 | "Joint Operation Culiacan-Navolato" = op.sin) 308 | 
savePlot(sin.df, ll.sin, 309 | "Sinaloa - Homicide Rates and Military Operations", 310 | file = "timelines/output/Sinaloa.png", height=700) 311 | report.ll$sin <- findbreaks(sin.df, 1, ll = ll.sin) 312 | 313 | #Municipalities in Sinaloa with a high homicide rate 314 | muni <- c("Badiraguato", "Sinaloa", "Mocorito", "Cosalá", 315 | "San Ignacio") 316 | sin.int.df <- getData(hom, pop, sinaloa, 0, muni) 317 | savePlot(sin.int.df, ll.sin, 318 | "Sinaloa - Municipalities with a high homicide rate", 319 | file = "timelines/output/Sinaloa-interstng.png", height=900) 320 | report.ll$sin.int <- findbreaks(sin.int.df, 1, ll = ll.sin) 321 | 322 | 323 | #Durango 324 | dur.df <- getData(hom, pop, durango, popsize) 325 | ll.dur <- list("Joint Operation Triangulo Dorado" = op.tria.dor, 326 | "Phase III"=op.tria.dor.III) 327 | savePlot(dur.df, ll.dur, 328 | "Durango - Homicide Rates and Military Operations", 329 | file = "timelines/output/Durango.png") 330 | report.ll$dur <- findbreaks(dur.df, 1, ll = ll.dur) 331 | 332 | #Municipalities in Durango with a high murder rate 333 | muni <- c("Súchil", "Mezquital", "Pueblo Nuevo", "San Dimas", 334 | "Vicente Guerrero", "Poanas", "Guanaceví", "Tepehuanes", 335 | "Ocampo", "El Oro") 336 | dur.int.df <- getData(hom, pop, durango, 0, muni) 337 | savePlot(dur.int.df, ll.dur, 338 | "Durango - Municipalities with a high homicide rate", 339 | file = "timelines/output/Durango-interstng.png", height = 900) 340 | report.ll$dur.int <- findbreaks(dur.int.df, 1, ll = ll.dur) 341 | 342 | #The data for Oaxaca and Guerrero are in another file (hom is replaced from here on) 343 | hom <- read.csv(bzfile("timelines/data/county-month-gue-oax.csv.bz2")) 344 | 345 | #Guerrero 346 | gue.df <- getData(hom, pop, guerrero, popsize) 347 | ll.gue <- list("Joint Operation Guerrero" = op.gue, 348 | "A.B.L. 
Captured" = bel.ley) 349 | savePlot(gue.df, ll.gue, 350 | "Guerrero - Homicide Rates and Military Operations", 351 | file = "timelines/output/Guerrero.png", height=700) 352 | report.ll$gue <- findbreaks(gue.df, 2, ll = ll.gue) 353 | 354 | 355 | #Interesting municipalities in Guerrero (the ones bordering Michoacan, as the chart title says) 356 | muni <- c("Zirándaro", "Coyuca de Catalán", "La Unión de Isidoro Montes de Oca", "Coahuayutla de José María Izazaga", "Pungarabato", "Cutzamala de Pinzón", "Arcelia") 357 | gue.df.b <- getData(hom, pop, guerrero, 0, muni) 358 | savePlot(gue.df.b, ll.gue, 359 | "Guerrero - Municipalities Bordering Michoacan", 360 | file = "timelines/output/Guerrero-mich-border.png", 361 | height=700) 362 | report.ll$gue.int <- findbreaks(gue.df.b, 2, ll = ll.gue) 363 | 364 | 365 | 366 | #There were some changes in the municipalities of Oaxaca and 367 | #their populations don't match the ones in the CONAPO data 368 | #so I'm excluding them 369 | #report.ll$oax <- findbreaks(getData(hom, pop, oaxaca, 50000), 370 | # 2, ll = ll) 371 | 372 | 373 | #The data for Nuevo Leon and Tamaulipas is in yet another file; the population cutoff is raised as well 374 | hom <- read.csv(bzfile("timelines/data/county-month-nl-tam.csv.bz2")) 375 | popsize <- 250000 376 | 377 | #Tamaulipas 378 | tam.df <- getData(hom, pop, tamaulipas, popsize) 379 | ll.tam <- list("Troops in N.L." 
= foxy.troops, 380 | "Joint Operation Tamaulipas-Nuevo Leon" = op.tam.nl) 381 | savePlot(tam.df, ll.tam, 382 | "Tamaulipas - Homicide Rates and Military Operations", 383 | file = "timelines/output/Tamaulipas.png", 384 | method = lm) 385 | report.ll$tam <- findbreaks(tam.df, 2, ll = ll.tam) 386 | 387 | #Nuevo Leon 388 | nl.df <- getData(hom, pop, nuevo.leon, popsize) 389 | ll.nl <- list("Joint Operation Tamaulipas-Nuevo Leon" = op.tam.nl) 390 | savePlot(nl.df, ll.nl, 391 | "Nuevo Leon - Homicide Rates and Military Operations", 392 | file = "timelines/output/Nuevo-Leon.png") 393 | report.ll$nl <- findbreaks(nl.df, 1, ll = ll.nl) 394 | #Municipalities of the metropolitan area of Monterrey 395 | muni <- c("Apodaca", "Cadereyta Jiménez", 396 | "Juárez", "García", 397 | "Gral. Escobedo", "Guadalupe", 398 | "Monterrey", 399 | "Santa Catarina", "San Nicolás de los Garza", 400 | "San Pedro Garza García", "Santiago") 401 | mont.df <- getData(hom, pop, nuevo.leon, 0, muni) 402 | savePlot(mont.df, ll.nl, 403 | "Nuevo Leon - Metropolitan Area of Monterrey", 404 | file = "timelines/output/Monterrey.png", height=700) 405 | report.ll$mont <- findbreaks(mont.df, 1, ll = ll.nl) 406 | 407 | #Veracruz (its data is in its own file; popsize is already 250000 at this point) 408 | hom <- read.csv(bzfile("timelines/data/county-month-ver.csv.bz2")) 409 | popsize <- 250000 410 | 411 | ver.df <- getData(hom, pop, veracruz, popsize) 412 | ll.ver <- list("Joint Operation Veracruz" = op.ver) 413 | savePlot(ver.df, ll.ver, 414 | "Veracruz - Homicide Rates and Military Operations", 415 | file = "timelines/output/Veracruz.png") 416 | report.ll$ver <- findbreaks(ver.df, 1, ll = ll.ver) 417 | #Interesting municipalities in Veracruz 418 | muni <- c("Veracruz", "Xalapa", 419 | "Poza Rica de Hidalgo", "Minatitlán") 420 | ver.int.df <- getData(hom, pop, veracruz, 0, muni) 421 | savePlot(ver.int.df, ll.ver, 422 | "Veracruz - Interesting Municipalities", 423 | file = "timelines/output/Veracruz-int.png", height=700) 424 | report.ll$ver.int <- findbreaks(ver.int.df, 1, ll = ll.ver) 425 | 426 | 427 | #Run Sweave on the report template to write report.tex (presumably the template reads report.ll; confirm in report.Rnw) 428 | Sweave("timelines/report/report.Rnw", 429 | output = 
"timelines/report/report.tex") 430 | --------------------------------------------------------------------------------