├── .Rapp.history
├── data
    ├── forProcess
    │   └── .pl
    ├── .DS_Store
    ├── municipalities.xlsx
    └── case_index.csv
├── bitacoras
    └── template
    │   └── bitácora.xlsx
├── .gitignore
├── merge.R
├── Readme.md
├── getBitacoras.R
├── fillingBitacoras.R
├── processBitacoras.R
└── .Rhistory


/.Rapp.history:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data/forProcess/.pl:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quartz/maria-followup/master/data/.DS_Store


--------------------------------------------------------------------------------
/data/municipalities.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quartz/maria-followup/master/data/municipalities.xlsx


--------------------------------------------------------------------------------
/bitacoras/template/bitácora.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quartz/maria-followup/master/bitacoras/template/bitácora.xlsx


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | */.DS_Store
3 | data/forProcess/*.xlsx
4 | data/forProcess/*.csv
5 | data/Las víctimas de María (Responses).xlsx
6 | /process_script
7 | /bitacoras/Empty
8 | /bitacoras/Filled


--------------------------------------------------------------------------------
/merge.R:
--------------------------------------------------------------------------------
 1 | ## merge it back into the database
 2 | 
 3 | # Attach the packages this script calls (dplyr/readr verbs, lubridate date
 4 | # parts) instead of relying on an earlier script having loaded them.
 5 | library(tidyverse)
 6 | library(lubridate)
 7 | 
 8 | interview <- readxl::read_excel('data/forProcess/for_translation_copyedits.xlsx')
 9 | basis <- readxl::read_excel('data/forProcess/rest_for_merge.xlsx')
10 | 
11 | # Join once and reuse. `name` and `causes` are dropped from `basis` first, so
12 | # the join keys are whatever other column names the two sheets share.
13 | merged <- interview %>%
14 |   left_join(basis %>% select(-name, -causes))
15 | 
16 | # File-name stamp built from today's month, day and year joined by '_'.
17 | today <- Sys.Date()
18 | stamp <- paste(month(today), day(today), year(today), sep = '_')
19 | 
20 | # Write one language edition to data/forProcess/data_<stamp>_<lang>.csv.
21 | # `data` must already carry that language's `text_field` and `causes`.
22 | write_edition <- function(data, lang) {
23 |   data %>%
24 |     mutate(attr = 'form_interview', month = month(date)) %>%
25 |     select(age, dmu, rmu, date, month, source, causes,
26 |            c1, c2, c3, c4, c5, c6, attr, id, name, text_field) %>%
27 |     write_csv(paste0('data/forProcess/data_', stamp, '_', lang, '.csv'))
28 | }
29 | 
30 | merged %>%
31 |   mutate(text_field = text_field_en, causes = causes_en) %>%
32 |   write_edition('EN')
33 | merged %>%
34 |   mutate(text_field = text_field_es, causes = causes_es) %>%
35 |   write_edition('ES')
36 | 
37 | ## output the current index file with 3 columns:id, deathnumber, dn
38 | existingCases <- read_csv('data/case_index.csv')
39 | newCases <- merged %>%
40 |   mutate(DeathNumber = DN, DN = paste(DN, year(date), sep = '_')) %>%
41 |   select(id, DN, DeathNumber)
42 | 
43 | existingCases %>% bind_rows(newCases) %>% write_csv('data/case_index.csv')
44 | # ~~~ submit pull request


--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
 1 | # Maria's Dead follow up
 2 | 
 3 | Instructions and scripts for adding new cases for the Hurricane Maria project. The existing databases live at:
 4 | 
 5 | https://hurricanemariadead.com/
 6 | 
 7 | https://losmuertosdemaria.com/
 8 | 
 9 | ### Recent updates:
10 | 
11 | Latest data updated on *Dec. 19, 2018*, **504 cases**
12 | 
13 | *August 30*, 2018, **487 cases**
14 | 
15 | ## Adding new cases
16 | 
17 | You need to first download a copy of this repo to your computer. After you are done with the process described below, submit a pull request. 
18 | 
19 | ### 1. Getting cases from Google Form responses
20 | 
21 | [Las víctimas de María (Responses)](https://docs.google.com/spreadsheets/d/1FK0j919EveJg6HJI_2139uQrX4W-jHX6bcBRAqQVSeI/edit#gid=1388179220)
22 | 
23 | If you don't have permission, ask Ana Campoy, Omaya Sosa, or Larry Fenn to help.
24 | 
25 | * Export the sheet directly:
26 | 
27 |  `File > Download as ... > Microsoft Excel(.xlsx)` 
28 |  
29 | * Delete all the cases in the spreadsheet up to the *Last Updated* date at the top of this Readme document you just downloaded. Save it.
30 | 
31 | * Put the newly-saved file into the `data` folder: `data/Las víctimas de María (Responses).xlsx`.
32 | 
33 | 
34 | ### 2. Generate bitacoras
35 | 
36 | Run `getBitacoras.R` 
37 | 
38 | ### 3. Get information from bitacoras
39 | 
40 | - Inform Laura Candelas (CPI), who will coordinate verification phone calls using the bitacoras in the folder `bitacoras/Empty/` 
41 | - Gather only the complete bitacoras to the folder `bitacoras/Filled/`
42 | 
43 | - Run script `processBitacoras.R` 
44 | 
45 | 
46 | ### 4. Copy edits and translation
47 | 
48 | Send the file `data/forProcess/for_translation_copyedits.xlsx` to Laura and Ezequiel for copy edits and translation. They need to do three things:
49 | 
50 | * Spanish copy editing for column `text_field_es` (Laura)
51 | * Translate the column `text_field_es` to column `text_field_en` and copy edit `text_field_en` (Ezequiel)
52 | * Translate the column `causes_en` to column `causes_es` (Laura)
53 | 
54 | (The person who does copy edits should also do a general additional verification to make sure the qualitative information in `text_field` matches the other basic info, such as gender (as inferred from name), clinical cause of death, mechanism of death, age, etc., and make corrections or flag cases accordingly.)
55 | 
56 | When you get the file back, save it so that it replaces the existing `data/forProcess/for_translation_copyedits.xlsx`.
57 | 
58 | ### 5. Merge it back into the database
59 | 
60 | Run `merge.R`
61 | 
62 | ### 6. Submit pull request
63 | 
64 | 
65 | 


--------------------------------------------------------------------------------
/getBitacoras.R:
--------------------------------------------------------------------------------
  1 | library(tidyverse)
  2 | library(stringr)
  3 | library(readxl)
  4 | 
  5 | ######################
  6 | ### READ NEW CASES ###
  7 | ######################
  8 | 
  9 | # Map accented Spanish letters (either case) to plain lower-case ASCII.
 10 | unaccent <- function(x) {
 11 |   chartr('ÁáàéÉíÍóÓúÚüÜñÑ', 'aaaeeiioouuuunn', x)
 12 | }
 13 | 
 14 | # n-th space-separated word of each element of `x` ('' where an element has
 15 | # fewer words). When no element has n words the split matrix has fewer than
 16 | # n columns, and indexing column n directly would stop the script.
 17 | nth_word <- function(x, n) {
 18 |   words <- str_split(x, ' ', simplify = TRUE)
 19 |   if (ncol(words) < n) {
 20 |     return(rep('', length(x)))
 21 |   }
 22 |   words[, n]
 23 | }
 24 | 
 25 | cases <- readxl::read_xlsx("data/Las víctimas de María (Responses).xlsx", col_types = 'text')
 26 | cases <-
 27 |   cases %>%
 28 |   # every column is read as text; the date is converted as a day count from 1899-12-30
 29 |   mutate(DeathDate = as.character(as.Date(as.numeric(`Fecha del fallecimiento`), origin = "1899-12-30"))) %>%
 30 |   mutate(
 31 |     case_id = row_number(),
 32 |     Name = tolower(nth_word(`Nombre del fallecido:`, 1)),
 33 |     LastName = unaccent(tolower(nth_word(`Apellidos:`, 1))),
 34 |     DeathMunicipality = unaccent(str_squish(tolower(`Lugar del fallecimiento:`))),
 35 |     ResidenceMunicipality = unaccent(str_squish(tolower(`Lugar de residencia:`))),
 36 |     SecondLastName = unaccent(tolower(nth_word(`Apellidos:`, 2)))
 37 |   )
 38 | 
 39 | 
 40 | #####################
 41 | ### CASE MATCHING ###
 42 | #####################
 43 | 
 44 | govt <- read_csv('data/govt_091817_061218.csv')
 45 | 
 46 | # One matching pass. `pool` loses the columns in `drop_cols` and is joined to
 47 | # `govt` on every column name the two still share (the remaining name parts,
 48 | # per the pass list below). A candidate pair is kept only when it agrees on
 49 | #   * age, within `age_tol` years;
 50 | #   * death municipality or residence municipality;
 51 | #   * death date within 10 days, OR the kind of place where the person died.
 52 | # Returns case_id, DN and DeathNumber of the surviving pairs.
 53 | match_pass <- function(pool, drop_cols, age_tol) {
 54 |   pool[setdiff(names(pool), drop_cols)] %>%
 55 |     left_join(govt) %>%
 56 |     filter(abs(as.numeric(`Edad del fallecido:`) - Age) <= age_tol) %>%
 57 |     filter(DeathMunicipality == MunicipalityDeathPlace | ResidenceMunicipality == ResidencePlace) %>%
 58 |     filter(
 59 |       (abs(as.numeric(as.Date(paste(DeathDate_Year, DeathDate_Month, DeathDate_Day, sep = '-')) - as.Date(DeathDate))) <= 10)
 60 |       | ((DeathPlace == 'HOSPITALIZADO' | DeathPlace == 'AMBULATORIO/SALA DE EMERGENCIA') & `¿Dónde murió la persona?` == 'Hospital/Hospital')
 61 |       | (DeathPlace == 'RESIDENCIA DE LA PERSONA FALLECIDA' & `¿Dónde murió la persona?` == 'Residencia privada')
 62 |     ) %>%
 63 |     select(case_id, DN, DeathNumber)
 64 | }
 65 | 
 66 | # Passes run from the strictest name match to the loosest. Passes that match
 67 | # on a single name part use a tighter age tolerance (2 years instead of 5).
 68 | passes <- list(
 69 |   list(drop = 'Name', age_tol = 5),                          # LastName & SecondLastName
 70 |   list(drop = 'SecondLastName', age_tol = 5),                # Name & LastName
 71 |   list(drop = 'LastName', age_tol = 5),                      # Name & SecondLastName
 72 |   list(drop = c('LastName', 'Name'), age_tol = 2),           # SecondLastName alone
 73 |   list(drop = c('SecondLastName', 'Name'), age_tol = 2),     # LastName alone
 74 |   list(drop = c('SecondLastName', 'LastName'), age_tol = 2)  # Name alone
 75 | )
 76 | 
 77 | # Each pass only sees the cases that no earlier pass matched; new matches are
 78 | # stacked on top of the earlier ones.
 79 | matches <- NULL
 80 | for (pass in passes) {
 81 |   unmatched <- cases %>% filter(!(case_id %in% matches$case_id))
 82 |   matches <- rbind(match_pass(unmatched, pass$drop, pass$age_tol), matches)
 83 | }
 84 | 
 85 | # Attach the matched government record to each matched case. The survey's
 86 | # normalised name parts are dropped before the second join so they do not
 87 | # take part in it.
 88 | matched_cases <- cases %>%
 89 |   filter(case_id %in% matches$case_id) %>%
 90 |   left_join(matches) %>%
 91 |   select(-Name, -LastName, -SecondLastName) %>%
 92 |   left_join(govt)
 93 | 
 94 | 
 95 | ###########################
 96 | ### TAKE OUT DUPLICATES ###
 97 | ###########################
 98 | 
 99 | # load the existing cases
100 | index <- read_csv('data/case_index.csv')
101 | 
102 | # cases not existing in the current database
103 | matched_cases <- matched_cases %>%
104 |   filter(!DN %in% index$DN)
105 | 
106 | # cases do not replicate themselves
107 | # -- if there are duplicated responses, just pick one
108 | matched_cases <- matched_cases %>%
109 |   arrange(DeathNumber) %>%
110 |   mutate(last_case = lag(DeathNumber)) %>%
111 |   filter(is.na(last_case) | (last_case != DeathNumber)) %>%
112 |   select(-last_case)
113 | 
114 | 
115 | ##########################
116 | ### GENERATE BITACORAS ###
117 | ##########################
118 | 
119 | # New ids continue after the largest existing id and never start below 3001.
120 | # na.rm = TRUE keeps one missing id in the index from turning every new id
121 | # into NA.
122 | matched_cases <- matched_cases %>%
123 |   mutate(id = max(max(index$id, na.rm = TRUE), 3000) + row_number())
124 | 
125 | source("fillingBitacoras.R")
126 | 
127 | ## ~~ reporters to process the bitacoras


--------------------------------------------------------------------------------
/fillingBitacoras.R:
--------------------------------------------------------------------------------
  1 | ### CREATE EMPTY FORMS ###
  2 | library(xlsx)
  3 | library(openxlsx)
  4 | 
  5 | # openxlsx is attached after xlsx, so its createWorkbook() and saveWorkbook()
  6 | # mask the xlsx ones. Everything below is written against the xlsx API, so
  7 | # those two calls are namespaced explicitly.
  8 | 
  9 | tmp1 <- readxl::read_xlsx('bitacoras/template/bitácora.xlsx', sheet = 1)
 10 | tmp2 <- readxl::read_xlsx('bitacoras/template/bitácora.xlsx', sheet = 2)
 11 | tmp3 <- readxl::read_xlsx('bitacoras/template/bitácora.xlsx', sheet = 3)
 12 | 
 13 | # bitacoras/Empty is listed in .gitignore, so a fresh clone does not have it.
 14 | dir.create('bitacoras/Empty', showWarnings = FALSE, recursive = TRUE)
 15 | 
 16 | # Prepend a header cell to a template column and blank out its NA cells.
 17 | template_col <- function(head, values) {
 18 |   cells <- c(head, values)
 19 |   ifelse(is.na(cells), '', cells)
 20 | }
 21 | 
 22 | # Apply one style to the cells named in `keys` ("row.col").
 23 | style_cells <- function(cells, keys, style) {
 24 |   for (key in keys) setCellStyle(cells[[key]], style)
 25 | }
 26 | 
 27 | # Write record[[column]] into cells[[key]] for every `key = column` pair.
 28 | fill_cells <- function(cells, record, fields) {
 29 |   for (key in names(fields)) {
 30 |     setCellValue(cells[[key]], record[[fields[[key]]]], showNA = FALSE)
 31 |   }
 32 | }
 33 | 
 34 | # Case sheet, column 2.
 35 | case_fields <- c(
 36 |   '2.2' = 'id',
 37 |   '3.2' = 'CertificateNumber',
 38 |   '4.2' = 'DN',
 39 |   '5.2' = 'TypeOfDeath'
 40 | )
 41 | 
 42 | # Questionnaire column 2: the answers from the form responses.
 43 | survey_fields <- c(
 44 |   '4.2' = 'Su teléfono:',
 45 |   '5.2' = 'Su correo electrónico:',
 46 |   '19.2' = 'Nombre del fallecido:',
 47 |   '21.2' = 'Apellidos:',
 48 |   '24.2' = 'Su relación con el fallecido es:',
 49 |   '26.2' = 'Edad del fallecido:',
 50 |   '29.2' = 'Lugar de residencia:',
 51 |   '30.2' = 'La dirección física exacta del fallecido es:',
 52 |   '34.2' = 'DeathDate',
 53 |   '35.2' = 'Lugar del fallecimiento:',
 54 |   '37.2' = '¿Dónde murió la persona?',
 55 |   '39.2' = 'El fallecido murió a causa de:',
 56 |   '40.2' = '¿Puedes describir lo que sucedió y cómo se relaciona la muerte con María o la crisis causada por ese huracán?'
 57 | )
 58 | 
 59 | # Questionnaire column 3: the columns joined in from `govt`.
 60 | govt_fields <- c(
 61 |   '3.3' = 'InformantName',
 62 |   '19.3' = 'Name',
 63 |   '20.3' = 'MiddleName',
 64 |   '21.3' = 'LastName',
 65 |   '22.3' = 'SecondLastName',
 66 |   '24.3' = 'InformantRelationship',
 67 |   '26.3' = 'Age',
 68 |   '29.3' = 'ResidencePlace',
 69 |   '30.3' = 'ResidencePlaceAddress1',
 70 |   '31.3' = 'ResidencePlaceAddress2',
 71 |   '32.3' = 'ResidencePlaceAddress3',
 72 |   '35.3' = 'MunicipalityDeathPlace',
 73 |   '37.3' = 'DeathPlace',
 74 |   '38.3' = 'DeathFacility',
 75 |   '39.3' = 'nchsti'
 76 | )
 77 | 
 78 | # seq_len() makes the loop a no-op when there are no new matched cases
 79 | # (1:nrow() would iterate over c(1, 0)).
 80 | for (i in seq_len(nrow(matched_cases))) {
 81 |   print(i)
 82 | 
 83 |   form <- xlsx::createWorkbook()
 84 |   caseInfo <- createSheet(form, sheetName = "Información del caso")
 85 |   questionnaire <- createSheet(form, sheetName = "Cuestionario")
 86 |   statusInfo <- createSheet(form, sheetName = "Estatus del caso")
 87 | 
 88 |   #####################
 89 |   ## FILL CASE SHEET ##
 90 |   #####################
 91 | 
 92 |   caseRows <- c('Información del caso', tmp1$`Información del caso`)
 93 |   caseData <- data.frame(col1 = caseRows, col2 = rep('', length(caseRows)))
 94 |   addDataFrame(x = caseData, sheet = caseInfo, row.names = FALSE, col.names = FALSE)
 95 |   caseCells <- getCells(getRows(caseInfo), colIndex = 1:2)
 96 | 
 97 |   # header style and border, reused on all three sheets
 98 |   headBorder <- Border(color = 'black', position = 'TOP', pen = 'BORDER_MEDIUM')
 99 |   headStyle <- CellStyle(form) + CellProtection(locked = TRUE) +
100 |     Font(form, isBold = TRUE, heightInPoints = 14) + Alignment(wrapText = TRUE)
101 |   setCellStyle(caseCells[['1.1']], headStyle)
102 |   CB.setBorder(CellBlock(caseInfo, 1, 1, 1, 2, FALSE), headBorder, colIndex = 1:2, rowIndex = 1)
103 |   setColumnWidth(caseInfo, 1, 30)
104 | 
105 |   ########################
106 |   ## FILL QUESTIONNAIRE ##
107 |   ########################
108 | 
109 |   # NOTE(review): X__1..X__4 are the names older readxl releases give to
110 |   # unnamed columns; newer releases name them ...2, ...3, and so on.
111 |   # TODO confirm against the installed readxl version.
112 |   qData <- data.frame(
113 |     col1 = template_col('Información del informante', tmp2$`Información del informante`),
114 |     col2 = template_col('', tmp2$X__1),
115 |     col3 = template_col('', tmp2$X__2),
116 |     col4 = template_col('', tmp2$X__3),
117 |     col5 = template_col('', tmp2$X__4)
118 |   )
119 |   addDataFrame(x = qData, sheet = questionnaire, row.names = FALSE, col.names = FALSE)
120 |   qCells <- getCells(getRows(questionnaire), colIndex = 1:5)
121 | 
122 |   # spacing
123 |   setColumnWidth(questionnaire, 1, 60)
124 |   for (col in 2:5) setColumnWidth(questionnaire, col, 40)
125 | 
126 |   # cell styles
127 |   style_cells(qCells, c('1.1', '7.1', '10.1', '13.1', '16.1', '45.1', '48.1'), headStyle)
128 |   setCellStyle(qCells[['17.1']], CellStyle(form) + CellProtection(locked = TRUE) + Font(form, isBold = TRUE))
129 | 
130 |   sourceStyle <- CellStyle(form) + CellProtection(locked = TRUE) + Font(form, color = 'red', isBold = TRUE)
131 |   style_cells(qCells, c('2.2', '2.3', '18.2', '18.3'), sourceStyle)
132 | 
133 |   intStyle <- CellStyle(form) + CellProtection(locked = TRUE) + Font(form, color = 'blue', isBold = TRUE)
134 |   style_cells(qCells, c('18.4', '18.5'), intStyle)
135 | 
136 |   qStyle <- CellStyle(form) + CellProtection(locked = TRUE) + Alignment(wrapText = TRUE) + Font(form, isItalic = TRUE)
137 |   style_cells(qCells, paste0(c(8, 11, 14, 24, 26, 28, 29, 34:43, 46), '.1'), qStyle)
138 | 
139 |   # add note style
140 |   noteFont <- Font(form, color = 'gray40')
141 |   noteStyle <- CellStyle(form) + noteFont + Alignment(wrapText = TRUE)
142 |   CB.setBorder(CellBlock(questionnaire, 54, 1, 1, 5, FALSE), headBorder, colIndex = 1:5, rowIndex = 1)
143 |   CB.setFont(CellBlock(questionnaire, 54, 1, 11, 1, FALSE), noteFont, colIndex = 1, rowIndex = 1:11)
144 |   style_cells(qCells, paste0(c(55:57, 59, 61, 62), '.1'), noteStyle)
145 | 
146 |   # fill
147 |   blueFill <- Fill(backgroundColor = 'lightblue')
148 |   blueStyle <- CellStyle(form) + blueFill
149 |   CB.setFill(CellBlock(questionnaire, 19, 4, 4, 1, FALSE), blueFill, colIndex = 1, rowIndex = 1:4)
150 |   style_cells(qCells, c('24.4', '26.4'), blueStyle)
151 |   CB.setFill(CellBlock(questionnaire, 28, 4, 2, 1, FALSE), blueFill, colIndex = 1, rowIndex = 1:2)
152 |   CB.setFill(CellBlock(questionnaire, 34, 4, 7, 1, FALSE), blueFill, colIndex = 1, rowIndex = 1:7)
153 |   setCellStyle(qCells[['49.4']], blueStyle)
154 | 
155 |   filledStyle <- CellStyle(form) + CellProtection(locked = TRUE) + Alignment(wrapText = TRUE)
156 |   style_cells(qCells, c('36.3', '39.2', '39.3', '40.2'), filledStyle)
157 | 
158 |   ############################
159 |   ## FILL INTERVIEWER SHEET ##
160 |   ############################
161 | 
162 |   interviewerRows <- c('Primera llamada', tmp3$`Primera llamada`)
163 |   interviewerData <- data.frame(col1 = interviewerRows, col2 = rep('', length(interviewerRows)))
164 |   addDataFrame(x = interviewerData, sheet = statusInfo, row.names = FALSE, col.names = FALSE)
165 |   statusCells <- getCells(getRows(statusInfo), colIndex = 1:2)
166 | 
167 |   style_cells(statusCells, c('1.1', '14.1'), headStyle)
168 |   CB.setBorder(CellBlock(statusInfo, 14, 1, 1, 2, FALSE), headBorder, colIndex = 1:2, rowIndex = 1)
169 |   setColumnWidth(statusInfo, 1, 30)
170 |   setColumnWidth(statusInfo, 2, 30)
171 | 
172 |   createFreezePane(questionnaire, rowSplit = 1, colSplit = 2)
173 | 
174 |   #########################
175 |   #### ADD DATA ###########
176 |   #########################
177 | 
178 |   case <- matched_cases[i, ]
179 |   print(case$`Nombre del fallecido:`)
180 | 
181 |   # case sheet: id plus the government identifiers
182 |   fill_cells(caseCells, case, case_fields)
183 | 
184 |   # questionnaire, column 2 (survey). The birth-date cell 28.2 is not
185 |   # filled in; the call that wrote it was already commented out.
186 |   setCellValue(qCells[['3.2']], paste(case$`Su nombre:`, case$`Sus apellidos:`, sep = ' '), showNA = FALSE)
187 |   fill_cells(qCells, case, survey_fields)
188 | 
189 |   # questionnaire, column 3 (government record); composed values go last
190 |   fill_cells(qCells, case, govt_fields)
191 |   setCellValue(qCells[['28.3']], paste(case$BirthDate_Year, case$BirthDate_Month, case$BirthDate_Day, sep = "-"), showNA = FALSE)
192 |   setCellValue(qCells[['34.3']], paste(case$DeathDate_Year, case$DeathDate_Month, case$DeathDate_Day, sep = '-'), showNA = FALSE)
193 |   # -- 6.occupation
194 |   setCellValue(qCells[['36.3']], tolower(paste(case$Industry, case$Occupation, sep = "|")), showNA = FALSE)
195 | 
196 |   xlsx::saveWorkbook(form, paste0("bitacoras/Empty/bitacora_", case$id, '.xlsx'))
197 | }


--------------------------------------------------------------------------------
/processBitacoras.R:
--------------------------------------------------------------------------------
  1 | # library(xlsx)
  2 | # library(readxl)
  3 | # library(tidyverse)
  4 | # library(stringr)
  5 | 
  6 | library(purrr)
  7 | 
  8 | ######################
  9 | ### LOAD BITACORAS ###
 10 | ######################
 11 | 
 12 | 
 13 | # read_bitacora() by larry
 14 | #
 15 | # Read one filled bitacora workbook and flatten its three sheets into one
 16 | # data frame: a row for each of questionnaire columns 2-5 (told apart by the
 17 | # `Source` field), with the case-sheet and call-status values repeated on
 18 | # every row and the workbook path stored in `filename`.
 19 | #
 20 | # filename: path to the .xlsx workbook.
 21 | # Returns a data.frame whose sheet values are all character (every sheet is
 22 | # read with col_types = 'text').
 23 | read_bitacora <- function(filename) {
 24 |   page1 <- readxl::read_excel(filename, sheet = "Información del caso", col_types = 'text')
 25 |   page2 <- readxl::read_excel(filename, sheet = "Cuestionario", col_types = 'text')
 26 |   page3 <- readxl::read_excel(filename, sheet = "Estatus del caso", col_types = 'text')
 27 | 
 28 |   # Sheets 1 and 3 keep their answers in the second column. It has no header,
 29 |   # so its generated name depends on the readxl version (`X__1` in older
 30 |   # releases, `...2` in newer ones); taking it by position works with both.
 31 |   # stringsAsFactors = FALSE keeps the values character on R < 4.0 too; the
 32 |   # id columns are handed to parse_number() right after this function.
 33 |   page1_data <- data.frame(t(page1[[2]]), stringsAsFactors = FALSE)
 34 |   colnames(page1_data) <-
 35 |     c(
 36 |       "id",
 37 |       "Certificate Number",
 38 |       "DN",
 39 |       "TypeOfDeath"
 40 |     )
 41 | 
 42 |   # Questionnaire rows that hold an answer, in the order of the names below.
 43 |   page2_data <- data.frame(
 44 |     t(page2[c(2:4, 17:21, 23, 25, 27:31, 33:39, 48), 2:5]),
 45 |     stringsAsFactors = FALSE
 46 |   )
 47 |   colnames(page2_data) <-
 48 |     c(
 49 |       "InformantName",
 50 |       "InformantPhone",
 51 |       "InformantMail",
 52 |       "Source",
 53 |       "VictimName",
 54 |       "VictimMiddleName",
 55 |       "VictimLastName",
 56 |       "VictimSecondLastName",
 57 |       "1. InformantRelationship",
 58 |       "2. VictimAge",
 59 |       "3. VictimDOB",
 60 |       "4. VictimResidence",
 61 |       "4. VictimResidence1",
 62 |       "4. VictimResidence2",
 63 |       "4. VictimResidence3",
 64 |       "5. VictimDeathDate",
 65 |       "5.1. VictimDeathMunicipality",
 66 |       "6. VictimOccupation",
 67 |       "7. DeathFacility",
 68 |       "7.1. DeathFacilityAddress",
 69 |       "8. Cause of Death",
 70 |       "9. Circumstances and hurricane relevance",
 71 |       "Direct/indirect CDC criterion"
 72 |     )
 73 | 
 74 |   # Both call blocks on the status sheet record the same nine fields.
 75 |   call_fields <- c(
 76 |     "Interviewer",
 77 |     "Date and Time",
 78 |     "Interview conducted",
 79 |     "Message on the phone",
 80 |     "Text message",
 81 |     "Number does not work",
 82 |     "No number",
 83 |     "Rescheduled call",
 84 |     "Person did not want to cooperate"
 85 |   )
 86 |   page3_data <- data.frame(
 87 |     t(page3[[2]][c(1, 2, 5:11, 14, 15, 18:24)]),
 88 |     stringsAsFactors = FALSE
 89 |   )
 90 |   colnames(page3_data) <- c(
 91 |     paste("First call", call_fields, sep = '-'),
 92 |     paste("Second call", call_fields, sep = '-')
 93 |   )
 94 | 
 95 |   # Put it all in one; cbind recycles the single-row sheets across the rows.
 96 |   data <- cbind(page1_data, page2_data, page3_data)
 97 |   data$filename <- filename
 98 |   data
 99 | }
100 | 
101 | 
102 | # Read every workbook in bitacoras/Filled and stack the results; the three
103 | # identifier columns are then normalised to whole-number strings.
104 | as_whole_number <- function(x) as.character(round(parse_number(x)))
105 | filled_paths <- file.path("bitacoras/Filled", list.files('bitacoras/Filled'))
106 | bitacoras <- reduce(map(filled_paths, read_bitacora), bind_rows) %>%
107 |   mutate(id = as_whole_number(id), DN = as_whole_number(DN),
108 |          `Certificate Number` = as_whole_number(`Certificate Number`))
109 | 
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 
116 | ######################
117 | ### CLEAN UP CASES ###
118 | ######################
119 | 
120 | 
121 | 
122 | getUnaccented <- function(input) {
123 |   chartr(old = 'ÁáàéÉíÍóÓúÚüÜñÑ', new = 'aaaeeiioouuuunn', x = input)  # accented letter -> plain lower-case
124 | }
125 | 
126 | # Drop the informant, call-log, type-of-death and certificate-number columns,
127 | # then strip the leading question numbers ("2. ", "5.1. ") from the names.
128 | processCols <- function(dt) {
129 |   unwanted <- c("Informant", "call", "TypeOfDeath", "Certificate Number")
130 |   for (pattern in unwanted) {
131 |     dt <- dt[ , !grepl(pattern, names(dt))]
132 |   }
133 | 
134 |   names_without_numbers <- gsub('[1-9].?[1-9]?. ', '', colnames(dt))
135 |   colnames(dt) <- names_without_numbers
136 |   return(dt)
137 | }
138 | 
139 | bitacoras_cleaned <- bitacoras %>%
140 |   processCols() %>% select(-filename, -VictimOccupation)  # after the column clean-up, drop two unused columns
141 | 
142 | # Pivot column `var_name` (given as a string, hence !!as.name) so each DN/id
143 | # row carries it once per Source as DB, CMT, SVY and INT. Result stays grouped.
144 | getDB <- function(var_name) {
145 |   bitacoras_cleaned %>% select(Source, !!as.name(var_name), DN, id) %>%
146 |     group_by(DN, id) %>%
147 |     spread(Source, !!as.name(var_name)) %>%
148 |     rename(DB = `Base de datos causa de muerte`, CMT = Comentarios, SVY = `Encuesta/survey`, INT = `Respuesta del entrevistado`)
149 | }
150 | 
151 | ##############################################
152 | ##### GET BASIC CASE INFO FROM BITACORAS #####
153 | ##############################################
154 | 
155 | 
156 | # Victim name parts (VictimName, VictimMiddleName, VictimLastName, VictimSecondLastName).
157 | # The interview answer (INT) is used unless it is missing or only confirms the record
158 | # ('c'/'C', 'c <note>' or 'correcto'); in that case the database value (DB) is kept.
159 | # NOTE(review): the previous comment said the SVY name is taken when no DB name is
160 | # available, but no such fallback is coded here -- confirm which behaviour is intended.
161 | resolveNamePart <- function(field) {
162 |     getDB(field) %>%
163 |         mutate(confirmed = toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'^c\\s') | INT == 'correcto',
164 |                !!field := ifelse(confirmed, DB, INT)) %>%
165 |         select(!!field)
166 | }
167 | name1 <- resolveNamePart("VictimName")
168 | name2 <- resolveNamePart("VictimMiddleName")
169 | name3 <- resolveNamePart("VictimLastName")
170 | name4 <- resolveNamePart("VictimSecondLastName")
171 | 
172 | part_name <- Reduce(merge, list(name1, name2, name3, name4))
173 | 
174 | 
175 | # age: DB value when the interview answer (INT) is missing or only confirms it, otherwise INT
176 | part_age <- getDB("VictimAge") %>%
177 |     mutate(keep_db = is.na(INT) | toupper(INT) == 'C' | INT == 'correcto' | str_detect(INT,'^c\\s'), VictimAge = ifelse(keep_db, DB, INT)) %>%
178 |     select(VictimAge)
179 | 
180 | # residential address -> municipality of residence (lower case)
181 | # mu is one regex alternation of every municipality name, lower-cased and unaccented
182 | municipalities <- readxl::read_excel('data/municipalities.xlsx')
183 | mu <- getUnaccented(paste(tolower(municipalities$Municipality),collapse = '|'))
184 | part_res <- getDB("VictimResidence") %>%
185 |     mutate(
186 |         # TRUE when the interviewee gave no answer or only confirmed the record
187 |         confirmed = toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'^C\\s') | str_detect(INT,'^c\\s') | INT == 'correcto',
188 |         # first municipality named in the interview answer, NA when there is none.
189 |         # str_extract() replaces str_extract_all(..., simplify = T)[,1], which indexes a
190 |         # zero-column matrix (an error) whenever it is evaluated on input without a match.
191 |         int_mu = str_extract(getUnaccented(tolower(INT)), mu),
192 |         # confirmed, or no municipality in the answer -> keep the DB value
193 |         VictimResidenceMunicipality = ifelse(confirmed | is.na(int_mu), DB, int_mu)
194 |     ) %>%
195 |     select(VictimResidenceMunicipality) %>%
196 |     mutate(VictimResidenceMunicipality = tolower(VictimResidenceMunicipality))
197 |     
198 | # death date. month_prefix maps Spanish month names to 'YYYY-MM-' prefixes; abril is 2018-04 (it used to be written out as 2018-02).
199 | # NOTE(review): years assume the Sep 2017 - Jun 2018 range hinted at by data/govt_091817_061218.csv; confirm for the newly handled marzo, mayo and junio.
200 | month_prefix <- c(septiembre = '2017-09-', octubre = '2017-10-', noviembre = '2017-11-', diciembre = '2017-12-', enero = '2018-01-',
201 |                   febrero = '2018-02-', marzo = '2018-03-', abril = '2018-04-', mayo = '2018-05-', junio = '2018-06-')
202 | # Rewrite '<day> [de] <month> ...' as 'YYYY-MM-<day>' (day = first two characters); \\b replaces the old mandatory trailing space so '20 de septiembre' matches too.
203 | toISODate <- function(x) {
204 |     for (m in names(month_prefix)) {
205 |         x <- ifelse(str_detect(x, paste0('\\d+ (de )?', m, '\\b')), paste0(month_prefix[[m]], str_squish(str_sub(x,1,2))), x)
206 |     }
207 |     x
208 | }
209 | part_deathdate <- getDB("VictimDeathDate") %>%
210 |     mutate(INT = ifelse(!is.na(as.numeric(INT)), as.character(as.Date(as.numeric(INT), origin="1899-12-30")), INT)) %>% # numeric answers are read as day counts from 1899-12-30
211 |     mutate(VictimDeathDate = ifelse(toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'^C\\s') | str_detect(INT,'correct'), DB, INT)) %>%
212 |     mutate(VictimDeathDate = toISODate(VictimDeathDate)) %>%
213 |     select(VictimDeathDate)
214 | 
215 | # death province
216 | # DB when the interview answer (INT) is missing or only confirms the record; otherwise the first
217 | # municipality named in INT; otherwise the survey (SVY) value of this same field.
218 | part_deathPl <- getDB('VictimDeathMunicipality') %>%
219 |     mutate(
220 |         confirmed = toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'correcta|correcto|^C\\s|^c\\s'),
221 |         # first municipality named in INT, NA when there is none. str_extract() is used
222 |         # because str_extract_all(..., simplify = T)[,1] indexes a zero-column matrix
223 |         # (an error) whenever it is evaluated on input without a match.
224 |         int_mu = str_extract(getUnaccented(tolower(INT)), mu),
225 |         VictimDeathMunicipality = ifelse(confirmed, DB, ifelse(is.na(int_mu), SVY, int_mu))
226 |     ) %>%
227 |     select(VictimDeathMunicipality) %>%
228 |     # drop the 'PUERTO RICO, ' prefix, then lower-case
229 |     mutate(VictimDeathMunicipality = tolower(str_replace(VictimDeathMunicipality,'PUERTO RICO, ','')))
230 | 
231 | 
232 | 
233 | # Interview answer to the direct/indirect CDC criterion question, kept as `cause`
234 | part_cause <- getDB('Direct/indirect CDC criterion') %>%
235 |     select(cause = INT)
236 | 
237 | # Merge the per-field tables on their common columns (presumably DN and id), drop cases whose
238 | # cause is 'no relacionada', coerce age, id and DN to numeric and tag every row as 'survey'.
239 | per_field <- list(part_name, part_age, part_res, part_deathdate, part_deathPl, part_cause)
240 | output <- Reduce(merge, per_field) %>%
241 |     filter(is.na(cause) | cause != 'no relacionada') %>%
242 |     mutate(
243 |         VictimAge = as.numeric(VictimAge),
244 |         id = as.numeric(id),
245 |         DN = as.numeric(DN),
246 |         source = 'survey'
247 |     ) %>%
248 |     select(-cause)
249 | 
250 | 
251 | ####################################
252 | ##### ADD DEMOGRAPHIC ANALYSIS #####
253 | ####################################
254 | 
255 | # DN -> cause grouping (column nchsti) from the government file; rows without nchsti are dropped
256 | grouped_cause <- read_csv('data/govt_091817_061218.csv') %>%
257 |     filter(!is.na(nchsti)) %>%
258 |     select(DN = DeathNumber, causes = nchsti)
259 | 
260 | # left join on the shared column(s); cases without a match get NA in causes
261 | output <- left_join(output, grouped_cause)
262 | 
263 | ######################################
264 | ##### FORMAT FOR THE INTERACTIVE #####
265 | ######################################
266 | 
267 | library(lubridate)
268 | # One row per case with short column names; missing middle / second last names become '' before the full name is pasted.
269 | basic <- output %>%
270 |     mutate(VictimMiddleName = ifelse(is.na(VictimMiddleName),'',VictimMiddleName),
271 |            VictimSecondLastName = ifelse(is.na(VictimSecondLastName),'',VictimSecondLastName)) %>%
272 |     transmute(
273 |         id, DN,
274 |         name = str_squish(paste(VictimName, VictimMiddleName, VictimLastName, VictimSecondLastName)),
275 |         age = VictimAge,
276 |         dmu = tolower(VictimDeathMunicipality),
277 |         rmu = tolower(VictimResidenceMunicipality),
278 |         date = VictimDeathDate, month = month(as.Date(VictimDeathDate)),
279 |         source, causes)
280 | 
281 | 
282 | #############################################
283 | ##### ADD IN CAUSES OF DEATH CATEGORIES #####
284 | #############################################
285 | 
286 | # Cause-of-death phrases (presumably the survey's answer options -- confirm). c1..c6 are 1/0 flags
287 | # telling whether the survey answer (SVY) contains each phrase, in this order.
288 | causes <- c("Condición de salud directamente relacionada con el huracán",
289 |             "Daños ocasionados por el huracán", "Falta de electricidad", "Falta de agua o comida",
290 |             "Falta de acceso a atención médica", "Falta de acceso a las comunicaciones")
291 | 
292 | cause_flags <- getDB('Cause of Death') %>%
293 |     select(SVY, DN,id) %>%
294 |     mutate(
295 |         c1 = ifelse(str_detect(SVY, causes[[1]]), 1, 0),
296 |         c2 = ifelse(str_detect(SVY, causes[[2]]), 1, 0),
297 |         c3 = ifelse(str_detect(SVY, causes[[3]]), 1, 0),
298 |         c4 = ifelse(str_detect(SVY, causes[[4]]), 1, 0),
299 |         c5 = ifelse(str_detect(SVY, causes[[5]]), 1, 0),
300 |         c6 = ifelse(str_detect(SVY, causes[[6]]), 1, 0)
301 |     ) %>%
302 |     select(-SVY)
303 | 
304 | basic <- merge(basic, cause_flags)
305 | 
306 | 
307 | ##############################
308 | ##### GET INTERVIEW DATA #####
309 | ##############################
310 | 
311 | # Spanish narrative per case: the interview answer when it has at least 50 characters, otherwise the survey text.
312 | interview <- getDB('Circumstances and hurricane relevance') %>%
313 |     mutate(text_field_es = ifelse(is.na(INT) | nchar(INT) < 50, SVY, INT)) %>%
314 |     select(text_field_es)
315 | 
316 | # Workbook for translation and copy edits: causes_es and text_field_en are written as empty strings
317 | # (presumably to be filled in by hand -- confirm).
318 | for_translation <- merge(basic, interview) %>%
319 |     mutate(causes_en = causes, causes_es = '', text_field_en = '',
320 |            PlaceOfDeath=dmu, DateOfDeath = date) %>%
321 |     select(id, name, DateOfDeath, causes_en, causes_es, text_field_en, text_field_es)
322 | write.xlsx(for_translation, 'data/forProcess/for_translation_copyedits.xlsx', row.names=FALSE)
323 | 
324 | # The case table itself is written out unchanged to a second workbook.
325 | write.xlsx(basic, 'data/forProcess/rest_for_merge.xlsx', row.names=FALSE)
326 | 
327 | 
328 | ## ~~~ copy edits and translation
329 | 


--------------------------------------------------------------------------------
/data/case_index.csv:
--------------------------------------------------------------------------------
  1 | id,DN,DeathNumber
  2 | 1268,20662_2017,20662
  3 | 1267,3148_2018,3148
  4 | 1147,22830_2017,22830
  5 | 1146,24489_2017,24489
  6 | 1143,23229_2017,23229
  7 | 1250,12790_2018,12790
  8 | 1251,20610_2017,20610
  9 | 1252,21509_2017,21509
 10 | 1253,21893_2017,21893
 11 | 1254,22664_2017,22664
 12 | 1270,1966_2018,1966
 13 | 1273,9396_2018,9396
 14 | 1274,9451_2018,9451
 15 | 1275,10138_2018,10138
 16 | 1276,12175_2018,12175
 17 | 1293,23215_2017,23215
 18 | 1214,30854_2017,30854
 19 | 1247,2675_2018,2675
 20 | 1245,1584_2018,1584
 21 | 1246,2091_2018,2091
 22 | 1163,130_2018,130
 23 | 1219,21910_2017,21910
 24 | 1223,22363_2017,22363
 25 | 1160,21650_2017,21650
 26 | 1164,20416_2017,20416
 27 | 1172,21542_2017,21542
 28 | 1176,21746_2017,21746
 29 | 1182,22419_2017,22419
 30 | 1228,22792_2017,22792
 31 | 1229,22807_2017,22807
 32 | 1233,23322_2017,23322
 33 | 1285,24390_2017,24390
 34 | 1286,25208_2017,25208
 35 | 1287,26580_2017,26580
 36 | 1288,29620_2017,29620
 37 | 1289,30396_2017,30396
 38 | 1290,30894_2017,30894
 39 | 1281,22342_2017,22342
 40 | 1280,22235_2017,22235
 41 | 1248,7194_2018,7194
 42 | 1166,20505_2017,20505
 43 | 1277,20139_2017,20139
 44 | 1283,23483_2017,23483
 45 | 1284,24380_2017,24380
 46 | 1291,20493_2017,20493
 47 | 1249,11861_2018,11861
 48 | 1238,24299_2017,24299
 49 | 1240,25438_2017,25438
 50 | 1241,26584_2017,26584
 51 | 1170,21407_2017,21407
 52 | 1193,23790_2017,23790
 53 | 1196,24492_2017,24492
 54 | 1202,25886_2017,25886
 55 | 1205,26530_2017,26530
 56 | 1208,27155_2017,27155
 57 | 1157,20720_2017,20720
 58 | 1158,20886_2017,20886
 59 | 1165,20423_2017,20423
 60 | 1169,21276_2017,21276
 61 | 1236,24029_2017,24029
 62 | 1203,25914_2017,25914
 63 | 1216,20849_2017,20849
 64 | 1194,24170_2017,24170
 65 | 1187,23369_2017,23369
 66 | 1237,24038_2017,24038
 67 | 1260,26595_2017,26595
 68 | 1263,29084_2017,29084
 69 | 1262,28520_2017,28520
 70 | 1162,28981_2017,28981
 71 | 1186,23207_2017,23207
 72 | 1201,25628_2017,25628
 73 | 1215,30904_2017,30904
 74 | 1259,26450_2017,26450
 75 | 1189,23581_2017,23581
 76 | 1173,21616_2017,21616
 77 | 1199,24944_2017,24944
 78 | 1181,22413_2017,22413
 79 | 1212,29786_2017,29786
 80 | 1213,30192_2017,30192
 81 | 1264,29380_2017,29380
 82 | 1265,30601_2018,30601
 83 | 1033,22736_2017,22736
 84 | 1006,2471_2018,2471
 85 | 1025,21878_2017,21878
 86 | 1035,22769_2017,22769
 87 | 1064,26678_2017,26678
 88 | 1074,28658_2017,28658
 89 | 1053,24661_2017,24661
 90 | 1080,30428_2017,30428
 91 | 1020,20936_2017,20936
 92 | 1021,20937_2017,20937
 93 | 1048,24016_2017,24016
 94 | 1059,26079_2017,26079
 95 | 1060,26475_2017,26475
 96 | 1070,27851_2017,27851
 97 | 1082,29027_2017,29027
 98 | 1084,24973_2017,24973
 99 | 1088,24697_2017,24697
100 | 1112,22619_2017,22619
101 | 1002,126_2018,126
102 | 1078,29931_2017,29931
103 | 1113,22679_2017,22679
104 | 1071,27996_2017,27996
105 | 1098,24292_2017,24292
106 | 1050,24211_2017,24211
107 | 1039,23136_2017,23136
108 | 1044,23631_2017,23631
109 | 1072,28310_2017,28310
110 | 1114,25132_2017,25132
111 | 1043,23801_2017,23801
112 | 1068,27764_2017,27764
113 | 1099,30418_2017,30418
114 | 1104,25758_2017,25758
115 | 1103,24487_2017,24487
116 | 1009,6072_2018,6072
117 | 1018,20757_2017,20757
118 | 1028,22225_2017,22225
119 | 1038,23051_2017,23051
120 | 1047,23999_2017,23999
121 | 1056,25351_2017,25351
122 | 1076,29423_2017,29423
123 | 1037,23030_2017,23030
124 | 1107,30389_2017,30389
125 | 1008,3318_2018,3318
126 | 1054,24730_2017,24730
127 | 1066,26784_2017,26784
128 | 1075,28965_2017,28965
129 | 1092,23509_2017,23509
130 | 1102,24139_2017,24139
131 | 1117,23141_2017,23141
132 | 1122,10941_2018,10941
133 | 1046,23901_2017,23901
134 | 1055,24805_2017,24805
135 | 1017,20462_2017,20462
136 | 1093,24284_2017,24284
137 | 1108,30443_2017,30443
138 | 1014,20195_2017,20195
139 | 1052,24412_2017,24412
140 | 1003,828_2018,828
141 | 1005,1193_2018,1193
142 | 1022,20991_2017,20991
143 | 1057,25472_2017,25472
144 | 1061,26567_2017,26567
145 | 1077,29912_2017,29912
146 | 1085,25993_2017,25993
147 | 1137,22872_2017,22872
148 | 1138,23461_2017,23461
149 | 1139,25034_2017,25034
150 | 1141,28172_2017,28172
151 | 1136,28679_2017,28679
152 | 1007,2898_2018,2898
153 | 1026,22000_2017,22000
154 | 1029,22242_2017,22242
155 | 1036,22794_2017,22794
156 | 1065,26749_2017,26749
157 | 1024,21569_2017,21569
158 | 1105,25769_2017,25769
159 | 1133,21557_2017,21557
160 | 1135,24298_2017,24298
161 | 1126,25005_2017,25005
162 | 1120,30825_2017,30825
163 | 1129,3255_2018,3255
164 | 1130,20567_2017,20567
165 | 1095,25073_2017,25073
166 | 1023,21439_2017,21439
167 | 1079,30409_2017,30409
168 | 1096,26095_2017,26095
169 | 1097,28176_2017,28176
170 | 1100,20980_2017,20980
171 | 1001,116_2018,116
172 | 1004,1143_2018,1143
173 | 1010,7013_2018,7013
174 | 1011,7749_2018,7749
175 | 1012,8818_2018,8818
176 | 1013,20170_2017,20170
177 | 1015,20349_2017,20349
178 | 1016,20458_2017,20458
179 | 1019,20789_2017,20789
180 | 1027,22216_2017,22216
181 | 1030,22395_2017,22395
182 | 1031,22550_2017,22550
183 | 1034,22745_2017,22745
184 | 1040,23346_2017,23346
185 | 1041,23455_2017,23455
186 | 1042,23611_2017,23611
187 | 1049,24078_2017,24078
188 | 1051,24290_2017,24290
189 | 1058,25847_2017,25847
190 | 1063,26639_2017,26639
191 | 1067,27345_2017,27345
192 | 1069,27821_2017,27821
193 | 1073,28486_2017,28486
194 | 1081,30487_2017,30487
195 | 1083,29197_2017,29197
196 | 1086,23320_2017,23320
197 | 1087,26609_2017,26609
198 | 1091,23462_2017,23462
199 | 1094,24525_2017,24525
200 | 1101,22746_2017,22746
201 | 1106,26168_2017,26168
202 | 1110,22193_2017,22193
203 | 1111,22241_2017,22241
204 | 1115,27308_2017,27308
205 | 1116,29802_2017,29802
206 | 1118,24725_2017,24725
207 | 1119,27557_2017,27557
208 | 1121,30971_2017,30971
209 | 1123,21458_2017,21458
210 | 1124,21573_2017,21573
211 | 1125,24723_2017,24723
212 | 1127,25257_2017,25257
213 | 1128,22938_2017,22938
214 | 1131,20740_2017,20740
215 | 1132,21009_2017,21009
216 | 1134,22652_2017,22652
217 | 1140,25489_2017,25489
218 | 1156,301_2018,301
219 | 1167,20646_2017,20646
220 | 1168,21017_2017,21017
221 | 1171,21515_2017,21515
222 | 1174,21643_2017,21643
223 | 1178,22044_2017,22044
224 | 1180,22295_2017,22295
225 | 1183,22630_2017,22630
226 | 1184,22865_2017,22865
227 | 1185,23160_2017,23160
228 | 1190,23601_2017,23601
229 | 1191,23650_2017,23650
230 | 1195,24418_2017,24418
231 | 1197,24650_2017,24650
232 | 1200,25305_2017,25305
233 | 1204,26058_2017,26058
234 | 1206,26614_2017,26614
235 | 1207,26750_2017,26750
236 | 1209,28183_2017,28183
237 | 1210,28265_2017,28265
238 | 1211,29570_2017,29570
239 | 1217,21295_2017,21295
240 | 1218,21658_2017,21658
241 | 1221,22237_2017,22237
242 | 1222,22328_2017,22328
243 | 1224,22389_2017,22389
244 | 1225,22392_2017,22392
245 | 1226,22622_2017,22622
246 | 1227,22655_2017,22655
247 | 1230,22913_2017,22913
248 | 1231,22978_2017,22978
249 | 1232,23312_2017,23312
250 | 1234,23596_2017,23596
251 | 1235,23710_2017,23710
252 | 1239,24452_2017,24452
253 | 1243,27571_2017,27571
254 | 1244,27730_2017,27730
255 | 1255,22874_2017,22874
256 | 1256,23015_2017,23015
257 | 1257,23288_2017,23288
258 | 1258,23395_2017,23395
259 | 1261,27056_2017,27056
260 | 1269,552_2018,552
261 | 1271,6651_2018,6651
262 | 1272,7197_2018,7197
263 | 1278,20598_2017,20598
264 | 1279,22145_2017,22145
265 | 1292,22197_2017,22197
266 | 2001,22609_2017,22609
267 | 2002,24165_2017,24165
268 | 2004,27282_2017,27282
269 | 2005,24834_2017,24834
270 | 2016,28479_2017,28479
271 | 2017,29420_2017,29420
272 | 2018,28592_2017,28592
273 | 2019,25574_2017,25574
274 | 2020,22682_2017,22682
275 | 2022,23564_2017,23564
276 | 2027,21268_2017,21268
277 | 2028,24971_2017,24971
278 | 2029,20399_2017,20399
279 | 2030,24689_2017,24689
280 | 2031,25074_2017,25074
281 | 2032,27596_2017,27596
282 | 2034,22516_2017,22516
283 | 2035,20650_2017,20650
284 | 2037,23865_2017,23865
285 | 2039,21618_2017,21618
286 | 2040,22206_2017,22206
287 | 2041,20426_2017,20426
288 | 2042,26244_2017,26244
289 | 2045,22618_2017,22618
290 | 2046,23056_2017,23056
291 | 2047,21502_2017,21502
292 | 2053,26997_2017,26997
293 | 2056,24690_2017,24690
294 | 2057,23389_2017,23389
295 | 2058,20341_2017,20341
296 | 2060,31018_2017,31018
297 | 2064,21541_2017,21541
298 | 2065,29581_2017,29581
299 | 2071,21545_2017,21545
300 | 2072,2497_2018,2497
301 | 2074,1112_2018,1112
302 | 2075,27763_2017,27763
303 | 2078,26445_2017,26445
304 | 2079,25599_2017,25599
305 | 2083,25192_2017,25192
306 | 2085,24749_2017,24749
307 | 2087,24862_2017,24862
308 | 2096,27164_2017,27164
309 | 2097,24041_2017,24041
310 | 2100,23147_2017,23147
311 | 2102,21631_2017,21631
312 | 2104,25905_2017,25905
313 | 2108,27279_2017,27279
314 | 2109,22264_2017,22264
315 | 2111,21938_2017,21938
316 | 2114,27897_2017,27897
317 | 2120,21043_2017,21043
318 | 2121,23407_2017,23407
319 | 2126,25178_2017,25178
320 | 2128,26534_2017,26534
321 | 2130,29058_2017,29058
322 | 2132,23161_2017,23161
323 | 2133,24528_2017,24528
324 | 2136,23192_2017,23192
325 | 2140,23579_2017,23579
326 | 2143,22341_2017,22341
327 | 2144,21374_2017,21374
328 | 2149,25933_2017,25933
329 | 2150,21412_2017,21412
330 | 2154,20216_2017,20216
331 | 2155,22737_2017,22737
332 | 2159,27947_2017,27947
333 | 2160,25950_2017,25950
334 | 2165,21748_2017,21748
335 | 2166,25062_2017,25062
336 | 2167,20782_2017,20782
337 | 2168,22843_2017,22843
338 | 2173,29961_2017,29961
339 | 2176,20919_2017,20919
340 | 2177,25127_2017,25127
341 | 2178,28864_2017,28864
342 | 2182,29603_2017,29603
343 | 2183,26507_2017,26507
344 | 2184,23713_2017,23713
345 | 2185,23357_2017,23357
346 | 2187,25337_2017,25337
347 | 2190,27143_2017,27143
348 | 2193,7711_2018,7711
349 | 2206,26257_2017,26257
350 | 2207,22703_2017,22703
351 | 2208,23553_2017,23553
352 | 2209,22891_2017,22891
353 | 2213,26837_2017,26837
354 | 2214,2595_2018,2595
355 | 2008,24736_2017,24736
356 | 2070,20944_2017,20944
357 | 2533,21834_2017,21834
358 | 2501,31009_2017,31009
359 | 2528,25961_2017,25961
360 | 2547,20541_2017,20541
361 | 2502,24957_2017,24957
362 | 2534,23526_2017,23526
363 | 2549,24179_2017,24179
364 | 2537,21659_2017,21659
365 | 2503,22018_2017,22018
366 | 2504,21559_2017,21559
367 | 2505,21445_2017,21445
368 | 2506,22155_2017,22155
369 | 2507,25118_2017,25118
370 | 2553,21830_2017,21830
371 | 2529,21274_2017,21274
372 | 2508,20368_2017,20368
373 | 2509,24849_2017,24849
374 | 2538,27309_2017,27309
375 | 2550,20582_2017,20582
376 | 2532,21897_2017,21897
377 | 2554,22991_2017,22991
378 | 2510,21811_2017,21811
379 | 2511,22021_2017,22021
380 | 2527,23575_2017,23575
381 | 2539,20984_2017,20984
382 | 2555,23039_2017,23039
383 | 2512,26559_2017,26559
384 | 2551,22171_2017,22171
385 | 2513,23656_2017,23656
386 | 2556,23028_2017,23028
387 | 2514,22840_2017,22840
388 | 2515,24080_2017,24080
389 | 2531,24761_2017,24761
390 | 2516,22547_2017,22547
391 | 2557,21877_2017,21877
392 | 2517,22050_2017,22050
393 | 2518,23959_2017,23959
394 | 2540,23067_2017,23067
395 | 2541,21395_2017,21395
396 | 2558,31001_2017,31001
397 | 2519,24846_2017,24846
398 | 2520,21867_2017,21867
399 | 2542,22631_2017,22631
400 | 2521,21593_2017,21593
401 | 2543,21091_2017,21091
402 | 2559,25488_2017,25488
403 | 2522,23307_2017,23307
404 | 2535,21437_2017,21437
405 | 2544,21962_2017,21962
406 | 2523,21844_2017,21844
407 | 2545,22478_2017,22478
408 | 2536,23381_2017,23381
409 | 2524,21887_2017,21887
410 | 2530,22552_2017,22552
411 | 2525,21837_2017,21837
412 | 2526,24033_2017,24033
413 | 2560,21457_2017,21457
414 | 2561,23978_2017,23978
415 | 2563,23165_2017,23165
416 | 2587,22436_2017,22436
417 | 2576,20612_2017,20612
418 | 2589,21013_2017,21013
419 | 2623,31025_2017,31025
420 | 2564,23563_2017,23563
421 | 2567,20263_2017,20263
422 | 2598,23332_2017,23332
423 | 2572,20945_2017,20945
424 | 2568,20296_2017,20296
425 | 2569,23016_2017,23016
426 | 2592,23292_2017,23292
427 | 2577,23818_2017,23818
428 | 2594,24148_2017,24148
429 | 2617,25412_2017,25412
430 | 2608,23911_2017,23911
431 | 2624,24747_2017,24747
432 | 2562,30926_2017,30926
433 | 2573,23384_2017,23384
434 | 2610,22614_2017,22614
435 | 2625,31027_2017,31027
436 | 2609,22078_2017,22078
437 | 2615,21169_2017,21169
438 | 2590,21864_2017,21864
439 | 2578,25081_2017,25081
440 | 2582,21140_2017,21140
441 | 2583,22756_2017,22756
442 | 2614,23604_2017,23604
443 | 2579,23003_2017,23003
444 | 2618,30980_2017,30980
445 | 2593,23641_2017,23641
446 | 2584,22625_2017,22625
447 | 2595,22826_2017,22826
448 | 2596,25160_2017,25160
449 | 2620,26648_2017,26648
450 | 2574,20934_2017,20934
451 | 2603,21006_2017,21006
452 | 2566,23738_2017,23738
453 | 2601,21011_2017,21011
454 | 2606,26378_2017,26378
455 | 2604,21029_2017,21029
456 | 2597,24098_2017,24098
457 | 2619,22138_2017,22138
458 | 2588,21534_2017,21534
459 | 2586,21021_2017,21021
460 | 2575,23273_2017,23273
461 | 2605,25228_2017,25228
462 | 2633,31026_2017,31026
463 | 2580,20996_2017,20996
464 | 2571,20283_2017,20283
465 | 1295,26272_2017,26272
466 | 1296,26957_2017,26957
467 | 1297,21147_2017,21147
468 | 1298,29286_2017,29286
469 | 1299,21125_2017,21125
470 | 1300,28306_2017,28306
471 | 1301,2508_2018,2508
472 | 1303,3798_2018,3798
473 | 1304,6336_2018,6336
474 | 1306,9063_2018,9063
475 | 1307,22467_2017,22467
476 | 1308,29861_2017,29861
477 | 1309,30363_2017,30363
478 | 1310,6660_2018,6660
479 | 1312,21198_2017,21198
480 | 1313,3180_2018,3180
481 | 1315,1580_2018,1580
482 | 3001,10162_2018,10162
483 | 3003,23592_2017,23592
484 | 3004,23795_2017,23795
485 | 3005,23810_2017,23810
486 | 3007,24545_2017,24545
487 | 3008,24552_2017,24552
488 | 3009,24594_2017,24594
489 | 3011,25094_2017,25094
490 | 3013,26185_2017,26185
491 | 3014,26332_2017,26332
492 | 3015,26380_2017,26380
493 | 3016,26645_2017,26645
494 | 3017,28044_2017,28044
495 | 3018,29272_2017,29272
496 | 3019,29439_2017,29439
497 | 3020,29673_2017,29673
498 | 3021,30664_2017,30664
499 | 


--------------------------------------------------------------------------------
/.Rhistory:
--------------------------------------------------------------------------------
  1 | "5.1. VictimDeathMunicipality",
  2 | "6. VictimOccupation",
  3 | "7. DeathFacility",
  4 | "7.1. DeathFacilityAddress",
  5 | "8. Cause of Death",
  6 | "9. Circumstances and hurricane relevance",
  7 | "Direct/indirect CDC criterion"
  8 | )
  9 | # Convert page 3 into a data frame
 10 | page3_data <- data.frame(t(page3$`X__1`[c(1, 2, 5:11, 14, 15, 18:24)]))
 11 | colnames(page3_data) <-
 12 | c(
 13 | paste(
 14 | "First call",
 15 | c(
 16 | "Interviewer",
 17 | "Date and Time",
 18 | "Interview conducted",
 19 | "Message on the phone",
 20 | "Text message",
 21 | "Number does not work",
 22 | "No number",
 23 | "Rescheduled call",
 24 | "Person did not want to cooperate"
 25 | ),
 26 | sep = '-'
 27 | ),
 28 | paste(
 29 | "Second call",
 30 | c(
 31 | "Interviewer",
 32 | "Date and Time",
 33 | "Interview conducted",
 34 | "Message on the phone",
 35 | "Text message",
 36 | "Number does not work",
 37 | "No number",
 38 | "Rescheduled call",
 39 | "Person did not want to cooperate"
 40 | ),
 41 | sep = '-'
 42 | )
 43 | )
 44 | # Put it all in one
 45 | data <- cbind(page1_data, page2_data, page3_data) %>%
 46 | mutate(filename = filename)
 47 | return(data)
 48 | }
 49 | bitacoras <-
 50 | file.path("bitacoras/Filled", list.files('bitacoras/Filled')) %>%
 51 | map(read_bitacora) %>%
 52 | reduce(bind_rows) %>%
 53 | mutate(id = as.character(round(parse_number(id))),
 54 | `Certificate Number` = as.character(round(parse_number(`Certificate Number`))),
 55 | DN = as.character(round(parse_number(DN))))
 56 | ######################
 57 | ### CLEAN UP CASES ###
 58 | ######################
 59 | getUnaccented <- function(input) {
 60 | return(chartr('ÁáàéÉíÍóÓúÚüÜñÑ', 'aaaeeiioouuuunn', input))
 61 | }
 62 | processCols <- function(dt) {
 63 | # remove informant and caller info
 64 | dt <- dt[ , !grepl( "Informant" , names(dt))]
 65 | dt <- dt[ , !grepl( "call" , names(dt))]
 66 | dt <- dt[ , !grepl( "TypeOfDeath" , names(dt))]
 67 | dt <- dt[ , !grepl( "Certificate Number" , names(dt))]
 68 | # take out question numbers from colnames
 69 | colnames(dt) <- gsub('[1-9].?[1-9]?. ','',colnames(dt))
 70 | return(dt)
 71 | }
 72 | bitacoras_cleaned <- processCols(bitacoras) %>%
 73 | select(-filename, -VictimOccupation)
 74 | getDB <- function(var_name){
 75 | bitacoras_cleaned %>%
 76 | select(Source, var_name, DN,id) %>%
 77 | group_by(DN,id) %>%
 78 | spread(Source, var_name) %>%
 79 | rename(DB=`Base de datos causa de muerte`, CMT= Comentarios, SVY=`Encuesta/survey`, INT=`Respuesta del entrevistado`)
 80 | }
 81 | ##############################################
 82 | ##### GET BASIC CASE INFO FROM BITACORAS #####
 83 | ##############################################
 84 | #VictimName
 85 | # If confirmed by interview, take DB name first
 86 | # If no DB name available, take SVY name
 87 | name1 <- getDB("VictimName") %>%
 88 | mutate(VictimName = ifelse(toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'^c\\s') | INT == 'correcto', DB, INT)) %>%
 89 | select(VictimName)
 90 | name2 <- getDB("VictimMiddleName") %>%
 91 | mutate(VictimMiddleName = ifelse(toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'^c\\s') | INT == 'correcto', DB, INT)) %>%
 92 | select(VictimMiddleName)
 93 | name3 <- getDB("VictimLastName") %>%
 94 | mutate(VictimLastName = ifelse(toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'^c\\s') | INT == 'correcto', DB, INT)) %>%
 95 | select(VictimLastName)
 96 | name4 <- getDB("VictimSecondLastName") %>%
 97 | mutate(VictimSecondLastName = ifelse(toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'^c\\s') | INT == 'correcto', DB, INT)) %>%
 98 | select(VictimSecondLastName)
 99 | part_name <- name1 %>% merge(name2) %>% merge(name3) %>% merge(name4)
100 | # age
101 | part_age <- getDB("VictimAge") %>%
102 | mutate(VictimAge = ifelse(toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'^c\\s') | INT == 'correcto', DB, INT)) %>%
103 | select(VictimAge)
104 | # residential address
105 | municipalities <- readxl::read_excel('data/municipalities.xlsx')
106 | mu <- getUnaccented(paste(tolower(municipalities$Municipality),collapse = '|'))
107 | part_res <- getDB("VictimResidence") %>%
108 | mutate(VictimResidenceMunicipality = ifelse(
109 | toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'^C\\s') | str_detect(INT,'^c\\s') | INT == 'correcto',
110 | DB,
111 | ifelse(
112 | # if there is a municipality in INT.VictimResidence
113 | str_detect(getUnaccented(tolower(INT)), mu),
114 | # then take this municipality
115 | str_extract_all(getUnaccented(tolower(INT)), mu, simplify = T)[,1],
116 | # otherwise take the DB
117 | DB)
118 | )) %>%
119 | select(VictimResidenceMunicipality) %>%
120 | mutate(VictimResidenceMunicipality = tolower(VictimResidenceMunicipality))
121 | # death date
122 | part_deathdate <- getDB("VictimDeathDate") %>%
123 | mutate(INT = ifelse(!is.na(as.numeric(INT)), as.character(as.Date(as.numeric(INT), origin="1899-12-30")), INT)) %>%
124 | mutate(VictimDeathDate = ifelse(toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'^C\\s') | str_detect(INT,'correct'), DB, INT)) %>%
125 | mutate(VictimDeathDate = ifelse(str_detect(VictimDeathDate,'\\d+ (de )?septiembre (de 2017)?'), paste('2017-09-',str_squish(str_sub(VictimDeathDate,1,2)),sep=''), VictimDeathDate)) %>%
126 | mutate(VictimDeathDate = ifelse(str_detect(VictimDeathDate,'\\d+ (de )?octubre (de 2017)?'), paste('2017-10-',str_squish(str_sub(VictimDeathDate,1,2)),sep=''), VictimDeathDate)) %>%
127 | mutate(VictimDeathDate = ifelse(str_detect(VictimDeathDate,'\\d+ (de )?noviembre (de 2017)?'), paste('2017-11-',str_squish(str_sub(VictimDeathDate,1,2)),sep=''), VictimDeathDate)) %>%
128 | mutate(VictimDeathDate = ifelse(str_detect(VictimDeathDate,'\\d+ (de )?diciembre (de 2017)?'), paste('2017-12-',str_squish(str_sub(VictimDeathDate,1,2)),sep=''), VictimDeathDate)) %>%
129 | mutate(VictimDeathDate = ifelse(str_detect(VictimDeathDate,'\\d+ (de )?enero (de 2018)?'), paste('2018-01-',str_squish(str_sub(VictimDeathDate,1,2)),sep=''), VictimDeathDate)) %>%
130 | mutate(VictimDeathDate = ifelse(str_detect(VictimDeathDate,'\\d+ (de )?febrero (de 2018)?'), paste('2018-02-',str_squish(str_sub(VictimDeathDate,1,2)),sep=''), VictimDeathDate)) %>%
131 | mutate(VictimDeathDate = ifelse(str_detect(VictimDeathDate,'\\d+ (de )?abril (de 2017)?'), paste('2018-02-',str_squish(str_sub(VictimDeathDate,1,2)),sep=''), VictimDeathDate)) %>%
132 | select(VictimDeathDate)
133 | # death province
134 | part_deathPl <- getDB('VictimDeathMunicipality') %>%
135 | mutate(
136 | VictimDeathMunicipality = ifelse(
137 | (toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'correcta|correcto|^C\\s|^c\\s')),
138 | DB,
139 | ifelse(
140 | # if there is a municipality in INT
141 | str_detect(getUnaccented(tolower(INT)), mu),
142 | # then take this municipality
143 | str_extract_all(getUnaccented(tolower(INT)), mu, simplify = T)[,1],
144 | # otherwise take the survey VictimResidence
145 | SVY
146 | ))
147 | ) %>%
148 | select(VictimDeathMunicipality) %>%
149 | mutate(VictimDeathMunicipality = tolower(str_replace(VictimDeathMunicipality,'PUERTO RICO, ','')))
150 | part_cause <- getDB('Direct/indirect CDC criterion') %>%
151 | mutate(cause = INT) %>%
152 | select(cause)
153 | output <- part_name %>%
154 | merge(part_age) %>%
155 | merge(part_res) %>%
156 | merge(part_deathdate) %>%
157 | merge(part_deathPl) %>%
158 | merge(part_cause) %>%
159 | filter(is.na(cause) | cause != 'no relacionada') %>%
160 | mutate(VictimAge =as.numeric(VictimAge)) %>%
161 | mutate(id = as.numeric(id)) %>%
162 | mutate(DN = as.numeric(DN)) %>%
163 | mutate(source = 'survey') %>%
164 | select(-cause)
165 | ####################################
166 | ##### ADD DEMOGRAPHIC ANALYSIS #####
167 | ####################################
168 | grouped_cause <- read_csv('data/govt_091817_061218.csv') %>% select(DeathNumber,nchsti) %>%
169 | filter(!is.na(nchsti)) %>%
170 | mutate(causes = nchsti, DN= DeathNumber) %>%
171 | select(DN, causes)
172 | output <- output %>%
173 | left_join(grouped_cause)
174 | ######################################
175 | ##### FORMAT FOR THE INTERACTIVE #####
176 | ######################################
177 | library(lubridate)
178 | basic <- output %>%
179 | mutate(
180 | VictimMiddleName = ifelse(is.na(VictimMiddleName),'',VictimMiddleName),
181 | VictimSecondLastName = ifelse(is.na(VictimSecondLastName),'',VictimSecondLastName),
182 | name = str_squish(paste(VictimName, VictimMiddleName, VictimLastName, VictimSecondLastName)),
183 | age = VictimAge,
184 | dmu = tolower(VictimDeathMunicipality),
185 | rmu = tolower(VictimResidenceMunicipality),
186 | date = VictimDeathDate,
187 | month = month(as.Date(VictimDeathDate))
188 | ) %>%
189 | select(id, DN, name, age, dmu, rmu,date, month, source, causes)
190 | #############################################
191 | ##### ADD IN CAUSES OF DEATH CATEGORIES #####
192 | #############################################
# Survey answer options for the cause-of-death question; each one becomes a
# 0/1 indicator column (c1..c6) depending on whether the SVY text contains it.
causes <- c('Condición de salud directamente relacionada con el huracán',
            'Daños ocasionados por el huracán',
            "Falta de electricidad","Falta de agua o comida",
            "Falta de acceso a atención médica",
            "Falta de acceso a las comunicaciones")

basic <- merge(
  basic,
  getDB('Cause of Death') %>%
    select(SVY, DN, id) %>%
    mutate(
      c1 = ifelse(str_detect(SVY, causes[1]), 1, 0),
      c2 = ifelse(str_detect(SVY, causes[2]), 1, 0),
      c3 = ifelse(str_detect(SVY, causes[3]), 1, 0),
      c4 = ifelse(str_detect(SVY, causes[4]), 1, 0),
      c5 = ifelse(str_detect(SVY, causes[5]), 1, 0),
      c6 = ifelse(str_detect(SVY, causes[6]), 1, 0)
    ) %>%
    select(DN, id, c1, c2, c3, c4, c5, c6)
)
211 | ##############################
212 | ##### GET INTERVIEW DATA #####
213 | ##############################
# Free-text circumstances: use the interview answer unless it is missing or
# shorter than 50 characters, in which case fall back to the survey text.
interview <- getDB('Circumstances and hurricane relevance') %>%
  mutate(
    text_field_es = ifelse(is.na(INT) | nchar(INT) < 50, SVY, INT)
  ) %>%
  select(text_field_es)
217 | library(xlsx)
218 | library(readxl)
219 | library(tidyverse)
220 | library(stringr)
221 | # read_bitacora() by larry
read_bitacora <- function(filename) {
  # Read one filled-in bitácora workbook and flatten its three sheets into a
  # single data frame.
  #
  # filename: path to the .xlsx workbook.
  # Returns: a data frame whose columns are the case identifiers (sheet 1),
  #   the questionnaire fields (sheet 2) and the call log (sheet 3), plus a
  #   `filename` column holding the path that was read. The questionnaire part
  #   contributes one row per selected spreadsheet column (columns 2:5), and
  #   cbind() recycles the single-row sheet 1 / sheet 3 data across them.
  page1 <- read_excel(filename, sheet = "Información del caso", col_types = 'text')
  page2 <- read_excel(filename, sheet = "Cuestionario", col_types = 'text')
  page3 <- read_excel(filename, sheet = "Estatus del caso", col_types = 'text')

  # NOTE(review): `X__1` is presumably the name readxl generated for an
  # unnamed column; newer readxl releases name such columns differently
  # (e.g. `...1`) — confirm against the installed version.
  # stringsAsFactors = FALSE keeps every cell as character on R < 4.0 too, so
  # the later parse_number()/ifelse() calls never see factor codes.
  page1_data <- data.frame(t(page1$`X__1`), stringsAsFactors = FALSE)
  colnames(page1_data) <- c("id", "Certificate Number", "DN", "TypeOfDeath")

  # Questionnaire: 23 selected rows across spreadsheet columns 2:5,
  # transposed so each questionnaire field becomes a column.
  page2_data <- data.frame(
    t(page2[c(2:4, 17:21, 23, 25, 27:31, 33:39, 48), 2:5]),
    stringsAsFactors = FALSE
  )
  colnames(page2_data) <- c(
    "InformantName",
    "InformantPhone",
    "InformantMail",
    "Source",
    "VictimName",
    "VictimMiddleName",
    "VictimLastName",
    "VictimSecondLastName",
    "1. InformantRelationship",
    "2. VictimAge",
    "3. VictimDOB",
    "4. VictimResidence",
    "4. VictimResidence1",
    "4. VictimResidence2",
    "4. VictimResidence3",
    "5. VictimDeathDate",
    "5.1. VictimDeathMunicipality",
    "6. VictimOccupation",
    "7. DeathFacility",
    "7.1. DeathFacilityAddress",
    "8. Cause of Death",
    "9. Circumstances and hurricane relevance",
    "Direct/indirect CDC criterion"
  )

  # Call log: the same nine status fields are recorded for the first and the
  # second call attempt.
  page3_data <- data.frame(
    t(page3$`X__1`[c(1, 2, 5:11, 14, 15, 18:24)]),
    stringsAsFactors = FALSE
  )
  call_fields <- c(
    "Interviewer",
    "Date and Time",
    "Interview conducted",
    "Message on the phone",
    "Text message",
    "Number does not work",
    "No number",
    "Rescheduled call",
    "Person did not want to cooperate"
  )
  colnames(page3_data) <- c(
    paste("First call", call_fields, sep = '-'),
    paste("Second call", call_fields, sep = '-')
  )

  # Put it all in one table and remember which file it came from.
  cbind(page1_data, page2_data, page3_data) %>%
    mutate(filename = filename)
}
# Load every workbook under bitacoras/Filled, stack the results, and normalise
# the three identifier columns to rounded numbers stored as text.
bitacoras <- reduce(
  map(list.files('bitacoras/Filled', full.names = TRUE), read_bitacora),
  bind_rows
) %>%
  mutate(
    id = as.character(round(parse_number(id))),
    `Certificate Number` = as.character(round(parse_number(`Certificate Number`))),
    DN = as.character(round(parse_number(DN)))
  )
309 | ######################
310 | ### CLEAN UP CASES ###
311 | ######################
getUnaccented <- function(input) {
  # Replace the listed accented letters with plain lower-case ASCII letters.
  # Upper-case accented letters are also mapped to lower case; all other
  # characters are left untouched.
  accented <- 'ÁáàéÉíÍóÓúÚüÜñÑ'
  plain <- 'aaaeeiioouuuunn'
  chartr(accented, plain, input)
}
processCols <- function(dt) {
  # Drop identifying / bookkeeping columns and strip question numbers from the
  # remaining column names.
  #
  # dt: a data frame of bitácora records.
  # Returns: `dt` without any column whose name contains "Informant", "call",
  #   "TypeOfDeath" or "Certificate Number", and with a leading question
  #   number such as "2. " or "5.1. " removed from each remaining name.

  # remove informant and caller info
  dropped <- c("Informant", "call", "TypeOfDeath", "Certificate Number")
  keep <- !grepl(paste(dropped, collapse = "|"), names(dt))
  # drop = FALSE: a single surviving column must stay a data frame, otherwise
  # the result degrades to a bare vector.
  dt <- dt[, keep, drop = FALSE]

  # take out question numbers from colnames; anchored and escaped so only a
  # leading "N. " / "N.M. " prefix is removed, never digits inside a name
  colnames(dt) <- sub("^[0-9]+(\\.[0-9]+)*\\. ", "", colnames(dt))
  dt
}
# Anonymised records with tidy column names, minus the two columns the
# analysis does not use.
bitacoras_cleaned <- select(
  processCols(bitacoras),
  -filename,
  -VictimOccupation
)
getDB <- function(var_name){
  # Pivot one questionnaire field so that each value of `Source` becomes its
  # own column, then give those columns short names:
  #   DB  = `Base de datos causa de muerte`
  #   CMT = Comentarios
  #   SVY = `Encuesta/survey`
  #   INT = `Respuesta del entrevistado`
  #
  # var_name: name of the column in `bitacoras_cleaned` to pivot (a string).
  # The table is grouped by DN and id and no ungroup() is called.
  # NOTE(review): callers select a single column and then merge(); that
  # presumably relies on dplyr keeping the grouping columns DN and id — confirm.
  long <- group_by(
    select(bitacoras_cleaned, Source, var_name, DN, id),
    DN, id
  )
  wide <- spread(long, Source, var_name)
  rename(
    wide,
    DB = `Base de datos causa de muerte`,
    CMT = Comentarios,
    SVY = `Encuesta/survey`,
    INT = `Respuesta del entrevistado`
  )
}
334 | ##############################################
335 | ##### GET BASIC CASE INFO FROM BITACORAS #####
336 | ##############################################
# Victim name parts.
# For each part: when the interview answer (INT) is missing, is "C"/"c",
# starts with "c " or equals 'correcto', keep the database value (DB);
# otherwise the interview answer replaces it.
name1 <- getDB("VictimName") %>%
  mutate(
    VictimName = ifelse(
      is.na(INT) | toupper(INT) == 'C' | INT == 'correcto' | str_detect(INT, '^c\\s'),
      DB,
      INT
    )
  ) %>%
  select(VictimName)

name2 <- getDB("VictimMiddleName") %>%
  mutate(
    VictimMiddleName = ifelse(
      is.na(INT) | toupper(INT) == 'C' | INT == 'correcto' | str_detect(INT, '^c\\s'),
      DB,
      INT
    )
  ) %>%
  select(VictimMiddleName)

name3 <- getDB("VictimLastName") %>%
  mutate(
    VictimLastName = ifelse(
      is.na(INT) | toupper(INT) == 'C' | INT == 'correcto' | str_detect(INT, '^c\\s'),
      DB,
      INT
    )
  ) %>%
  select(VictimLastName)

name4 <- getDB("VictimSecondLastName") %>%
  mutate(
    VictimSecondLastName = ifelse(
      is.na(INT) | toupper(INT) == 'C' | INT == 'correcto' | str_detect(INT, '^c\\s'),
      DB,
      INT
    )
  ) %>%
  select(VictimSecondLastName)

# Stitch the four parts back together on their shared columns.
part_name <- merge(merge(merge(name1, name2), name3), name4)
# Age: database value when the interview confirms it (or gives nothing),
# otherwise the interview answer.
part_age <- getDB("VictimAge") %>%
  mutate(
    VictimAge = ifelse(
      is.na(INT) | toupper(INT) == 'C' | INT == 'correcto' | str_detect(INT, '^c\\s'),
      DB,
      INT
    )
  ) %>%
  select(VictimAge)
# residential address
# `mu` is one regex alternation of all municipality names, lower-cased and
# with accents stripped, so it can be matched against normalised free text.
municipalities <- readxl::read_excel('data/municipalities.xlsx')
mu <- getUnaccented(paste(tolower(municipalities$Municipality), collapse = '|'))

part_res <- getDB("VictimResidence") %>%
  mutate(VictimResidenceMunicipality = ifelse(
    # interview confirmed the database value (or gave no answer)
    toupper(INT) == 'C' | is.na(INT) | str_detect(INT, '^C\\s') |
      str_detect(INT, '^c\\s') | INT == 'correcto',
    DB,
    ifelse(
      # if there is a municipality in INT.VictimResidence
      str_detect(getUnaccented(tolower(INT)), mu),
      # then take this municipality (first match in the text)
      str_extract(getUnaccented(tolower(INT)), mu),
      # otherwise take the DB
      DB
    )
  )) %>%
  select(VictimResidenceMunicipality) %>%
  mutate(VictimResidenceMunicipality = tolower(VictimResidenceMunicipality))
# death date
# 1. Numeric interview answers are treated as Excel serial day numbers and
#    converted to ISO date strings.
# 2. The database value is kept when the interview confirms it or is missing.
# 3. Spanish "<day> (de) <month>" answers are rewritten as "YYYY-MM-<day>".
#    The day is taken from the first two characters of the text, so this
#    assumes the answer starts with the day number.
# The month patterns do not require trailing text after the month name, so an
# answer that ends right after the month (e.g. "20 de septiembre") is handled.
part_deathdate <- getDB("VictimDeathDate") %>%
  mutate(INT = ifelse(
    !is.na(as.numeric(INT)),
    as.character(as.Date(as.numeric(INT), origin = "1899-12-30")),
    INT
  )) %>%
  mutate(VictimDeathDate = ifelse(
    toupper(INT) == 'C' | is.na(INT) | str_detect(INT, '^C\\s') | str_detect(INT, 'correct'),
    DB,
    INT
  )) %>%
  mutate(VictimDeathDate = ifelse(
    str_detect(VictimDeathDate, '\\d+ (de )?septiembre'),
    paste0('2017-09-', str_squish(str_sub(VictimDeathDate, 1, 2))),
    VictimDeathDate
  )) %>%
  mutate(VictimDeathDate = ifelse(
    str_detect(VictimDeathDate, '\\d+ (de )?octubre'),
    paste0('2017-10-', str_squish(str_sub(VictimDeathDate, 1, 2))),
    VictimDeathDate
  )) %>%
  mutate(VictimDeathDate = ifelse(
    str_detect(VictimDeathDate, '\\d+ (de )?noviembre'),
    paste0('2017-11-', str_squish(str_sub(VictimDeathDate, 1, 2))),
    VictimDeathDate
  )) %>%
  mutate(VictimDeathDate = ifelse(
    str_detect(VictimDeathDate, '\\d+ (de )?diciembre'),
    paste0('2017-12-', str_squish(str_sub(VictimDeathDate, 1, 2))),
    VictimDeathDate
  )) %>%
  mutate(VictimDeathDate = ifelse(
    str_detect(VictimDeathDate, '\\d+ (de )?enero'),
    paste0('2018-01-', str_squish(str_sub(VictimDeathDate, 1, 2))),
    VictimDeathDate
  )) %>%
  mutate(VictimDeathDate = ifelse(
    str_detect(VictimDeathDate, '\\d+ (de )?febrero'),
    paste0('2018-02-', str_squish(str_sub(VictimDeathDate, 1, 2))),
    VictimDeathDate
  )) %>%
  # April is month 04 (it was previously written out as 2018-02, i.e. February).
  mutate(VictimDeathDate = ifelse(
    str_detect(VictimDeathDate, '\\d+ (de )?abril'),
    paste0('2018-04-', str_squish(str_sub(VictimDeathDate, 1, 2))),
    VictimDeathDate
  )) %>%
  select(VictimDeathDate)
# death municipality
part_deathPl <- getDB('VictimDeathMunicipality') %>%
  mutate(
    VictimDeathMunicipality = ifelse(
      # interview confirmed the database value (or gave no answer)
      (toupper(INT) == 'C' | is.na(INT) | str_detect(INT, 'correcta|correcto|^C\\s|^c\\s')),
      DB,
      ifelse(
        # if there is a municipality in INT
        str_detect(getUnaccented(tolower(INT)), mu),
        # then take this municipality (first match in the text)
        str_extract(getUnaccented(tolower(INT)), mu),
        # otherwise take the survey (SVY) value of this same field
        SVY
      )
    )
  ) %>%
  select(VictimDeathMunicipality) %>%
  # strip the 'PUERTO RICO, ' prefix before lower-casing
  mutate(VictimDeathMunicipality = tolower(str_replace(VictimDeathMunicipality, 'PUERTO RICO, ', '')))
# Interviewer-supplied direct/indirect classification (the INT column).
part_cause <- getDB('Direct/indirect CDC criterion') %>%
  mutate(cause = INT) %>%
  select(cause)

# Fold all per-field tables together; merge() joins on the columns the two
# tables have in common. Cases the interviewer marked 'no relacionada' are
# dropped, then the numeric columns are coerced and the rows tagged as survey.
output <- Reduce(
  merge,
  list(part_name, part_age, part_res, part_deathdate, part_deathPl, part_cause)
) %>%
  filter(is.na(cause) | cause != 'no relacionada') %>%
  mutate(
    VictimAge = as.numeric(VictimAge),
    id = as.numeric(id),
    DN = as.numeric(DN),
    source = 'survey'
  ) %>%
  select(-cause)
418 | ####################################
419 | ##### ADD DEMOGRAPHIC ANALYSIS #####
420 | ####################################
# Cause grouping (nchsti) per death number, read from the government file.
# Rows without a grouping are discarded before the join.
grouped_cause <- read_csv('data/govt_091817_061218.csv') %>%
  filter(!is.na(nchsti)) %>%
  select(DN = DeathNumber, causes = nchsti)

# Join on DN explicitly: an implicit natural join would silently start using
# any other column the two tables might come to share.
output <- output %>%
  left_join(grouped_cause, by = 'DN')
427 | ######################################
428 | ##### FORMAT FOR THE INTERACTIVE #####
429 | ######################################
library(lubridate)

# Reshape `output` into the column set used by the interactive:
# a single squished full name, lower-cased municipalities, and the death month.
basic <- output %>%
  mutate(
    # Missing optional name parts become empty strings so paste() does not
    # emit the literal text "NA".
    VictimMiddleName = ifelse(is.na(VictimMiddleName), '', VictimMiddleName),
    VictimSecondLastName = ifelse(is.na(VictimSecondLastName), '', VictimSecondLastName)
  ) %>%
  transmute(
    id,
    DN,
    name = str_squish(paste(VictimName, VictimMiddleName, VictimLastName, VictimSecondLastName)),
    age = VictimAge,
    dmu = tolower(VictimDeathMunicipality),
    rmu = tolower(VictimResidenceMunicipality),
    date = VictimDeathDate,
    month = month(as.Date(VictimDeathDate)),
    source,
    causes
  )
443 | #############################################
444 | ##### ADD IN CAUSES OF DEATH CATEGORIES #####
445 | #############################################
# Survey answer options for the cause-of-death question; each one becomes a
# 0/1 indicator column (c1..c6) depending on whether the SVY text contains it.
causes <- c('Condición de salud directamente relacionada con el huracán',
            'Daños ocasionados por el huracán',
            "Falta de electricidad","Falta de agua o comida",
            "Falta de acceso a atención médica",
            "Falta de acceso a las comunicaciones")

basic <- merge(
  basic,
  getDB('Cause of Death') %>%
    select(SVY, DN, id) %>%
    mutate(
      c1 = ifelse(str_detect(SVY, causes[1]), 1, 0),
      c2 = ifelse(str_detect(SVY, causes[2]), 1, 0),
      c3 = ifelse(str_detect(SVY, causes[3]), 1, 0),
      c4 = ifelse(str_detect(SVY, causes[4]), 1, 0),
      c5 = ifelse(str_detect(SVY, causes[5]), 1, 0),
      c6 = ifelse(str_detect(SVY, causes[6]), 1, 0)
    ) %>%
    select(DN, id, c1, c2, c3, c4, c5, c6)
)
464 | ##############################
465 | ##### GET INTERVIEW DATA #####
466 | ##############################
# Free-text circumstances: use the interview answer unless it is missing or
# shorter than 50 characters, in which case fall back to the survey text.
interview <- getDB('Circumstances and hurricane relevance') %>%
  mutate(
    text_field_es = ifelse(is.na(INT) | nchar(INT) < 50, SVY, INT)
  ) %>%
  select(text_field_es)
# Preview: case table joined with the interview text, laid out with the
# English/Spanish cause and text columns (the *_es cause and *_en text
# columns are left as empty strings).
select(
  mutate(
    merge(basic, interview),
    causes_en = causes,
    causes_es = '',
    text_field_en = '',
    PlaceOfDeath = dmu,
    DateOfDeath = date
  ),
  id, name, DateOfDeath, causes_en, causes_es, text_field_en, text_field_es
)

# Inspect the intermediate tables.
basic
View(interview)
mutate(basic, PlaceOfDeath = dmu, DateOfDeath = date)

# Export `basic` with dmu/date renamed to PlaceOfDeath/DateOfDeath.
write.xlsx(
  select(
    mutate(basic, PlaceOfDeath = dmu, DateOfDeath = date),
    -dmu, -date
  ),
  'data/forProcess/rest_for_merge.xlsx',
  row.names = FALSE
)
487 | library(purrr)
488 | # library(xlsx)
489 | # library(readxl)
490 | # library(tidyverse)
491 | # library(stringr)
492 | library(write.xl)
493 | # library(xlsx)
494 | # library(readxl)
495 | # library(tidyverse)
496 | # library(stringr)
497 | library(writexl)
498 | ?write.xl
499 | ?write.xlsx
500 | ??write.xlsx
# Export `basic` with dmu/date renamed to PlaceOfDeath/DateOfDeath, calling
# the writer through its namespace so the xlsx package need not be attached.
xlsx::write.xlsx(
  select(
    mutate(basic, PlaceOfDeath = dmu, DateOfDeath = date),
    -dmu, -date
  ),
  'data/forProcess/rest_for_merge.xlsx',
  row.names = FALSE
)
506 | library(xlsx)
507 | install.packages('xlsx')
508 | library(xlsx)
509 | library(xlsx)
510 | library(xlsx)
511 | library(xlsx)
512 | library(xlsx)
513 | 


--------------------------------------------------------------------------------