├── .Rapp.history ├── data ├── forProcess │ └── .pl ├── .DS_Store ├── municipalities.xlsx └── case_index.csv ├── bitacoras └── template │ └── bitácora.xlsx ├── .gitignore ├── merge.R ├── Readme.md ├── getBitacoras.R ├── fillingBitacoras.R ├── processBitacoras.R └── .Rhistory /.Rapp.history: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/forProcess/.pl: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quartz/maria-followup/master/data/.DS_Store -------------------------------------------------------------------------------- /data/municipalities.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quartz/maria-followup/master/data/municipalities.xlsx -------------------------------------------------------------------------------- /bitacoras/template/bitácora.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quartz/maria-followup/master/bitacoras/template/bitácora.xlsx -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | */.DS_Store 3 | data/forProcess/*.xlsx 4 | data/forProcess/*.csv 5 | data/Las víctimas de María (Responses).xlsx 6 | /process_script 7 | /bitacoras/Empty 8 | /bitacoras/Filled -------------------------------------------------------------------------------- /merge.R: --------------------------------------------------------------------------------
## merge it back into the database
##
## Reads the copy-edited/translated interview sheet and the remaining case
## columns, writes one dated CSV per language (EN and ES) into
## data/forProcess/, and appends the new cases to data/case_index.csv.

# The Readme runs this script as its own step, so attach what it uses here:
# dplyr/readr come with tidyverse; month(), day() and year() are lubridate's.
library(tidyverse)
library(lubridate)

interview <- readxl::read_excel('data/forProcess/for_translation_copyedits.xlsx')
basis <- readxl::read_excel('data/forProcess/rest_for_merge.xlsx')

# Join once and reuse. No `by` is given, so this is a natural join on every
# column the two sheets share; `name` and `causes` are dropped from `basis`
# first so the copy-edited versions in `interview` are the ones kept.
merged <- interview %>%
  left_join(basis %>% select(-name, -causes))

# File-name stamp, e.g. 12_19_2018 (month_day_year, not zero-padded).
run_stamp <- paste(month(Sys.Date()), day(Sys.Date()), year(Sys.Date()), sep = '_')

# Write the export for one language.
#   merged     - joined interview/basis table
#   text_col   - name of the column holding the narrative in that language
#   causes_col - name of the column holding the causes in that language
#   suffix     - language tag used in the file name ('EN' or 'ES')
write_language_export <- function(merged, text_col, causes_col, suffix) {
  merged %>%
    mutate(attr = 'form_interview') %>%
    mutate(month = month(date)) %>%
    mutate(text_field = .data[[text_col]], causes = .data[[causes_col]]) %>%
    select(age, dmu, rmu, date, month, source, causes,
           c1, c2, c3, c4, c5, c6, attr, id, name, text_field) %>%
    write_csv(paste0('data/forProcess/data_', run_stamp, '_', suffix, '.csv'))
}

write_language_export(merged, 'text_field_en', 'causes_en', 'EN')
write_language_export(merged, 'text_field_es', 'causes_es', 'ES')


## output the current index file with 3 columns: id, deathnumber, dn
existingCases <- read_csv('data/case_index.csv')

# DN in the index is "<death number>_<year of death>"; the bare number is kept
# in DeathNumber.
newCases <- merged %>%
  mutate(DeathNumber = DN, DN = paste(DN, year(date), sep = '_')) %>%
  select(id, DN, DeathNumber) %>%
  # Skip ids that are already indexed so that running the script twice does
  # not append the same cases again.
  filter(!(id %in% existingCases$id))

existingCases %>%
  bind_rows(newCases) %>%
  write_csv('data/case_index.csv')

# ~~~ submit pull request

-------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # Maria's Dead follow up 2 | 3 | Instructions and scripts for adding new cases for the Hurricane Maria project. 
The existing databases live at: 4 | 5 | https://hurricanemariadead.com/ 6 | 7 | https://losmuertosdemaria.com/ 8 | 9 | ### Recent updates: 10 | 11 | Latest data updated on *Dec. 19, 2018*, **504 cases** 12 | 13 | *August 30*, 2018, **487 cases** 14 | 15 | ## Adding new cases 16 | 17 | You need to first download a copy of this repo to your computer. After you are done with the process described below, submit a pull request. 18 | 19 | ### 1. Getting cases from Google Form responses 20 | 21 | [Las víctimas de María (Responses)](https://docs.google.com/spreadsheets/d/1FK0j919EveJg6HJI_2139uQrX4W-jHX6bcBRAqQVSeI/edit#gid=1388179220) 22 | 23 | If you don't have permission, ask Ana Campoy, Omaya Sosa, or Larry Fenn to help. 24 | 25 | * Export the sheet directly: 26 | 27 | `File > Download as ... > Microsoft Excel(.xlsx)` 28 | 29 | * Delete all the cases in the spreadsheet up to the latest update date listed under *Recent updates* at the top of this Readme document you just downloaded. Save it. 30 | 31 | * Put the newly-saved file into the `data` folder: `data/Las víctimas de María (Responses).xlsx`. 32 | 33 | 34 | ### 2. Generate bitacoras 35 | 36 | Run `getBitacoras.R` 37 | 38 | ### 3. Get information from bitacoras 39 | 40 | - Inform Laura Candelas (CPI), who will coordinate verification phone calls using the bitacoras in the folder `bitacoras/Empty/` 41 | - Gather only the complete bitacoras into the folder `bitacoras/Filled/` 42 | 43 | - Run script `processBitacoras.R` 44 | 45 | 46 | ### 4. Copy edits and translation 47 | 48 | Send the file `data/forProcess/for_translation_copyedits.xlsx` to Laura and Ezequiel for copy edits and translation. 
They need to do three things: 49 | 50 | * Spanish copy editing for column `text_field_es` (Laura) 51 | * Translate the column `text_field_es` to column `text_field_en` and copy edit `text_field_en` (Ezequiel) 52 | * Translate the column `causes_en` to column `causes_es` (Laura) 53 | 54 | (The person who does copy edits should also do a general additional verification to make sure qualitative information in `text_field` matches other basic info, such as gender (as inferred from name), clinical cause of death, mechanism of death, age, etc, and make corrections or flag cases accordingly.) 55 | 56 | When getting back the file, please save it and replace the existing `data/forProcess/for_translation_copyedits.xlsx` 57 | 58 | ### 5. Merge it back into the database 59 | 60 | Run `merge.R` 61 | 62 | ### 6. Submit pull request 63 | 64 | 65 | -------------------------------------------------------------------------------- /getBitacoras.R: --------------------------------------------------------------------------------
library(tidyverse)
library(stringr)
library(readxl)

######################
### READ NEW CASES ###
######################

# Lower-case text and replace accented Spanish letters with plain ones, so
# that survey answers can be compared with the government records.
strip_accents <- function(x) {
  chartr('ÁáàéÉíÍóÓúÚüÜñÑ', 'aaaeeiioouuuunn', tolower(x))
}

# Every column is read as text, so the death date arrives as an Excel serial
# number; it is converted below using Excel's day zero (1899-12-30).
cases <- readxl::read_xlsx("data/Las víctimas de María (Responses).xlsx", col_types = 'text')
cases <-
  cases %>%
  mutate(DeathDate = as.character(as.Date(as.numeric(`Fecha del fallecimiento`), origin = "1899-12-30"))) %>%
  mutate(
    case_id = row_number(),
    # Only the first word of each name field is used as a matching key.
    # NOTE(review): unlike the surnames, the first name keeps its accents;
    # confirm that this matches how `Name` is stored in the govt file.
    Name = tolower(str_split(`Nombre del fallecido:`, ' ', simplify = TRUE)[, 1]),
    LastName = strip_accents(str_split(`Apellidos:`, ' ', simplify = TRUE)[, 1]),
    DeathMunicipality = strip_accents(str_squish(`Lugar del fallecimiento:`)),
    ResidenceMunicipality = strip_accents(str_squish(`Lugar de residencia:`)),
    SecondLastName = strip_accents(str_split(`Apellidos:`, ' ', simplify = TRUE)[, 2])
  )



#####################
### CASE MATCHING ###
#####################

# NOTE(review): this file is not in the repository tree shown above; it has to
# be supplied locally.
govt <- read_csv('data/govt_091817_061218.csv')

# Run one matching pass of survey cases against the government records.
#   candidates    - survey cases still unmatched
#   govt          - government death records
#   drop_cols     - name keys removed before the join; the join has no `by`,
#                   so it uses every column the two tables still share
#                   (presumably the remaining name keys - the govt schema is
#                   not visible here)
#   age_tolerance - maximum allowed difference in years between reported ages
# Returns case_id, DN and DeathNumber for every surviving candidate pair.
match_pass <- function(candidates, govt, drop_cols, age_tolerance) {
  candidates %>%
    select(-one_of(drop_cols)) %>%
    left_join(govt) %>%
    # Age difference within the tolerance
    filter(as.numeric(`Edad del fallecido:`) <= (Age + age_tolerance) &
             as.numeric(`Edad del fallecido:`) >= (Age - age_tolerance)) %>%
    # Matching death municipality or residence municipality
    filter(DeathMunicipality == MunicipalityDeathPlace | ResidenceMunicipality == ResidencePlace) %>%
    # Death dates at most 10 days apart, OR the same kind of death place
    filter(
      (abs(as.numeric(as.Date(paste(DeathDate_Year, DeathDate_Month, DeathDate_Day, sep = '-')) - as.Date(DeathDate))) <= 10)
      | ((DeathPlace == 'HOSPITALIZADO' | DeathPlace == 'AMBULATORIO/SALA DE EMERGENCIA') & `¿Dónde murió la persona?` == 'Hospital/Hospital')
      | (DeathPlace == 'RESIDENCIA DE LA PERSONA FALLECIDA' & `¿Dónde murió la persona?` == 'Residencia privada')
    ) %>%
    select(case_id, DN, DeathNumber)
}

# Passes run from strictest to loosest; each pass only sees cases that no
# earlier pass matched. Two-key passes allow +/-5 years of age difference,
# single-key passes only +/-2.
match_passes <- list(
  list(drop = 'Name', age_tolerance = 5),                         # LastName & SecondLastName
  list(drop = 'SecondLastName', age_tolerance = 5),               # Name & LastName
  list(drop = 'LastName', age_tolerance = 5),                     # Name & SecondLastName
  list(drop = c('LastName', 'Name'), age_tolerance = 2),          # SecondLastName alone
  list(drop = c('SecondLastName', 'Name'), age_tolerance = 2),    # LastName alone
  list(drop = c('SecondLastName', 'LastName'), age_tolerance = 2) # Name alone
)

matches <- NULL
for (pass in match_passes) {
  candidates <- if (is.null(matches)) {
    cases
  } else {
    cases %>% filter(!(case_id %in% matches$case_id))
  }
  found <- match_pass(candidates, govt, pass$drop, pass$age_tolerance)
  # Newest matches go on top, as before.
  matches <- if (is.null(matches)) found else rbind(found, matches)
}


# Survey answers of the matched cases, followed by the full government record
# of the matched death. The survey-derived name keys are dropped first so the
# government spellings are the ones carried forward.
matched_cases <- cases %>%
  filter(case_id %in% matches$case_id) %>%
  left_join(matches) %>%
  select(-Name, -LastName, -SecondLastName) %>%
  left_join(govt)


###########################
### TAKE OUT DUPLICATES ###
###########################

# load the existing cases
index <- read_csv('data/case_index.csv')

# cases not existing in the current database
matched_cases <- matched_cases %>%
  filter(!DN %in% index$DN)

# cases do not replicate themselves
# -- if there are duplicated responses, just pick one: after sorting, a row is
#    kept only when its DeathNumber differs from the row before it
matched_cases <- matched_cases %>%
  arrange(DeathNumber) %>%
  mutate(last_case = lag(DeathNumber)) %>%
  filter(is.na(last_case) | (last_case != DeathNumber)) %>%
  select(-last_case)


##########################
### GENERATE BITACORAS ###
##########################

# New ids continue after the highest id already in the index, and never start
# below 3001.
matched_cases <- matched_cases %>%
  mutate(id = max(max(index$id), 3000) + row_number())

source("fillingBitacoras.R")

## ~~ reporters to process the bitacoras


-------------------------------------------------------------------------------- /fillingBitacoras.R: --------------------------------------------------------------------------------
### CREATE EMPTY FORMS ###
# Expects `matched_cases` to exist already; getBitacoras.R source()s this file.
# NOTE(review): openxlsx is attached after xlsx and both export
# createWorkbook() and saveWorkbook(), so unqualified calls to those two
# resolve to openxlsx.
library(xlsx)
library(openxlsx)

# The three sheets of the template; their first columns supply the row labels
# of the generated forms.
tmp1 <- readxl::read_xlsx('bitacoras/template/bitácora.xlsx', sheet = 1)
tmp2 <- readxl::read_xlsx('bitacoras/template/bitácora.xlsx', sheet = 2)
tmp3 <- readxl::read_xlsx('bitacoras/template/bitácora.xlsx', sheet = 3)


# bitacoras/Empty is git-ignored, so it does not exist in a fresh clone.
dir.create("bitacoras/Empty", showWarnings = FALSE, recursive = TRUE)

# Column of a template sheet whose header cell is blank. readxl < 1.2 names
# such columns X__1, X__2, ...; newer versions name them differently. Use the
# legacy name when it exists, otherwise fall back to the position (assumes the
# first column is the only one with a header - TODO confirm against the
# template).
blank_header_col <- function(sheet, n) {
  legacy_name <- paste0("X__", n)
  if (legacy_name %in% names(sheet)) sheet[[legacy_name]] else sheet[[n + 1]]
}

# Empty cells are written as empty strings rather than NA.
blank_if_na <- function(x) ifelse(is.na(x), '', x)

# One workbook per matched case. seq_len() keeps the loop from running when
# there are no cases.
for (i in seq_len(nrow(matched_cases))) {
  # i=1
  print(i)

  # xlsx:: is spelled out because openxlsx, attached after xlsx, masks
  # createWorkbook(); every other call below needs an xlsx workbook.
  form <- xlsx::createWorkbook()
  caseInfo <- createSheet(form, sheetName = "Información del caso")
  questionnaire <- createSheet(form, sheetName = "Cuestionario")
  statusInfo <- createSheet(form, sheetName = "Estatus del caso")

  #####################
  ## FILL CASE SHEET ##
  #####################

  caseRows <- c('Información del caso', tmp1$`Información del caso`)
  col2 <- rep('', length(caseRows))
  caseData <- data.frame(col1 = caseRows, col2 = col2)
  addDataFrame(x = caseData, sheet = caseInfo, row.names = FALSE, col.names = FALSE)
  # Cells are addressed as "<row>.<column>" in the returned list.
  caseCells <- getCells(getRows(caseInfo), colIndex = 1:2)

  # ADD STYLES

  headBorder <- Border(color = 'black', position = 'TOP', pen = 'BORDER_MEDIUM')
  headStyle <- CellStyle(form) + CellProtection(locked = TRUE) +
    Font(form, isBold = TRUE, heightInPoints = 14) + Alignment(wrapText = TRUE)
  setCellStyle(caseCells[['1.1']], headStyle)
  # add border
  CB.setBorder(CellBlock(caseInfo, 1, 1, 1, 2, FALSE), headBorder, colIndex = 1:2, rowIndex = 1)
  # spacing
  setColumnWidth(caseInfo, 1, 30)


  ########################
  ## FILL QUESTIONNAIRE ##
  ########################

  col1 <- blank_if_na(c('Información del informante', tmp2$`Información del informante`))
  col2 <- blank_if_na(c('', blank_header_col(tmp2, 1)))
  col3 <- blank_if_na(c('', blank_header_col(tmp2, 2)))
  col4 <- blank_if_na(c('', blank_header_col(tmp2, 3)))
  col5 <- blank_if_na(c('', blank_header_col(tmp2, 4)))

  qData <- data.frame(col1 = col1, col2 = col2, col3 = col3, col4 = col4, col5 = col5)
  addDataFrame(x = qData, sheet = questionnaire, row.names = FALSE, col.names = FALSE)
  qCells <- getCells(getRows(questionnaire), colIndex = 1:5)

  # ADD STYLES

  # spacing
  setColumnWidth(questionnaire, 1, 60)
  for (colNumber in 2:5) {
    setColumnWidth(questionnaire, colNumber, 40)
  }

  # cell styles: section headings
  for (key in c('1.1', '7.1', '10.1', '13.1', '16.1', '45.1', '48.1')) {
    setCellStyle(qCells[[key]], headStyle)
  }

  setCellStyle(qCells[['17.1']], CellStyle(form) + CellProtection(locked = TRUE) + Font(form, isBold = TRUE))

  # column captions: red for columns 2-3, blue for columns 4-5
  sourceStyle <- CellStyle(form) + CellProtection(locked = TRUE) + Font(form, color = 'red', isBold = TRUE)
  for (key in c('2.2', '2.3', '18.2', '18.3')) {
    setCellStyle(qCells[[key]], sourceStyle)
  }

  intStyle <- CellStyle(form) + CellProtection(locked = TRUE) + Font(form, color = 'blue', isBold = TRUE)
  for (key in c('18.4', '18.5')) {
    setCellStyle(qCells[[key]], intStyle)
  }

  # question texts: italic and wrapped
  qStyle <- CellStyle(form) + CellProtection(locked = TRUE) + Alignment(wrapText = TRUE) + Font(form, isItalic = TRUE)
  for (key in c('8.1', '11.1', '14.1', '24.1', '26.1', '28.1', '29.1',
                '34.1', '35.1', '36.1', '37.1', '38.1', '39.1', '40.1',
                '41.1', '42.1', '43.1', '46.1')) {
    setCellStyle(qCells[[key]], qStyle)
  }

  # add note style
  noteFont <- Font(form, color = 'gray40')
  CB.setBorder(CellBlock(questionnaire, 54, 1, 1, 5, FALSE), headBorder, colIndex = 1:5, rowIndex = 1)
  CB.setFont(CellBlock(questionnaire, 54, 1, 11, 1, FALSE), noteFont, colIndex = 1, rowIndex = 1:11)
  for (key in c('55.1', '56.1', '57.1', '59.1', '61.1', '62.1')) {
    setCellStyle(qCells[[key]], CellStyle(form) + noteFont + Alignment(wrapText = TRUE))
  }

  # fill: light-blue background on cells of column 4
  CB.setFill(CellBlock(questionnaire, 19, 4, 4, 1, FALSE), Fill(backgroundColor = 'lightblue'), colIndex = 1, rowIndex = 1:4)
  setCellStyle(qCells[['24.4']], CellStyle(form) + Fill(backgroundColor = 'lightblue'))
  setCellStyle(qCells[['26.4']], CellStyle(form) + Fill(backgroundColor = 'lightblue'))
  CB.setFill(CellBlock(questionnaire, 28, 4, 2, 1, FALSE), Fill(backgroundColor = 'lightblue'), colIndex = 1, rowIndex = 1:2)
  CB.setFill(CellBlock(questionnaire, 34, 4, 7, 1, FALSE), Fill(backgroundColor = 'lightblue'), colIndex = 1, rowIndex = 1:7)
  setCellStyle(qCells[['49.4']], CellStyle(form) + Fill(backgroundColor = 'lightblue'))
  # CB.setFill(CellBlock(questionnaire, 29,3,4,1,FALSE), Fill(backgroundColor = '#FFC0CB'), colIndex=1, rowIndex=1:4)
  # setCellStyle(qCells$`39.3`,CellStyle(form)+Fill(backgroundColor = '#FFC0CB'))


  # pre-filled cells that can hold long text: wrapped
  filledStyle <- CellStyle(form) + CellProtection(locked = TRUE) + Alignment(wrapText = TRUE)
  for (key in c('36.3', '39.2', '39.3', '40.2')) {
    setCellStyle(qCells[[key]], filledStyle)
  }


  ############################
  ## FILL INTERVIEWER SHEET ##
  ############################

  interviewerRows <- c('Primera llamada', tmp3$`Primera llamada`)
  col2 <- rep('', length(interviewerRows))
  interviewerData <- data.frame(col1 = interviewerRows, col2 = col2)
  addDataFrame(x = interviewerData, sheet = statusInfo, row.names = FALSE, col.names = FALSE)
  statusCells <- getCells(getRows(statusInfo), colIndex = 1:2)

  # ADD STYLES
  setCellStyle(statusCells[['1.1']], headStyle)
  setCellStyle(statusCells[['14.1']], headStyle)
  CB.setBorder(CellBlock(statusInfo, 14, 1, 1, 2, FALSE), headBorder, colIndex = 1:2, rowIndex = 1)
  setColumnWidth(statusInfo, 1, 30)
  setColumnWidth(statusInfo, 2, 30)

  createFreezePane(questionnaire, rowSplit = 1, colSplit = 2)

  #### end of creating sheet
  #########################
  #### ADD DATA ###########
  #########################

  case <- matched_cases[i, ]
  print(case$`Nombre del fallecido:`)
  # if (i >= 32 & i <42) {case$id = case$id +1 }
  # if (i >= 42 & i <60) {case$id = case$id +2 }
  # if (i >= 60) {case$id = case$id +3 }


  # Case Info
  setCellValue(caseCells[['2.2']], case$id, showNA = FALSE)

  # Questionnaire Info: column 2 takes the survey answers

  # Intro
  setCellValue(qCells[['3.2']], paste(case$`Su nombre:`, case$`Sus apellidos:`, sep = ' '), showNA = FALSE)
  setCellValue(qCells[['4.2']], case$`Su teléfono:`, showNA = FALSE)
  setCellValue(qCells[['5.2']], case$`Su correo electrónico:`, showNA = FALSE)

  # Section 4
  setCellValue(qCells[['19.2']], case$`Nombre del fallecido:`, showNA = FALSE)
  setCellValue(qCells[['21.2']], case$`Apellidos:`, showNA = FALSE)
  # -- relationship
  setCellValue(qCells[['24.2']], case$`Su relación con el fallecido es:`, showNA = FALSE)
  # -- age
  setCellValue(qCells[['26.2']], case$`Edad del fallecido:`, showNA = FALSE)

  # -- birth date
  # setCellValue(qCells$`28.2`, case$`Fecha de nacimiento del fallecido:`, showNA = F)

  # -- residency place
  setCellValue(qCells[['29.2']], case$`Lugar de residencia:`, showNA = FALSE)
  setCellValue(qCells[['30.2']], case$`La dirección física exacta del fallecido es:`, showNA = FALSE)
  # -- 5 date of death
  setCellValue(qCells[['34.2']], case$DeathDate, showNA = FALSE)
  # -- 5.1 municipality of death
  setCellValue(qCells[['35.2']], case$`Lugar del fallecimiento:`, showNA = FALSE)
  # -- 7 where did the person die
  setCellValue(qCells[['37.2']], case$`¿Dónde murió la persona?`, showNA = FALSE)
  # -- 8 cause of death
  setCellValue(qCells[['39.2']], case$`El fallecido murió a causa de:`, showNA = FALSE)
  # -- 9 how is it related to maria
  setCellValue(qCells[['40.2']], case$`¿Puedes describir lo que sucedió y cómo se relaciona la muerte con María o la crisis causada por ese huracán?`, showNA = FALSE)

  #### govt database fills: column 3 takes the government record

  setCellValue(caseCells[['3.2']], case$CertificateNumber, showNA = FALSE)
  setCellValue(caseCells[['4.2']], case$DN, showNA = FALSE)
  setCellValue(caseCells[['5.2']], case$TypeOfDeath, showNA = FALSE)

  setCellValue(qCells[['3.3']], case$InformantName, showNA = FALSE)
  setCellValue(qCells[['19.3']], case$Name, showNA = FALSE)
  setCellValue(qCells[['20.3']], case$MiddleName, showNA = FALSE)
  setCellValue(qCells[['21.3']], case$LastName, showNA = FALSE)
  setCellValue(qCells[['22.3']], case$SecondLastName, showNA = FALSE)
  setCellValue(qCells[['24.3']], case$InformantRelationship, showNA = FALSE)
  setCellValue(qCells[['26.3']], case$Age, showNA = FALSE)
  setCellValue(qCells[['28.3']], paste(case$BirthDate_Year, case$BirthDate_Month, case$BirthDate_Day, sep = "-"), showNA = FALSE)
  setCellValue(qCells[['29.3']], case$ResidencePlace, showNA = FALSE)
  setCellValue(qCells[['30.3']], case$ResidencePlaceAddress1, showNA = FALSE)
  setCellValue(qCells[['31.3']], case$ResidencePlaceAddress2, showNA = FALSE)
  setCellValue(qCells[['32.3']], case$ResidencePlaceAddress3, showNA = FALSE)
  setCellValue(qCells[['34.3']], paste(case$DeathDate_Year, case$DeathDate_Month, case$DeathDate_Day, sep = '-'), showNA = FALSE)
  setCellValue(qCells[['35.3']], case$MunicipalityDeathPlace, showNA = FALSE)
  # -- 6. occupation
  setCellValue(qCells[['36.3']], tolower(paste(case$Industry, case$Occupation, sep = "|")), showNA = FALSE)
  setCellValue(qCells[['37.3']], case$DeathPlace, showNA = FALSE)
  # -- 7.1 facility name
  setCellValue(qCells[['38.3']], case$DeathFacility, showNA = FALSE)
  setCellValue(qCells[['39.3']], case$nchsti, showNA = FALSE)

  # xlsx:: for the same masking reason as createWorkbook() above.
  xlsx::saveWorkbook(form, paste("bitacoras/Empty/bitacora_", case$id, '.xlsx', sep = ""))

}

-------------------------------------------------------------------------------- /processBitacoras.R: --------------------------------------------------------------------------------
# The Readme runs this script as its own step (after the phone calls), so it
# attaches what it uses instead of relying on an earlier script's session.
# library(xlsx)
library(readxl)
library(tidyverse)
library(stringr)

library(purrr)

######################
### LOAD BITACORAS ###
######################


# read_bitacora() by larry
#
# Read one filled bitacora workbook and flatten its three sheets.
#   filename - path of the .xlsx file
# Returns a data frame with one row per questionnaire answer column (the four
# columns to the right of the question text); the case-sheet and call-status
# values are repeated on every row, and `filename` records the source file.
read_bitacora <- function(filename) {

  # Answer column of a two-column sheet. Its header cell is blank, which
  # readxl < 1.2 names X__1; fall back to the position for newer versions.
  second_col <- function(sheet) {
    if ("X__1" %in% names(sheet)) sheet[["X__1"]] else sheet[[2]]
  }

  page1 <- read_excel(filename, sheet = "Información del caso", col_types = 'text')
  page2 <- read_excel(filename, sheet = "Cuestionario", col_types = 'text')
  page3 <- read_excel(filename, sheet = "Estatus del caso", col_types = 'text')

  # stringsAsFactors = FALSE keeps the values as character on every R version
  # (parse_number() below needs character input).
  page1_data <- data.frame(t(second_col(page1)), stringsAsFactors = FALSE)
  colnames(page1_data) <-
    c(
      "id",
      "Certificate Number",
      "DN",
      "TypeOfDeath"
    )

  # Rows picked by position in the questionnaire sheet; transposing turns each
  # of the four answer columns into one row.
  page2_data <-
    data.frame(t(page2[c(2:4, 17:21, 23, 25, 27:31, 33:39, 48), 2:5]), stringsAsFactors = FALSE)
  colnames(page2_data) <-
    c(
      "InformantName",
      "InformantPhone",
      "InformantMail",
      "Source",
      "VictimName",
      "VictimMiddleName",
      "VictimLastName",
      "VictimSecondLastName",
      "1. InformantRelationship",
      "2. VictimAge",
      "3. VictimDOB",
      "4. VictimResidence",
      "4. VictimResidence1",
      "4. VictimResidence2",
      "4. VictimResidence3",
      "5. VictimDeathDate",
      "5.1. VictimDeathMunicipality",
      "6. VictimOccupation",
      "7. DeathFacility",
      "7.1. DeathFacilityAddress",
      "8. Cause of Death",
      "9. Circumstances and hurricane relevance",
      "Direct/indirect CDC criterion"
    )

  # Convert page 3 into a data frame: nine status fields for each of two calls
  call_fields <- c(
    "Interviewer",
    "Date and Time",
    "Interview conducted",
    "Message on the phone",
    "Text message",
    "Number does not work",
    "No number",
    "Rescheduled call",
    "Person did not want to cooperate"
  )
  page3_data <- data.frame(t(second_col(page3)[c(1, 2, 5:11, 14, 15, 18:24)]), stringsAsFactors = FALSE)
  colnames(page3_data) <-
    c(
      paste("First call", call_fields, sep = '-'),
      paste("Second call", call_fields, sep = '-')
    )

  # Put it all in one
  data <- cbind(page1_data, page2_data, page3_data) %>%
    mutate(filename = filename)

  return(data)
}


# Only real workbooks: skips .DS_Store and Excel's "~$..." lock files.
bitacora_files <- list.files('bitacoras/Filled', pattern = '^[^~].*\\.xlsx?$', full.names = TRUE)
if (length(bitacora_files) == 0) {
  stop("No filled bitacoras found in bitacoras/Filled", call. = FALSE)
}

bitacoras <-
  bitacora_files %>%
  map(read_bitacora) %>%
  reduce(bind_rows) %>%
  # The ids were read as text; normalise them to plain integer strings.
  mutate(id = as.character(round(parse_number(id))),
         `Certificate Number` = as.character(round(parse_number(`Certificate Number`))),
         DN = as.character(round(parse_number(DN))))







######################
### CLEAN UP CASES ###
######################



# Replace accented Spanish letters with unaccented lower-case ones.
getUnaccented <- function(input) {
  return(chartr('ÁáàéÉíÍóÓúÚüÜñÑ', 'aaaeeiioouuuunn', input))
}

# Drop the columns that identify informants, callers and certificates, and
# strip the question numbers ("2. ", "5.1. ") from the column names.
processCols <- function(dt) {
  # remove informant and caller info
  dt <- dt[ , !grepl( "Informant" , names(dt))]
  dt <- dt[ , !grepl( "call" , names(dt))]
  dt <- dt[ , !grepl( "TypeOfDeath" , names(dt))]
  dt <- dt[ , !grepl( "Certificate Number" , names(dt))]

  # take out question numbers from colnames
  colnames(dt) <- gsub('^[1-9](\\.[1-9])?\\. ', '', colnames(dt))

  return(dt)
}

bitacoras_cleaned <- processCols(bitacoras) %>%
  select(-filename, -VictimOccupation)

# Reshape one questionnaire field to one row per case with one column per
# answer source:
#   DB  - value from the government database
#   CMT - comments
#   SVY - value from the survey
#   INT - answer given by the interviewee
# The result stays grouped by DN and id on purpose: the select() calls below
# rely on the grouping to keep both keys, which merge() then joins on.
getDB <- function(var_name){
  bitacoras_cleaned %>%
    select(Source, var_name, DN,id) %>%
    group_by(DN,id) %>%
    spread(Source, var_name) %>%
    rename(DB=`Base de datos causa de muerte`, CMT= Comentarios, SVY=`Encuesta/survey`, INT=`Respuesta del entrevistado`)
}


##############################################
##### GET BASIC CASE INFO FROM BITACORAS #####
##############################################

# TRUE where the interviewee confirmed the database value ("C", "c ...",
# "correcto") or gave no answer.
is_confirmed <- function(answer) {
  toupper(answer) == 'C' | is.na(answer) | str_detect(answer, '^c\\s') | answer == 'correcto'
}

# Final value of one field: the database value when confirmed, otherwise the
# interviewee's correction.
# NOTE(review): the original comment also promised a fallback to the survey
# value when the database has none; that fallback is not implemented.
resolve_field <- function(var_name) {
  getDB(var_name) %>%
    mutate(!!var_name := ifelse(is_confirmed(INT), DB, INT)) %>%
    select(one_of(var_name))
}

# VictimName
name1 <- resolve_field("VictimName")
name2 <- resolve_field("VictimMiddleName")
name3 <- resolve_field("VictimLastName")
name4 <- resolve_field("VictimSecondLastName")

part_name <- name1 %>% merge(name2) %>% merge(name3) %>% merge(name4)


# age
part_age <- resolve_field("VictimAge")

# residential address
municipalities <- readxl::read_excel('data/municipalities.xlsx')
# One regular expression matching any municipality name (lower case, no accents)
mu <- getUnaccented(paste(tolower(municipalities$Municipality),collapse = '|'))
part_res <- getDB("VictimResidence") %>%
  mutate(VictimResidenceMunicipality = ifelse(
    is_confirmed(INT) | str_detect(INT, '^C\\s'),
    DB,
    ifelse(
      # if there is a municipality in INT.VictimResidence
      str_detect(getUnaccented(tolower(INT)), mu),
      # then take this municipality (the first one mentioned)
      str_extract(getUnaccented(tolower(INT)), mu),
      # otherwise take the DB
      DB)
  )) %>%
  select(VictimResidenceMunicipality) %>%
  mutate(VictimResidenceMunicipality = tolower(VictimResidenceMunicipality))



# death date

# ISO prefix for each Spanish month name that can appear in an answer. The
# year is fixed per month: September-December 2017, January-April 2018.
spanish_month_prefix <- c(
  septiembre = '2017-09-',
  octubre = '2017-10-',
  noviembre = '2017-11-',
  diciembre = '2017-12-',
  enero = '2018-01-',
  febrero = '2018-02-',
  marzo = '2018-03-',
  abril = '2018-04-'
)

# Rewrite answers such as "20 de septiembre de 2017" as "2017-09-20". The day
# is taken from the first two characters, so the answer must start with the
# day number. Values that match no month are returned unchanged.
spanish_date_to_iso <- function(x) {
  for (month_name in names(spanish_month_prefix)) {
    prefix <- spanish_month_prefix[[month_name]]
    pattern <- paste0('\\d+ (de )?', month_name, ' (de ', str_sub(prefix, 1, 4), ')?')
    x <- ifelse(str_detect(x, pattern), paste0(prefix, str_squish(str_sub(x, 1, 2))), x)
  }
  x
}

part_deathdate <- getDB("VictimDeathDate") %>%
  # Numeric answers are Excel serial dates (day zero is 1899-12-30)
  mutate(INT = ifelse(!is.na(as.numeric(INT)), as.character(as.Date(as.numeric(INT), origin="1899-12-30")), INT)) %>%
  mutate(VictimDeathDate = ifelse(toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'^C\\s') | str_detect(INT,'correct'), DB, INT)) %>%
  mutate(VictimDeathDate = spanish_date_to_iso(VictimDeathDate)) %>%
  select(VictimDeathDate)



# death province
part_deathPl <- getDB('VictimDeathMunicipality') %>%
  mutate(
    VictimDeathMunicipality = ifelse(
      (toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'correcta|correcto|^C\\s|^c\\s')),
      DB,
      ifelse(
        # if there is a municipality in INT
        str_detect(getUnaccented(tolower(INT)), mu),
        # then take this municipality (the first one mentioned)
        str_extract(getUnaccented(tolower(INT)), mu),
        # otherwise take the survey value of the death municipality
        SVY
      ))
  ) %>%
  select(VictimDeathMunicipality) %>%
  mutate(VictimDeathMunicipality = tolower(str_replace(VictimDeathMunicipality,'PUERTO RICO, ','')))

# Direct/indirect classification as given in the interviewee column
part_cause <- getDB('Direct/indirect CDC criterion') %>%
  mutate(cause = INT) %>%
  select(cause)

# One row per case; cases classified as 'no relacionada' are dropped.
output <- part_name %>%
  merge(part_age) %>%
  merge(part_res) %>%
  merge(part_deathdate) %>%
  merge(part_deathPl) %>%
  merge(part_cause) %>%
  filter(is.na(cause) | cause != 'no relacionada') %>%
  mutate(VictimAge =as.numeric(VictimAge)) %>%
  mutate(id = as.numeric(id)) %>%
  mutate(DN = as.numeric(DN)) %>%
  mutate(source = 'survey') %>%
  select(-cause)


####################################
##### ADD DEMOGRAPHIC ANALYSIS #####
####################################

255 | grouped_cause <- read_csv('data/govt_091817_061218.csv') %>% select(DeathNumber,nchsti) %>% 256 | filter(!is.na(nchsti)) %>% 257 | mutate(causes = nchsti, DN= DeathNumber) %>% 258 | select(DN, causes) 259 | # grouped_cause maps each DeathNumber (as DN) to its nchsti value (as causes); the join key is spelled out so an extra shared column can never silently change the match 260 | output <- output %>% 261 | left_join(grouped_cause, by = "DN") 262 | 263 | ###################################### 264 | ##### FORMAT FOR THE INTERACTIVE ##### 265 | ###################################### 266 | # Build one display row per case: full name from the four name parts (missing middle / second last names become ''), lower-cased municipalities, and the month of death via lubridate::month() 267 | library(lubridate) 268 | basic <- output %>% 269 | mutate( 270 | VictimMiddleName = ifelse(is.na(VictimMiddleName),'',VictimMiddleName), 271 | VictimSecondLastName = ifelse(is.na(VictimSecondLastName),'',VictimSecondLastName), 272 | name = str_squish(paste(VictimName, VictimMiddleName, VictimLastName, VictimSecondLastName)), 273 | age = VictimAge, 274 | dmu = tolower(VictimDeathMunicipality), 275 | rmu = tolower(VictimResidenceMunicipality), 276 | date = VictimDeathDate, 277 | month = month(as.Date(VictimDeathDate)) 278 | ) %>% 279 | select(id, DN, name, age, dmu, rmu,date, month, source, causes) 280 | 281 | 282 | ############################################# 283 | ##### ADD IN CAUSES OF DEATH CATEGORIES ##### 284 | ############################################# 285 | # Cause categories; each string is used as a pattern against the survey (SVY) 'Cause of Death' text 286 | causes <- c('Condición de salud directamente relacionada con el huracán', 287 | 'Daños ocasionados por el huracán', 288 | "Falta de electricidad","Falta de agua o comida", 289 | "Falta de acceso a atención médica", 290 | "Falta de acceso a las comunicaciones") 291 | # c1..c6 are 1/0 flags, one per category above (NA where SVY is missing); merged onto basic by the shared columns id and DN 292 | basic <- 293 | basic %>% 294 | merge(getDB('Cause of Death') %>% 295 | select(SVY, DN,id) %>% 296 | mutate( 297 | c1 = ifelse(str_detect(SVY, causes[1]),1,0), 298 | c2 = ifelse(str_detect(SVY, causes[2]),1,0), 299 | c3 = ifelse(str_detect(SVY, causes[3]),1,0), 300 | c4 = ifelse(str_detect(SVY, causes[4]),1,0), 301 | c5 = ifelse(str_detect(SVY, causes[5]),1,0), 302 | c6 = ifelse(str_detect(SVY, causes[6]),1,0) 303 | ) %>% 304 | select(DN,id, c1,c2,c3,c4,c5,c6)) 305 | 306 | 307 | ############################## 308 
| ##### GET INTERVIEW DATA ##### 309 | ############################## 310 | # Use the interviewee's account (INT) when it has at least 50 characters; when it is shorter or missing, fall back to the survey text (SVY) 311 | interview <- getDB('Circumstances and hurricane relevance') %>% 312 | mutate(text_field_es = ifelse(nchar(INT) < 50 | is.na(INT), SVY, INT)) %>% 313 | select(text_field_es) 314 | # Sheet for translation / copy edits: causes_es and text_field_en start out empty; PlaceOfDeath is computed but not part of the exported columns 315 | basic %>% 316 | merge(interview) %>% 317 | mutate(causes_en = causes, 318 | causes_es = '', text_field_en = '', 319 | PlaceOfDeath=dmu, 320 | DateOfDeath = date) %>% 321 | select(id, name, DateOfDeath, causes_en, causes_es, text_field_en, text_field_es) %>% 322 | write.xlsx('data/forProcess/for_translation_copyedits.xlsx',row.names=FALSE) 323 | # Remaining per-case columns, written separately to rest_for_merge.xlsx 324 | basic %>% 325 | write.xlsx('data/forProcess/rest_for_merge.xlsx',row.names=FALSE) 326 | 327 | 328 | ## ~~~ copy edits and translation 329 | -------------------------------------------------------------------------------- /data/case_index.csv: -------------------------------------------------------------------------------- 1 | id,DN,DeathNumber 2 | 1268,20662_2017,20662 3 | 1267,3148_2018,3148 4 | 1147,22830_2017,22830 5 | 1146,24489_2017,24489 6 | 1143,23229_2017,23229 7 | 1250,12790_2018,12790 8 | 1251,20610_2017,20610 9 | 1252,21509_2017,21509 10 | 1253,21893_2017,21893 11 | 1254,22664_2017,22664 12 | 1270,1966_2018,1966 13 | 1273,9396_2018,9396 14 | 1274,9451_2018,9451 15 | 1275,10138_2018,10138 16 | 1276,12175_2018,12175 17 | 1293,23215_2017,23215 18 | 1214,30854_2017,30854 19 | 1247,2675_2018,2675 20 | 1245,1584_2018,1584 21 | 1246,2091_2018,2091 22 | 1163,130_2018,130 23 | 1219,21910_2017,21910 24 | 1223,22363_2017,22363 25 | 1160,21650_2017,21650 26 | 1164,20416_2017,20416 27 | 1172,21542_2017,21542 28 | 1176,21746_2017,21746 29 | 1182,22419_2017,22419 30 | 1228,22792_2017,22792 31 | 1229,22807_2017,22807 32 | 1233,23322_2017,23322 33 | 1285,24390_2017,24390 34 | 1286,25208_2017,25208 35 | 1287,26580_2017,26580 36 | 1288,29620_2017,29620 37 | 1289,30396_2017,30396 38 | 1290,30894_2017,30894 39 | 1281,22342_2017,22342 40 | 1280,22235_2017,22235 41 | 
1248,7194_2018,7194 42 | 1166,20505_2017,20505 43 | 1277,20139_2017,20139 44 | 1283,23483_2017,23483 45 | 1284,24380_2017,24380 46 | 1291,20493_2017,20493 47 | 1249,11861_2018,11861 48 | 1238,24299_2017,24299 49 | 1240,25438_2017,25438 50 | 1241,26584_2017,26584 51 | 1170,21407_2017,21407 52 | 1193,23790_2017,23790 53 | 1196,24492_2017,24492 54 | 1202,25886_2017,25886 55 | 1205,26530_2017,26530 56 | 1208,27155_2017,27155 57 | 1157,20720_2017,20720 58 | 1158,20886_2017,20886 59 | 1165,20423_2017,20423 60 | 1169,21276_2017,21276 61 | 1236,24029_2017,24029 62 | 1203,25914_2017,25914 63 | 1216,20849_2017,20849 64 | 1194,24170_2017,24170 65 | 1187,23369_2017,23369 66 | 1237,24038_2017,24038 67 | 1260,26595_2017,26595 68 | 1263,29084_2017,29084 69 | 1262,28520_2017,28520 70 | 1162,28981_2017,28981 71 | 1186,23207_2017,23207 72 | 1201,25628_2017,25628 73 | 1215,30904_2017,30904 74 | 1259,26450_2017,26450 75 | 1189,23581_2017,23581 76 | 1173,21616_2017,21616 77 | 1199,24944_2017,24944 78 | 1181,22413_2017,22413 79 | 1212,29786_2017,29786 80 | 1213,30192_2017,30192 81 | 1264,29380_2017,29380 82 | 1265,30601_2018,30601 83 | 1033,22736_2017,22736 84 | 1006,2471_2018,2471 85 | 1025,21878_2017,21878 86 | 1035,22769_2017,22769 87 | 1064,26678_2017,26678 88 | 1074,28658_2017,28658 89 | 1053,24661_2017,24661 90 | 1080,30428_2017,30428 91 | 1020,20936_2017,20936 92 | 1021,20937_2017,20937 93 | 1048,24016_2017,24016 94 | 1059,26079_2017,26079 95 | 1060,26475_2017,26475 96 | 1070,27851_2017,27851 97 | 1082,29027_2017,29027 98 | 1084,24973_2017,24973 99 | 1088,24697_2017,24697 100 | 1112,22619_2017,22619 101 | 1002,126_2018,126 102 | 1078,29931_2017,29931 103 | 1113,22679_2017,22679 104 | 1071,27996_2017,27996 105 | 1098,24292_2017,24292 106 | 1050,24211_2017,24211 107 | 1039,23136_2017,23136 108 | 1044,23631_2017,23631 109 | 1072,28310_2017,28310 110 | 1114,25132_2017,25132 111 | 1043,23801_2017,23801 112 | 1068,27764_2017,27764 113 | 1099,30418_2017,30418 114 | 1104,25758_2017,25758 
115 | 1103,24487_2017,24487 116 | 1009,6072_2018,6072 117 | 1018,20757_2017,20757 118 | 1028,22225_2017,22225 119 | 1038,23051_2017,23051 120 | 1047,23999_2017,23999 121 | 1056,25351_2017,25351 122 | 1076,29423_2017,29423 123 | 1037,23030_2017,23030 124 | 1107,30389_2017,30389 125 | 1008,3318_2018,3318 126 | 1054,24730_2017,24730 127 | 1066,26784_2017,26784 128 | 1075,28965_2017,28965 129 | 1092,23509_2017,23509 130 | 1102,24139_2017,24139 131 | 1117,23141_2017,23141 132 | 1122,10941_2018,10941 133 | 1046,23901_2017,23901 134 | 1055,24805_2017,24805 135 | 1017,20462_2017,20462 136 | 1093,24284_2017,24284 137 | 1108,30443_2017,30443 138 | 1014,20195_2017,20195 139 | 1052,24412_2017,24412 140 | 1003,828_2018,828 141 | 1005,1193_2018,1193 142 | 1022,20991_2017,20991 143 | 1057,25472_2017,25472 144 | 1061,26567_2017,26567 145 | 1077,29912_2017,29912 146 | 1085,25993_2017,25993 147 | 1137,22872_2017,22872 148 | 1138,23461_2017,23461 149 | 1139,25034_2017,25034 150 | 1141,28172_2017,28172 151 | 1136,28679_2017,28679 152 | 1007,2898_2018,2898 153 | 1026,22000_2017,22000 154 | 1029,22242_2017,22242 155 | 1036,22794_2017,22794 156 | 1065,26749_2017,26749 157 | 1024,21569_2017,21569 158 | 1105,25769_2017,25769 159 | 1133,21557_2017,21557 160 | 1135,24298_2017,24298 161 | 1126,25005_2017,25005 162 | 1120,30825_2017,30825 163 | 1129,3255_2018,3255 164 | 1130,20567_2017,20567 165 | 1095,25073_2017,25073 166 | 1023,21439_2017,21439 167 | 1079,30409_2017,30409 168 | 1096,26095_2017,26095 169 | 1097,28176_2017,28176 170 | 1100,20980_2017,20980 171 | 1001,116_2018,116 172 | 1004,1143_2018,1143 173 | 1010,7013_2018,7013 174 | 1011,7749_2018,7749 175 | 1012,8818_2018,8818 176 | 1013,20170_2017,20170 177 | 1015,20349_2017,20349 178 | 1016,20458_2017,20458 179 | 1019,20789_2017,20789 180 | 1027,22216_2017,22216 181 | 1030,22395_2017,22395 182 | 1031,22550_2017,22550 183 | 1034,22745_2017,22745 184 | 1040,23346_2017,23346 185 | 1041,23455_2017,23455 186 | 1042,23611_2017,23611 187 | 
1049,24078_2017,24078 188 | 1051,24290_2017,24290 189 | 1058,25847_2017,25847 190 | 1063,26639_2017,26639 191 | 1067,27345_2017,27345 192 | 1069,27821_2017,27821 193 | 1073,28486_2017,28486 194 | 1081,30487_2017,30487 195 | 1083,29197_2017,29197 196 | 1086,23320_2017,23320 197 | 1087,26609_2017,26609 198 | 1091,23462_2017,23462 199 | 1094,24525_2017,24525 200 | 1101,22746_2017,22746 201 | 1106,26168_2017,26168 202 | 1110,22193_2017,22193 203 | 1111,22241_2017,22241 204 | 1115,27308_2017,27308 205 | 1116,29802_2017,29802 206 | 1118,24725_2017,24725 207 | 1119,27557_2017,27557 208 | 1121,30971_2017,30971 209 | 1123,21458_2017,21458 210 | 1124,21573_2017,21573 211 | 1125,24723_2017,24723 212 | 1127,25257_2017,25257 213 | 1128,22938_2017,22938 214 | 1131,20740_2017,20740 215 | 1132,21009_2017,21009 216 | 1134,22652_2017,22652 217 | 1140,25489_2017,25489 218 | 1156,301_2018,301 219 | 1167,20646_2017,20646 220 | 1168,21017_2017,21017 221 | 1171,21515_2017,21515 222 | 1174,21643_2017,21643 223 | 1178,22044_2017,22044 224 | 1180,22295_2017,22295 225 | 1183,22630_2017,22630 226 | 1184,22865_2017,22865 227 | 1185,23160_2017,23160 228 | 1190,23601_2017,23601 229 | 1191,23650_2017,23650 230 | 1195,24418_2017,24418 231 | 1197,24650_2017,24650 232 | 1200,25305_2017,25305 233 | 1204,26058_2017,26058 234 | 1206,26614_2017,26614 235 | 1207,26750_2017,26750 236 | 1209,28183_2017,28183 237 | 1210,28265_2017,28265 238 | 1211,29570_2017,29570 239 | 1217,21295_2017,21295 240 | 1218,21658_2017,21658 241 | 1221,22237_2017,22237 242 | 1222,22328_2017,22328 243 | 1224,22389_2017,22389 244 | 1225,22392_2017,22392 245 | 1226,22622_2017,22622 246 | 1227,22655_2017,22655 247 | 1230,22913_2017,22913 248 | 1231,22978_2017,22978 249 | 1232,23312_2017,23312 250 | 1234,23596_2017,23596 251 | 1235,23710_2017,23710 252 | 1239,24452_2017,24452 253 | 1243,27571_2017,27571 254 | 1244,27730_2017,27730 255 | 1255,22874_2017,22874 256 | 1256,23015_2017,23015 257 | 1257,23288_2017,23288 258 | 
1258,23395_2017,23395 259 | 1261,27056_2017,27056 260 | 1269,552_2018,552 261 | 1271,6651_2018,6651 262 | 1272,7197_2018,7197 263 | 1278,20598_2017,20598 264 | 1279,22145_2017,22145 265 | 1292,22197_2017,22197 266 | 2001,22609_2017,22609 267 | 2002,24165_2017,24165 268 | 2004,27282_2017,27282 269 | 2005,24834_2017,24834 270 | 2016,28479_2017,28479 271 | 2017,29420_2017,29420 272 | 2018,28592_2017,28592 273 | 2019,25574_2017,25574 274 | 2020,22682_2017,22682 275 | 2022,23564_2017,23564 276 | 2027,21268_2017,21268 277 | 2028,24971_2017,24971 278 | 2029,20399_2017,20399 279 | 2030,24689_2017,24689 280 | 2031,25074_2017,25074 281 | 2032,27596_2017,27596 282 | 2034,22516_2017,22516 283 | 2035,20650_2017,20650 284 | 2037,23865_2017,23865 285 | 2039,21618_2017,21618 286 | 2040,22206_2017,22206 287 | 2041,20426_2017,20426 288 | 2042,26244_2017,26244 289 | 2045,22618_2017,22618 290 | 2046,23056_2017,23056 291 | 2047,21502_2017,21502 292 | 2053,26997_2017,26997 293 | 2056,24690_2017,24690 294 | 2057,23389_2017,23389 295 | 2058,20341_2017,20341 296 | 2060,31018_2017,31018 297 | 2064,21541_2017,21541 298 | 2065,29581_2017,29581 299 | 2071,21545_2017,21545 300 | 2072,2497_2018,2497 301 | 2074,1112_2018,1112 302 | 2075,27763_2017,27763 303 | 2078,26445_2017,26445 304 | 2079,25599_2017,25599 305 | 2083,25192_2017,25192 306 | 2085,24749_2017,24749 307 | 2087,24862_2017,24862 308 | 2096,27164_2017,27164 309 | 2097,24041_2017,24041 310 | 2100,23147_2017,23147 311 | 2102,21631_2017,21631 312 | 2104,25905_2017,25905 313 | 2108,27279_2017,27279 314 | 2109,22264_2017,22264 315 | 2111,21938_2017,21938 316 | 2114,27897_2017,27897 317 | 2120,21043_2017,21043 318 | 2121,23407_2017,23407 319 | 2126,25178_2017,25178 320 | 2128,26534_2017,26534 321 | 2130,29058_2017,29058 322 | 2132,23161_2017,23161 323 | 2133,24528_2017,24528 324 | 2136,23192_2017,23192 325 | 2140,23579_2017,23579 326 | 2143,22341_2017,22341 327 | 2144,21374_2017,21374 328 | 2149,25933_2017,25933 329 | 2150,21412_2017,21412 
330 | 2154,20216_2017,20216 331 | 2155,22737_2017,22737 332 | 2159,27947_2017,27947 333 | 2160,25950_2017,25950 334 | 2165,21748_2017,21748 335 | 2166,25062_2017,25062 336 | 2167,20782_2017,20782 337 | 2168,22843_2017,22843 338 | 2173,29961_2017,29961 339 | 2176,20919_2017,20919 340 | 2177,25127_2017,25127 341 | 2178,28864_2017,28864 342 | 2182,29603_2017,29603 343 | 2183,26507_2017,26507 344 | 2184,23713_2017,23713 345 | 2185,23357_2017,23357 346 | 2187,25337_2017,25337 347 | 2190,27143_2017,27143 348 | 2193,7711_2018,7711 349 | 2206,26257_2017,26257 350 | 2207,22703_2017,22703 351 | 2208,23553_2017,23553 352 | 2209,22891_2017,22891 353 | 2213,26837_2017,26837 354 | 2214,2595_2018,2595 355 | 2008,24736_2017,24736 356 | 2070,20944_2017,20944 357 | 2533,21834_2017,21834 358 | 2501,31009_2017,31009 359 | 2528,25961_2017,25961 360 | 2547,20541_2017,20541 361 | 2502,24957_2017,24957 362 | 2534,23526_2017,23526 363 | 2549,24179_2017,24179 364 | 2537,21659_2017,21659 365 | 2503,22018_2017,22018 366 | 2504,21559_2017,21559 367 | 2505,21445_2017,21445 368 | 2506,22155_2017,22155 369 | 2507,25118_2017,25118 370 | 2553,21830_2017,21830 371 | 2529,21274_2017,21274 372 | 2508,20368_2017,20368 373 | 2509,24849_2017,24849 374 | 2538,27309_2017,27309 375 | 2550,20582_2017,20582 376 | 2532,21897_2017,21897 377 | 2554,22991_2017,22991 378 | 2510,21811_2017,21811 379 | 2511,22021_2017,22021 380 | 2527,23575_2017,23575 381 | 2539,20984_2017,20984 382 | 2555,23039_2017,23039 383 | 2512,26559_2017,26559 384 | 2551,22171_2017,22171 385 | 2513,23656_2017,23656 386 | 2556,23028_2017,23028 387 | 2514,22840_2017,22840 388 | 2515,24080_2017,24080 389 | 2531,24761_2017,24761 390 | 2516,22547_2017,22547 391 | 2557,21877_2017,21877 392 | 2517,22050_2017,22050 393 | 2518,23959_2017,23959 394 | 2540,23067_2017,23067 395 | 2541,21395_2017,21395 396 | 2558,31001_2017,31001 397 | 2519,24846_2017,24846 398 | 2520,21867_2017,21867 399 | 2542,22631_2017,22631 400 | 2521,21593_2017,21593 401 | 
2543,21091_2017,21091 402 | 2559,25488_2017,25488 403 | 2522,23307_2017,23307 404 | 2535,21437_2017,21437 405 | 2544,21962_2017,21962 406 | 2523,21844_2017,21844 407 | 2545,22478_2017,22478 408 | 2536,23381_2017,23381 409 | 2524,21887_2017,21887 410 | 2530,22552_2017,22552 411 | 2525,21837_2017,21837 412 | 2526,24033_2017,24033 413 | 2560,21457_2017,21457 414 | 2561,23978_2017,23978 415 | 2563,23165_2017,23165 416 | 2587,22436_2017,22436 417 | 2576,20612_2017,20612 418 | 2589,21013_2017,21013 419 | 2623,31025_2017,31025 420 | 2564,23563_2017,23563 421 | 2567,20263_2017,20263 422 | 2598,23332_2017,23332 423 | 2572,20945_2017,20945 424 | 2568,20296_2017,20296 425 | 2569,23016_2017,23016 426 | 2592,23292_2017,23292 427 | 2577,23818_2017,23818 428 | 2594,24148_2017,24148 429 | 2617,25412_2017,25412 430 | 2608,23911_2017,23911 431 | 2624,24747_2017,24747 432 | 2562,30926_2017,30926 433 | 2573,23384_2017,23384 434 | 2610,22614_2017,22614 435 | 2625,31027_2017,31027 436 | 2609,22078_2017,22078 437 | 2615,21169_2017,21169 438 | 2590,21864_2017,21864 439 | 2578,25081_2017,25081 440 | 2582,21140_2017,21140 441 | 2583,22756_2017,22756 442 | 2614,23604_2017,23604 443 | 2579,23003_2017,23003 444 | 2618,30980_2017,30980 445 | 2593,23641_2017,23641 446 | 2584,22625_2017,22625 447 | 2595,22826_2017,22826 448 | 2596,25160_2017,25160 449 | 2620,26648_2017,26648 450 | 2574,20934_2017,20934 451 | 2603,21006_2017,21006 452 | 2566,23738_2017,23738 453 | 2601,21011_2017,21011 454 | 2606,26378_2017,26378 455 | 2604,21029_2017,21029 456 | 2597,24098_2017,24098 457 | 2619,22138_2017,22138 458 | 2588,21534_2017,21534 459 | 2586,21021_2017,21021 460 | 2575,23273_2017,23273 461 | 2605,25228_2017,25228 462 | 2633,31026_2017,31026 463 | 2580,20996_2017,20996 464 | 2571,20283_2017,20283 465 | 1295,26272_2017,26272 466 | 1296,26957_2017,26957 467 | 1297,21147_2017,21147 468 | 1298,29286_2017,29286 469 | 1299,21125_2017,21125 470 | 1300,28306_2017,28306 471 | 1301,2508_2018,2508 472 | 
1303,3798_2018,3798 473 | 1304,6336_2018,6336 474 | 1306,9063_2018,9063 475 | 1307,22467_2017,22467 476 | 1308,29861_2017,29861 477 | 1309,30363_2017,30363 478 | 1310,6660_2018,6660 479 | 1312,21198_2017,21198 480 | 1313,3180_2018,3180 481 | 1315,1580_2018,1580 482 | 3001,10162_2018,10162 483 | 3003,23592_2017,23592 484 | 3004,23795_2017,23795 485 | 3005,23810_2017,23810 486 | 3007,24545_2017,24545 487 | 3008,24552_2017,24552 488 | 3009,24594_2017,24594 489 | 3011,25094_2017,25094 490 | 3013,26185_2017,26185 491 | 3014,26332_2017,26332 492 | 3015,26380_2017,26380 493 | 3016,26645_2017,26645 494 | 3017,28044_2017,28044 495 | 3018,29272_2017,29272 496 | 3019,29439_2017,29439 497 | 3020,29673_2017,29673 498 | 3021,30664_2017,30664 499 | -------------------------------------------------------------------------------- /.Rhistory: -------------------------------------------------------------------------------- 1 | "5.1. VictimDeathMunicipality", 2 | "6. VictimOccupation", 3 | "7. DeathFacility", 4 | "7.1. DeathFacilityAddress", 5 | "8. Cause of Death", 6 | "9. 
Circumstances and hurricane relevance", 7 | "Direct/indirect CDC criterion" 8 | ) 9 | # Convert page 3 into a data frame 10 | page3_data <- data.frame(t(page3$`X__1`[c(1, 2, 5:11, 14, 15, 18:24)])) 11 | colnames(page3_data) <- 12 | c( 13 | paste( 14 | "First call", 15 | c( 16 | "Interviewer", 17 | "Date and Time", 18 | "Interview conducted", 19 | "Message on the phone", 20 | "Text message", 21 | "Number does not work", 22 | "No number", 23 | "Rescheduled call", 24 | "Person did not want to cooperate" 25 | ), 26 | sep = '-' 27 | ), 28 | paste( 29 | "Second call", 30 | c( 31 | "Interviewer", 32 | "Date and Time", 33 | "Interview conducted", 34 | "Message on the phone", 35 | "Text message", 36 | "Number does not work", 37 | "No number", 38 | "Rescheduled call", 39 | "Person did not want to cooperate" 40 | ), 41 | sep = '-' 42 | ) 43 | ) 44 | # Put it all in one 45 | data <- cbind(page1_data, page2_data, page3_data) %>% 46 | mutate(filename = filename) 47 | return(data) 48 | } 49 | bitacoras <- 50 | file.path("bitacoras/Filled", list.files('bitacoras/Filled')) %>% 51 | map(read_bitacora) %>% 52 | reduce(bind_rows) %>% 53 | mutate(id = as.character(round(parse_number(id))), 54 | `Certificate Number` = as.character(round(parse_number(`Certificate Number`))), 55 | DN = as.character(round(parse_number(DN)))) 56 | ###################### 57 | ### CLEAN UP CASES ### 58 | ###################### 59 | getUnaccented <- function(input) { 60 | return(chartr('ÁáàéÉíÍóÓúÚüÜñÑ', 'aaaeeiioouuuunn', input)) 61 | } 62 | processCols <- function(dt) { 63 | # remove informant and caller info 64 | dt <- dt[ , !grepl( "Informant" , names(dt))] 65 | dt <- dt[ , !grepl( "call" , names(dt))] 66 | dt <- dt[ , !grepl( "TypeOfDeath" , names(dt))] 67 | dt <- dt[ , !grepl( "Certificate Number" , names(dt))] 68 | # take out question numbers from colnames 69 | colnames(dt) <- gsub('[1-9].?[1-9]?. 
','',colnames(dt)) 70 | return(dt) 71 | } 72 | bitacoras_cleaned <- processCols(bitacoras) %>% 73 | select(-filename, -VictimOccupation) 74 | getDB <- function(var_name){ 75 | bitacoras_cleaned %>% 76 | select(Source, var_name, DN,id) %>% 77 | group_by(DN,id) %>% 78 | spread(Source, var_name) %>% 79 | rename(DB=`Base de datos causa de muerte`, CMT= Comentarios, SVY=`Encuesta/survey`, INT=`Respuesta del entrevistado`) 80 | } 81 | ############################################## 82 | ##### GET BASIC CASE INFO FROM BITACORAS ##### 83 | ############################################## 84 | #VictimName 85 | # If confirmed by interview, take DB name first 86 | # If no DB name available, take SVY name 87 | name1 <- getDB("VictimName") %>% 88 | mutate(VictimName = ifelse(toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'^c\\s') | INT == 'correcto', DB, INT)) %>% 89 | select(VictimName) 90 | name2 <- getDB("VictimMiddleName") %>% 91 | mutate(VictimMiddleName = ifelse(toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'^c\\s') | INT == 'correcto', DB, INT)) %>% 92 | select(VictimMiddleName) 93 | name3 <- getDB("VictimLastName") %>% 94 | mutate(VictimLastName = ifelse(toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'^c\\s') | INT == 'correcto', DB, INT)) %>% 95 | select(VictimLastName) 96 | name4 <- getDB("VictimSecondLastName") %>% 97 | mutate(VictimSecondLastName = ifelse(toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'^c\\s') | INT == 'correcto', DB, INT)) %>% 98 | select(VictimSecondLastName) 99 | part_name <- name1 %>% merge(name2) %>% merge(name3) %>% merge(name4) 100 | # age 101 | part_age <- getDB("VictimAge") %>% 102 | mutate(VictimAge = ifelse(toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'^c\\s') | INT == 'correcto', DB, INT)) %>% 103 | select(VictimAge) 104 | # residential address 105 | municipalities <- readxl::read_excel('data/municipalities.xlsx') 106 | mu <- getUnaccented(paste(tolower(municipalities$Municipality),collapse = '|')) 107 | part_res <- 
getDB("VictimResidence") %>% 108 | mutate(VictimResidenceMunicipality = ifelse( 109 | toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'^C\\s') | str_detect(INT,'^c\\s') | INT == 'correcto', 110 | DB, 111 | ifelse( 112 | # if there is a municipality in INT.VictimResidence 113 | str_detect(getUnaccented(tolower(INT)), mu), 114 | # then take this municipality 115 | str_extract_all(getUnaccented(tolower(INT)), mu, simplify = T)[,1], 116 | # otherwise take the DB 117 | DB) 118 | )) %>% 119 | select(VictimResidenceMunicipality) %>% 120 | mutate(VictimResidenceMunicipality = tolower(VictimResidenceMunicipality)) 121 | # death date 122 | part_deathdate <- getDB("VictimDeathDate") %>% 123 | mutate(INT = ifelse(!is.na(as.numeric(INT)), as.character(as.Date(as.numeric(INT), origin="1899-12-30")), INT)) %>% 124 | mutate(VictimDeathDate = ifelse(toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'^C\\s') | str_detect(INT,'correct'), DB, INT)) %>% 125 | mutate(VictimDeathDate = ifelse(str_detect(VictimDeathDate,'\\d+ (de )?septiembre (de 2017)?'), paste('2017-09-',str_squish(str_sub(VictimDeathDate,1,2)),sep=''), VictimDeathDate)) %>% 126 | mutate(VictimDeathDate = ifelse(str_detect(VictimDeathDate,'\\d+ (de )?octubre (de 2017)?'), paste('2017-10-',str_squish(str_sub(VictimDeathDate,1,2)),sep=''), VictimDeathDate)) %>% 127 | mutate(VictimDeathDate = ifelse(str_detect(VictimDeathDate,'\\d+ (de )?noviembre (de 2017)?'), paste('2017-11-',str_squish(str_sub(VictimDeathDate,1,2)),sep=''), VictimDeathDate)) %>% 128 | mutate(VictimDeathDate = ifelse(str_detect(VictimDeathDate,'\\d+ (de )?diciembre (de 2017)?'), paste('2017-12-',str_squish(str_sub(VictimDeathDate,1,2)),sep=''), VictimDeathDate)) %>% 129 | mutate(VictimDeathDate = ifelse(str_detect(VictimDeathDate,'\\d+ (de )?enero (de 2018)?'), paste('2018-01-',str_squish(str_sub(VictimDeathDate,1,2)),sep=''), VictimDeathDate)) %>% 130 | mutate(VictimDeathDate = ifelse(str_detect(VictimDeathDate,'\\d+ (de )?febrero (de 2018)?'), 
paste('2018-02-',str_squish(str_sub(VictimDeathDate,1,2)),sep=''), VictimDeathDate)) %>% 131 | mutate(VictimDeathDate = ifelse(str_detect(VictimDeathDate,'\\d+ (de )?abril (de 2017)?'), paste('2018-02-',str_squish(str_sub(VictimDeathDate,1,2)),sep=''), VictimDeathDate)) %>% 132 | select(VictimDeathDate) 133 | # death province 134 | part_deathPl <- getDB('VictimDeathMunicipality') %>% 135 | mutate( 136 | VictimDeathMunicipality = ifelse( 137 | (toupper(INT) == 'C' | is.na(INT) | str_detect(INT,'correcta|correcto|^C\\s|^c\\s')), 138 | DB, 139 | ifelse( 140 | # if there is a municipality in INT 141 | str_detect(getUnaccented(tolower(INT)), mu), 142 | # then take this municipality 143 | str_extract_all(getUnaccented(tolower(INT)), mu, simplify = T)[,1], 144 | # otherwise take the survey VictimResidence 145 | SVY 146 | )) 147 | ) %>% 148 | select(VictimDeathMunicipality) %>% 149 | mutate(VictimDeathMunicipality = tolower(str_replace(VictimDeathMunicipality,'PUERTO RICO, ',''))) 150 | part_cause <- getDB('Direct/indirect CDC criterion') %>% 151 | mutate(cause = INT) %>% 152 | select(cause) 153 | output <- part_name %>% 154 | merge(part_age) %>% 155 | merge(part_res) %>% 156 | merge(part_deathdate) %>% 157 | merge(part_deathPl) %>% 158 | merge(part_cause) %>% 159 | filter(is.na(cause) | cause != 'no relacionada') %>% 160 | mutate(VictimAge =as.numeric(VictimAge)) %>% 161 | mutate(id = as.numeric(id)) %>% 162 | mutate(DN = as.numeric(DN)) %>% 163 | mutate(source = 'survey') %>% 164 | select(-cause) 165 | #################################### 166 | ##### ADD DEMOGRAPHIC ANALYSIS ##### 167 | #################################### 168 | grouped_cause <- read_csv('data/govt_091817_061218.csv') %>% select(DeathNumber,nchsti) %>% 169 | filter(!is.na(nchsti)) %>% 170 | mutate(causes = nchsti, DN= DeathNumber) %>% 171 | select(DN, causes) 172 | output <- output %>% 173 | left_join(grouped_cause) 174 | ###################################### 175 | ##### FORMAT FOR THE INTERACTIVE ##### 
176 | ###################################### 177 | library(lubridate) 178 | basic <- output %>% 179 | mutate( 180 | VictimMiddleName = ifelse(is.na(VictimMiddleName),'',VictimMiddleName), 181 | VictimSecondLastName = ifelse(is.na(VictimSecondLastName),'',VictimSecondLastName), 182 | name = str_squish(paste(VictimName, VictimMiddleName, VictimLastName, VictimSecondLastName)), 183 | age = VictimAge, 184 | dmu = tolower(VictimDeathMunicipality), 185 | rmu = tolower(VictimResidenceMunicipality), 186 | date = VictimDeathDate, 187 | month = month(as.Date(VictimDeathDate)) 188 | ) %>% 189 | select(id, DN, name, age, dmu, rmu,date, month, source, causes) 190 | ############################################# 191 | ##### ADD IN CAUSES OF DEATH CATEGORIES ##### 192 | ############################################# 193 | causes <- c('Condición de salud directamente relacionada con el huracán', 194 | 'Daños ocasionados por el huracán', 195 | "Falta de electricidad","Falta de agua o comida", 196 | "Falta de acceso a atención médica", 197 | "Falta de acceso a las comunicaciones") 198 | basic <- 199 | basic %>% 200 | merge(getDB('Cause of Death') %>% 201 | select(SVY, DN,id) %>% 202 | mutate( 203 | c1 = ifelse(str_detect(SVY, causes[1]),1,0), 204 | c2 = ifelse(str_detect(SVY, causes[2]),1,0), 205 | c3 = ifelse(str_detect(SVY, causes[3]),1,0), 206 | c4 = ifelse(str_detect(SVY, causes[4]),1,0), 207 | c5 = ifelse(str_detect(SVY, causes[5]),1,0), 208 | c6 = ifelse(str_detect(SVY, causes[6]),1,0) 209 | ) %>% 210 | select(DN,id, c1,c2,c3,c4,c5,c6)) 211 | ############################## 212 | ##### GET INTERVIEW DATA ##### 213 | ############################## 214 | interview <- getDB('Circumstances and hurricane relevance') %>% 215 | mutate(text_field_es = ifelse(nchar(INT) < 50 | is.na(INT), SVY, INT)) %>% 216 | select(text_field_es) 217 | library(xlsx) 218 | library(readxl) 219 | library(tidyverse) 220 | library(stringr) 221 | # read_bitacora() by larry 222 | read_bitacora <- 
function(filename) { 223 | page1 <- read_excel(filename, sheet = "Información del caso", col_types = 'text') 224 | page2 <- read_excel(filename, sheet = "Cuestionario", col_types = 'text') 225 | page3 <- read_excel(filename, sheet = "Estatus del caso", col_types = 'text') 226 | page1_data <- data.frame(t(page1$`X__1`)) 227 | colnames(page1_data) <- 228 | c( 229 | "id", 230 | "Certificate Number", 231 | "DN", 232 | "TypeOfDeath" 233 | ) 234 | page2_data <- 235 | data.frame(t(page2[c(2:4, 17:21, 23, 25, 27:31, 33:39, 48),2:5])) 236 | colnames(page2_data) <- 237 | c( 238 | "InformantName", 239 | "InformantPhone", 240 | "InformantMail", 241 | "Source", 242 | "VictimName", 243 | "VictimMiddleName", 244 | "VictimLastName", 245 | "VictimSecondLastName", 246 | "1. InformantRelationship", 247 | "2. VictimAge", 248 | "3. VictimDOB", 249 | "4. VictimResidence", 250 | "4. VictimResidence1", 251 | "4. VictimResidence2", 252 | "4. VictimResidence3", 253 | "5. VictimDeathDate", 254 | "5.1. VictimDeathMunicipality", 255 | "6. VictimOccupation", 256 | "7. DeathFacility", 257 | "7.1. DeathFacilityAddress", 258 | "8. Cause of Death", 259 | "9. 
Circumstances and hurricane relevance", 260 | "Direct/indirect CDC criterion" 261 | ) 262 | # Convert page 3 into a data frame 263 | page3_data <- data.frame(t(page3$`X__1`[c(1, 2, 5:11, 14, 15, 18:24)])) 264 | colnames(page3_data) <- 265 | c( 266 | paste( 267 | "First call", 268 | c( 269 | "Interviewer", 270 | "Date and Time", 271 | "Interview conducted", 272 | "Message on the phone", 273 | "Text message", 274 | "Number does not work", 275 | "No number", 276 | "Rescheduled call", 277 | "Person did not want to cooperate" 278 | ), 279 | sep = '-' 280 | ), 281 | paste( 282 | "Second call", 283 | c( 284 | "Interviewer", 285 | "Date and Time", 286 | "Interview conducted", 287 | "Message on the phone", 288 | "Text message", 289 | "Number does not work", 290 | "No number", 291 | "Rescheduled call", 292 | "Person did not want to cooperate" 293 | ), 294 | sep = '-' 295 | ) 296 | ) 297 | # Put it all in one 298 | data <- cbind(page1_data, page2_data, page3_data) %>% 299 | mutate(filename = filename) 300 | return(data) 301 | } 302 | bitacoras <- 303 | file.path("bitacoras/Filled", list.files('bitacoras/Filled')) %>% 304 | map(read_bitacora) %>% 305 | reduce(bind_rows) %>% 306 | mutate(id = as.character(round(parse_number(id))), 307 | `Certificate Number` = as.character(round(parse_number(`Certificate Number`))), 308 | DN = as.character(round(parse_number(DN)))) 309 | ###################### 310 | ### CLEAN UP CASES ### 311 | ###################### 312 | getUnaccented <- function(input) { 313 | return(chartr('ÁáàéÉíÍóÓúÚüÜñÑ', 'aaaeeiioouuuunn', input)) 314 | } 315 | processCols <- function(dt) { 316 | # remove informant and caller info 317 | dt <- dt[ , !grepl( "Informant" , names(dt))] 318 | dt <- dt[ , !grepl( "call" , names(dt))] 319 | dt <- dt[ , !grepl( "TypeOfDeath" , names(dt))] 320 | dt <- dt[ , !grepl( "Certificate Number" , names(dt))] 321 | # take out question numbers from colnames 322 | colnames(dt) <- gsub('[1-9].?[1-9]?. 
','',colnames(dt))
  return(dt)
}

# Bitácora table with the columns that are not used downstream removed.
bitacoras_cleaned <- processCols(bitacoras) %>%
  select(-filename, -VictimOccupation)

# Pivot one bitácora field to wide form, one column per information source.
#
# var_name: name of a column of `bitacoras_cleaned`, given as a string.
# Returns a tibble with DN, id and the renamed source columns:
#   DB  = `Base de datos causa de muerte` (cause-of-death database)
#   CMT = `Comentarios`                   (comments)
#   SVY = `Encuesta/survey`               (survey)
#   INT = `Respuesta del entrevistado`    (interviewee's answer)
# The result is left grouped by DN and id. dplyr keeps grouping columns
# through select(), and every `part_*` table below relies on that to retain
# its merge keys after selecting a single field.
getDB <- function(var_name){
  bitacoras_cleaned %>%
    select(Source, var_name, DN, id) %>%
    group_by(DN, id) %>%
    spread(Source, var_name) %>%
    rename(DB = `Base de datos causa de muerte`,
           CMT = Comentarios,
           SVY = `Encuesta/survey`,
           INT = `Respuesta del entrevistado`)
}

# Columns that identify a case in every table produced by getDB().
case_keys <- c('DN', 'id')

# TRUE where the interview leaves the database value standing: the cell is
# empty, holds a bare "C"/"c", starts with "c " / "C ", or contains a word
# beginning with "correct" (correcto, correcta). The word boundary keeps
# "incorrecto" from counting as a confirmation.
isConfirmed <- function(answer) {
  is.na(answer) | str_detect(tolower(answer), '^c(\\s|$)|\\bcorrect')
}

##############################################
##### GET BASIC CASE INFO FROM BITACORAS #####
##############################################

# Victim name parts.
# If the interview confirms the value (or says nothing), take the DB value;
# otherwise take what the interviewee said.
name1 <- getDB("VictimName") %>%
  mutate(VictimName = ifelse(isConfirmed(INT), DB, INT)) %>%
  select(VictimName)
name2 <- getDB("VictimMiddleName") %>%
  mutate(VictimMiddleName = ifelse(isConfirmed(INT), DB, INT)) %>%
  select(VictimMiddleName)
name3 <- getDB("VictimLastName") %>%
  mutate(VictimLastName = ifelse(isConfirmed(INT), DB, INT)) %>%
  select(VictimLastName)
name4 <- getDB("VictimSecondLastName") %>%
  mutate(VictimSecondLastName = ifelse(isConfirmed(INT), DB, INT)) %>%
  select(VictimSecondLastName)
part_name <- name1 %>%
  merge(name2, by = case_keys) %>%
  merge(name3, by = case_keys) %>%
  merge(name4, by = case_keys)

# age
part_age <- getDB("VictimAge") %>%
  mutate(VictimAge = ifelse(isConfirmed(INT), DB, INT)) %>%
  select(VictimAge)

# residential address
municipalities <- readxl::read_excel('data/municipalities.xlsx')
# Alternation pattern built from the lower-cased municipality names.
mu <- getUnaccented(paste(tolower(municipalities$Municipality), collapse = '|'))

# First municipality named in `text` (lower-cased, passed through
# getUnaccented), or the matching element of `fallback` where none is named.
municipalityOr <- function(text, fallback) {
  found <- str_extract(getUnaccented(tolower(text)), mu)
  ifelse(is.na(found), fallback, found)
}

part_res <- getDB("VictimResidence") %>%
  mutate(VictimResidenceMunicipality = ifelse(
    isConfirmed(INT),
    DB,
    # a municipality mentioned by the interviewee wins, otherwise keep the DB
    municipalityOr(INT, DB)
  )) %>%
  select(VictimResidenceMunicipality) %>%
  mutate(VictimResidenceMunicipality = tolower(VictimResidenceMunicipality))

# death date

# Answers that are purely numeric are converted as day counts from
# 1899-12-30 into "YYYY-MM-DD" text; anything else is returned unchanged.
# NOTE(review): presumably these are Excel serial dates -- confirm.
# The warning as.numeric() raises for non-numeric text is expected and is
# silenced here only.
excelSerialToIso <- function(x) {
  serial <- suppressWarnings(as.numeric(x))
  ifelse(is.na(serial), x, as.character(as.Date(serial, origin = "1899-12-30")))
}

# Month names written out in the answers, mapped to their "YYYY-MM".
# NOTE(review): marzo, mayo and junio have no rule here, exactly as before --
# confirm whether such answers occur in the bitácoras.
spanishMonthPrefix <- c(
  septiembre = '2017-09',
  octubre    = '2017-10',
  noviembre  = '2017-11',
  diciembre  = '2017-12',
  enero      = '2018-01',
  febrero    = '2018-02',
  abril      = '2018-04'
)

# Rewrite dates such as "20 de septiembre de 2017" or "5 octubre" as
# "YYYY-MM-DD". The day is taken from the digits in front of the month name
# and zero-padded. Values that name none of the known months (including NA)
# are returned unchanged.
normalizeSpanishDate <- function(x) {
  for (month_name in names(spanishMonthPrefix)) {
    day <- str_match(x, paste0('(\\d{1,2}) (de )?', month_name))[, 2]
    x <- ifelse(
      is.na(day),
      x,
      paste0(spanishMonthPrefix[[month_name]], '-', str_pad(day, 2, pad = '0'))
    )
  }
  x
}

part_deathdate <- getDB("VictimDeathDate") %>%
  mutate(INT = excelSerialToIso(INT)) %>%
  mutate(VictimDeathDate = ifelse(isConfirmed(INT), DB, INT)) %>%
  mutate(VictimDeathDate = normalizeSpanishDate(VictimDeathDate)) %>%
  select(VictimDeathDate)

# death province
part_deathPl <- getDB('VictimDeathMunicipality') %>%
  mutate(VictimDeathMunicipality = ifelse(
    isConfirmed(INT),
    DB,
    # a municipality mentioned by the interviewee wins, otherwise fall back
    # to the survey's value for this same field
    municipalityOr(INT, SVY)
  )) %>%
  select(VictimDeathMunicipality) %>%
  mutate(VictimDeathMunicipality = tolower(str_replace(VictimDeathMunicipality, 'PUERTO RICO, ', '')))

part_cause <- getDB('Direct/indirect CDC criterion') %>%
  mutate(cause = INT) %>%
  select(cause)

# One row per case. Cases whose interview answer for the CDC criterion is
# 'no relacionada' are dropped; cases with no answer are kept.
output <- part_name %>%
  merge(part_age, by = case_keys) %>%
  merge(part_res, by = case_keys) %>%
  merge(part_deathdate, by = case_keys) %>%
  merge(part_deathPl, by = case_keys) %>%
  merge(part_cause, by = case_keys) %>%
  filter(is.na(cause) | cause != 'no relacionada') %>%
  mutate(VictimAge = as.numeric(VictimAge)) %>%
  mutate(id = as.numeric(id)) %>%
  mutate(DN = as.numeric(DN)) %>%
  mutate(source = 'survey') %>%
  select(-cause)

####################################
##### ADD DEMOGRAPHIC ANALYSIS #####
####################################
# Attach the `nchsti` value from the government file to each case by DN.
grouped_cause <- read_csv('data/govt_091817_061218.csv') %>%
  select(DeathNumber, nchsti) %>%
  filter(!is.na(nchsti)) %>%
  mutate(causes = nchsti, DN = DeathNumber) %>%
  select(DN, causes)
output <- output %>%
  left_join(grouped_cause, by = 'DN')

######################################
##### FORMAT FOR THE INTERACTIVE #####
######################################
library(lubridate)
basic <- output %>%
  mutate(
    # missing optional name parts become '' so paste() does not emit "NA"
    VictimMiddleName = ifelse(is.na(VictimMiddleName), '', VictimMiddleName),
    VictimSecondLastName = ifelse(is.na(VictimSecondLastName), '', VictimSecondLastName),
    name = str_squish(paste(VictimName, VictimMiddleName, VictimLastName, VictimSecondLastName)),
    age = VictimAge,
    dmu = tolower(VictimDeathMunicipality),
    rmu = tolower(VictimResidenceMunicipality),
    date = VictimDeathDate,
    month = month(as.Date(VictimDeathDate))
  ) %>%
  select(id, DN, name, age, dmu, rmu, date, month, source, causes)

#############################################
##### ADD IN CAUSES OF DEATH CATEGORIES #####
#############################################
# Survey answer options; c1..c6 flag which of them appear in the SVY text
# (1 = present, 0 = absent, NA when there is no survey text).
causes <- c('Condición de salud directamente relacionada con el huracán',
            'Daños ocasionados por el huracán',
            "Falta de electricidad", "Falta de agua o comida",
            "Falta de acceso a atención médica",
            "Falta de acceso a las comunicaciones")
basic <-
  basic %>%
  merge(
    getDB('Cause of Death') %>%
      select(SVY, DN, id) %>%
      mutate(
        c1 = ifelse(str_detect(SVY, causes[1]), 1, 0),
        c2 = ifelse(str_detect(SVY, causes[2]), 1, 0),
        c3 = ifelse(str_detect(SVY, causes[3]), 1, 0),
        c4 = ifelse(str_detect(SVY, causes[4]), 1, 0),
        c5 = ifelse(str_detect(SVY, causes[5]), 1, 0),
        c6 = ifelse(str_detect(SVY, causes[6]), 1, 0)
      ) %>%
      select(DN, id, c1, c2, c3, c4, c5, c6),
    by = case_keys
  )

##############################
##### GET INTERVIEW DATA #####
##############################
# Use the interview text unless it is missing or shorter than 50 characters,
# in which case the survey text is used instead.
interview <- getDB('Circumstances and hurricane relevance') %>%
  mutate(text_field_es = ifelse(nchar(INT) < 50 | is.na(INT), SVY, INT)) %>%
  select(text_field_es)

# Table with empty causes_es / text_field_en columns next to the Spanish
# text. It is only printed here, not written to disk.
basic %>%
  merge(interview, by = case_keys) %>%
  mutate(causes_en = causes,
         causes_es = '', text_field_en = '',
         PlaceOfDeath = dmu,
         DateOfDeath = date) %>%
  select(id, name, DateOfDeath, causes_en, causes_es, text_field_en, text_field_es)

basic
# View() opens a data viewer, so it is only called in an interactive session.
if (interactive()) {
  View(interview)
}

# library(xlsx)
# library(readxl)
# library(tidyverse)
# library(stringr)
# Install xlsx only when it is missing instead of on every run.
if (!requireNamespace('xlsx', quietly = TRUE)) {
  install.packages('xlsx')
}
library(purrr)
library(writexl)
library(xlsx)

# Remaining per-case columns, written once with the namespaced writer so the
# call does not depend on which spreadsheet package happens to be attached.
basic %>%
  mutate(PlaceOfDeath = dmu,
         DateOfDeath = date) %>%
  select(-dmu, -date) %>%
  xlsx::write.xlsx('data/forProcess/rest_for_merge.xlsx', row.names = FALSE)
--------------------------------------------------------------------------------