├──  (Jesse Hammond's conflicted copy 2015-11-26).Rhistory
├── .DS_Store
├── .Rapp.history
├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── EventNetworks.Rproj
├── NAMESPACE
├── R
    ├── .DS_Store
    ├── .Rapp.history
    ├── EventNetworks.R
    ├── agents_doc.R
    ├── convert_cameo.R
    ├── convert_cameo_data.R
    ├── download_icews.R
    ├── download_phoenix.R
    ├── extract_dyadstats.R
    ├── extract_netstats.R
    ├── extract_nodestats.R
    ├── icews_cameo.R
    ├── ingest_histphoenix.R
    ├── ingest_icews.R
    ├── ingest_phoenix.R
    ├── phoenix_stats.R
    ├── phoenix_tables.R
    ├── states_doc.R
    ├── update_icews.R
    └── update_phoenix.R
├── README.md
├── data
    ├── agentnames.txt~
    ├── agents.RData
    ├── convert_cameo_data.RData
    └── states.RData
├── man
    ├── agents.Rd
    ├── convert_cameo.Rd
    ├── convert_cameo_data.Rd
    ├── download_icews.Rd
    ├── download_phoenix.Rd
    ├── eventNetworks.Rd
    ├── extract_dyadstats.Rd
    ├── extract_netstats.Rd
    ├── extract_nodestats.Rd
    ├── icews_cameo.Rd
    ├── ingest_histphoenix.Rd
    ├── ingest_icews.Rd
    ├── ingest_phoenix.Rd
    ├── phoenix_stats.Rd
    ├── phoenix_tables.Rd
    ├── states.Rd
    ├── update_icews.Rd
    └── update_phoenix.Rd
└── testing_script.R


/ (Jesse Hammond's conflicted copy 2015-11-26).Rhistory:
--------------------------------------------------------------------------------
  1 | # Download raw files from Phoenix data repo and ICEWS dataverse.
  2 | #
  3 | ######
  4 | ## Download new Phoenix data tables. This will download the entire
  5 | ##  archive the first time this function is run and fully populate
  6 | ##  the destination folder.
  7 | ## NOTE: This currently requires a clumsy step where it reinstalls phoxy
  8 | ##      every time the code is run. This should be cleaned up, but I'm not
  9 | ##      100% sure how to do so in a way that's both accurate and polite.
 10 | message('Checking Phoenix data...')
 11 | # library(phoxy)
 12 | phoxy::update_phoenix(destpath = phoenix_loc, phoenix_version = 'current')
 13 | ## Check to see if ICEWS folder exists and that it has at least one 'valid'
 14 | ##  ICEWS data table stored.
 15 | message('Checking ICEWS data...')
 16 | icews_checkfile <- 'events.2000.20150313082808.tab'
 17 | icews_files <- list.files(icews_loc)
 18 | if(!icews_checkfile %in% icews_files){
 19 | stop('Please enter a valid path that contains the ICEWS yearly files.')
 20 | } else {
 21 | message('ICEWS file location is valid.')
 22 | }
 23 | ######
 24 | #
 25 | # Read and parse ICEWS data for merging.
 26 | #
 27 | ######
 28 | ## Read and parse ICEWS data
 29 | message('Ingesting ICEWS data...')
 30 | icews_data <- phoxy::ingest_icews(icews_loc, start_date, end_date)
 31 | ## Clean ICEWS data and format to Phoenix-style CAMEO codes
 32 | ##  for actors and states
 33 | message('Munging ICEWS data...')
 34 | icews_data <- icews_cameo(icews_data)
 35 | ## Subset ICEWS data to only keep key columns
 36 | icews_data <- icews_data[, list(date, sourceactorentity
 37 | , targetactorentity, rootcode
 38 | , eventcode, goldstein)]
 39 | icews_data[, source := 'icews']
 40 | ######
 41 | #
 42 | # Read and parse Phoenix data for merging.
 43 | #
 44 | ######
 45 | ## Read and parse Phoenix data
 46 | message('Ingesting Phoenix data...')
 47 | phoenix_data <- phoxy::ingest_phoenix(phoenix_loc = phoenix_loc
 48 | , start_date = start_date
 49 | , end_date = end_date)
 50 | ## Subset Phoenix data to only keep key columns
 51 | phoenix_data <- phoenix_data[, list(date, paste3(sourceactorentity
 52 | , sourceactorrole, sep = '')
 53 | , paste3(targetactorentity
 54 | , targetactorrole, sep = '')
 55 | , rootcode, eventcode, goldstein)]
 56 | setnames(phoenix_data, c('V2', 'V3')
 57 | , c('sourceactorentity', 'targetactorentity'))
 58 | phoenix_data[, source := 'phoenix']
 59 | ## Drop any missing data
 60 | phoenix_data <- phoenix_data[!is.na(rootcode)]
 61 | phoenix_data <- phoenix_data[!is.na(eventcode)]
 62 | phoenix_data <- phoenix_data[!is.na(goldstein)]
 63 | ######
 64 | #
 65 | # Combine ICEWS and Phoenix data
 66 | #
 67 | ######
 68 | try({
 69 | master_data <- rbind(icews_data, phoenix_data)
 70 | }, silent = T)
 71 | if(class(master_data)[1] == 'try-error'){
 72 | message('Specified range does not include Phoenix data.')
 73 | master_data <- icews_data
 74 | }
 75 | setnames(master_data, c('sourceactorentity', 'targetactorentity')
 76 | , c('actora', 'actorb'))
 77 | ## Subset events: if a subset of EVENTCODES are specified, keep only that
 78 | ##  set of events and aggregate up from there.
 79 | if(!any('all' %in% code_subset)){
 80 | master_data <- master_data[eventcode %in% code_subset]
 81 | }
 82 | ## Create new variable: Pentaclass (0-4)
 83 | master_data[rootcode %in% c(1, 2), pentaclass := 0L]
 84 | master_data[rootcode %in% c(3, 4, 5), pentaclass := 1L]
 85 | master_data[rootcode %in% c(6, 7, 8), pentaclass := 2L]
 86 | master_data[rootcode %in% c(9, 10, 11, 12, 13, 16), pentaclass := 3L]
 87 | master_data[rootcode %in% c(14, 15, 17, 18, 19, 20), pentaclass := 4L]
 88 | ######################################
 89 | ## IMPORTANT ASSUMPTION HERE:
 90 | ## I am *ASSUMING* that NULL/NA entries after a state code
 91 | ##  implies that the actor is the GOVERNMENT. As such I am replacing
 92 | ##  all such missing entries with 'GOV'.
 93 | ######################################
 94 | master_data[actora %in%  countrycode::countrycode_data$iso3c
 95 | , actora := paste0(actora, 'GOV')]
 96 | master_data[actorb %in%  countrycode::countrycode_data$iso3c
 97 | , actorb := paste0(actorb, 'GOV')]
 98 | ######
 99 | #
100 | # Pre-format data by de-duplicating, cleaning dates and actors,
101 | # and dropping unused columns
102 | #
103 | ######
104 | ## Aggregate dates to specified time window
105 | master_data[, date := lubridate::floor_date(date, time_window)]
106 | ## Subset events: keep only events within date range
107 | master_data <- master_data[date %in% dates]
108 | ## Subset events and columns: only events that:
109 | ##  1. involve specified actor set on both side (as ENTITIES)
110 | ##  2. involve TWO DIFFERENT actors (i.e. no self-interactions
111 | ##      as specified by user)
112 | if(('states' %in% actorset)){
113 | master_data <- master_data[(actora %in% paste0(actors, 'GOV')
114 | & actorb %in% paste0(actors, 'GOV'))
115 | | (actora %in% paste0(actors, 'MIL')
116 | & actorb %in% paste0(actors, 'MIL'))]
117 | master_data <- master_data[substr(actora, 1, 3) != substr(actorb, 1, 3)]
118 | ## Set actor codes to state-level factors
119 | master_data[, actora := factor(substr(actora, 1, 3), levels = levels(actors))]
120 | master_data[, actorb := factor(substr(actorb, 1, 3), levels = levels(actors))]
121 | } else{
122 | master_data <- master_data[(actora %in% actors
123 | & actorb %in% actors)]
124 | master_data <- master_data[actora != actorb]
125 | master_data[, actora := factor(actora, levels = levels(actors))]
126 | master_data[, actorb := factor(actorb, levels = levels(actors))]
127 | }
128 | ## Subset columns: drop unused event column
129 | if(level == 'rootcode'){
130 | master_data[, eventcode := NULL]
131 | master_data[, goldstein := NULL]
132 | master_data[, pentaclass := NULL]
133 | } else if(level == 'eventcode') {
134 | master_data[, rootcode := NULL]
135 | master_data[, goldstein := NULL]
136 | master_data[, pentaclass := NULL]
137 | } else if(level == 'goldstein') {
138 | master_data[, eventcode := NULL]
139 | master_data[, rootcode := NULL]
140 | master_data[, pentaclass := NULL]
141 | } else if(level == 'pentaclass') {
142 | master_data[, eventcode := NULL]
143 | master_data[, rootcode := NULL]
144 | master_data[, goldstein := NULL]
145 | setcolorder(master_data, c(1,2,3,5,4))
146 | }
147 | ## Set names to generic
148 | setnames(master_data, c('date', 'actora', 'actorb', 'code', 'source'))
149 | ## Set CAMEO coded event/root codes to factors
150 | master_data[, code := factor(code, levels = codes)]
151 | master_data
152 | plyr::vaggregate(master_data, .group = 'code', .fun = 'count')
153 | plyr::vaggregate(master_data, group = 'code', fun = 'count')
154 | plyr::vaggregate(master_data, .group = 'code', .fun = 'count')
155 | plyr::vaggregate(master_data, .group = 'code', 'count')
156 | master_data[, .N, by = code]
157 | master_data[, .N, by = list(date, actora, code)]
158 | master_data[, .N, by = list(date, actora, code, source)]
159 | master_data[, .N, by = list(date, actora, actorb, code, source)]
160 | ######
161 | #
162 | # Set up some initial values: Time windows
163 | #
164 | ######
165 | ## Date objects
166 | if (class(start_date) %in% c('numeric', 'integer')
167 | | class(end_date) %in% c('numeric', 'integer')){
168 | start_date <- as.Date(lubridate::ymd(start_date))
169 | end_date <- as.Date(lubridate::ymd(end_date))
170 | }
171 | dates <- seq.Date(start_date, end_date, by = 'day')
172 | dates <- unique(lubridate::round_date(dates, time_window))
173 | ######
174 | #
175 | # Set up some initial values: Actors
176 | #
177 | ######
178 | ## Paste-function that can handle NA entries (NA inputs are pasted as empty strings)
179 | ## (http://stackoverflow.com/questions/13673894/suppress-nas-in-paste)
180 | paste3 <- function(...,sep=", ") {
181 | L <- list(...)  # collect the vectors to be pasted
182 | L <- lapply(L,function(x) {x[is.na(x)] <- ""; x})  # replace NA entries with ""
183 | ret <-gsub(paste0("(^",sep,"|",sep,"$)"),"",  # outer gsub: strip a separator left at the start or end
184 | gsub(paste0(sep,sep),sep,  # inner gsub: collapse a doubled separator; NOTE(review): sep is used as a regex, unescaped
185 | do.call(paste,c(L,list(sep=sep)))))  # paste all inputs together with sep
186 | is.na(ret) <- ret==""  # results that end up empty are set to NA
187 | ret
188 | }
189 | ## Default actors: 255 ISO-coded countries
190 | if('states' %in% actorset){
191 | # Set up set of primary actor codes
192 | statelist <- unique(countrycode::countrycode_data$iso3c)
193 | statelist <- statelist[!is.na(statelist)]
194 | actors <- as.factor(sort(statelist))
195 | n <- length(actors)
196 | } else {
197 | ## Set up set of secondary actor codes
198 | secondary_actors <- c('GOV', 'MIL', 'REB', 'OPP', 'PTY', 'COP', 'JUD'
199 | , 'SPY', 'MED', 'EDU', 'BUS', 'CRM', 'CVL')
200 | statelist <- countrycode::countrycode_data$iso3c
201 | actors <- unique(statelist[statelist %in% actorset])
202 | actors <- actors[!is.na(actors)]
203 | actors <- unique(as.vector(outer(actors, secondary_actors, paste, sep = '')))
204 | actors <- as.factor(sort(actors))
205 | n <- length(actors)
206 | }
207 | ######
208 | #
209 | # Set up some initial values: Event codes
210 | #
211 | ######
212 | ## Factor variables describing CAMEO categories
213 | if(level == 'rootcode'){
214 | codes <- factor(1:20)
215 | levels(codes) <- as.character(1:20)
216 | } else if(level == 'eventcode'){
217 | codes <- factor(1:298)
218 | levels(codes) <- as.character(
219 | c(10:21, 211:214, 22:23, 231:234, 24, 241:244, 25, 251:256, 26:28, 30:31
220 | , 311:314, 32:33, 331:334, 34, 341:344, 35, 351:356, 36:46, 50:57
221 | , 60:64, 70:75, 80:81, 811:814, 82:83, 831:834, 84, 841:842, 85:86
222 | , 861:863, 87, 871:874, 90:94, 100:101, 1011:1014, 102:103, 1031:1034
223 | , 104, 1041:1044, 105, 1051:1056, 106:108, 110:112, 1121:1125, 113:116
224 | , 120:121, 1211:1214, 122, 1221:1224, 123, 1231:1234, 124, 1241:1246
225 | , 125:129, 130:131, 1311:1313, 132, 1321:1324, 133:138, 1381:1385
226 | , 139:141, 1411:1414, 142, 1421:1424, 143, 1431:1434, 144, 1441:1444
227 | , 145, 1451:1454, 150:155, 160:162, 1621:1623, 163:166, 1661:1663
228 | , 170:171, 1711:1712, 172, 1721:1724, 173:176, 180:182, 1821:1823, 183
229 | , 1831:1834, 184:186, 190:195, 1951:1952, 196, 200:204, 2041:2042)
230 | )
231 | } else if(level == 'pentaclass'){
232 | codes <- factor(0:4)
233 | levels(codes) <- as.character(0:4)
234 | }
235 | ## Subset of event codes
236 | if(!any('all' %in% codeset)){
237 | if(sum(!codeset %in% codes) > 0){
238 | message('Warning: some event codes do not match specified event class. Proceeding with valid event codes.')
239 | }
240 | codes <- codes[codes %in% codeset]
241 | if(length(codes) == 0){
242 | stop('Please enter a valid set of event codes or pentaclass values.')
243 | }
244 | }
245 | ######
246 | #
247 | # Set up some empty storage objects
248 | #
249 | ######
250 | # Storage for daily network objects
251 | master_networks <- vector('list', length(codes))
252 | names(master_networks) <- paste0('code', codes)
253 | # Storage for comparison of Phoenix and ICEWS reporting overlap
254 | filler <- rep(NA, length(dates))
255 | sources_overlap <- data.table(date = dates
256 | , phoenix_only = filler
257 | , icews_only = filler
258 | , both_sources = filler)
259 | ######
260 | #
261 | # Download raw files from Phoenix data repo and ICEWS dataverse.
262 | #
263 | ######
264 | ## Download new Phoenix data tables. This will download the entire
265 | ##  archive the first time this function is run and fully populate
266 | ##  the destination folder.
267 | ## NOTE: This currently requires a clumsy step where it reinstalls phoxy
268 | ##      every time the code is run. This should be cleaned up, but I'm not
269 | ##      100% sure how to do so in a way that's both accurate and polite.
270 | message('Checking Phoenix data...')
271 | # library(phoxy)
272 | phoxy::update_phoenix(destpath = phoenix_loc, phoenix_version = 'current')
273 | ## Check to see if ICEWS folder exists and that it has at least one 'valid'
274 | ##  ICEWS data table stored.
275 | message('Checking ICEWS data...')
276 | icews_checkfile <- 'events.2000.20150313082808.tab'
277 | icews_files <- list.files(icews_loc)
278 | if(!icews_checkfile %in% icews_files){
279 | stop('Please enter a valid path that contains the ICEWS yearly files.')
280 | } else {
281 | message('ICEWS file location is valid.')
282 | }
283 | ######
284 | #
285 | # Read and parse ICEWS data for merging.
286 | #
287 | ######
288 | ## Read and parse ICEWS data
289 | message('Ingesting ICEWS data...')
290 | icews_data <- phoxy::ingest_icews(icews_loc, start_date, end_date)
291 | ## Clean ICEWS data and format to Phoenix-style CAMEO codes
292 | ##  for actors and states
293 | message('Munging ICEWS data...')
294 | icews_data <- icews_cameo(icews_data)
295 | ## Subset ICEWS data to only keep key columns
296 | icews_data <- icews_data[, list(date, sourceactorentity
297 | , targetactorentity, rootcode
298 | , eventcode, goldstein)]
299 | icews_data[, source := 'icews']
300 | ######
301 | #
302 | # Read and parse Phoenix data for merging.
303 | #
304 | ######
305 | ## Read and parse Phoenix data
306 | message('Ingesting Phoenix data...')
307 | phoenix_data <- phoxy::ingest_phoenix(phoenix_loc = phoenix_loc
308 | , start_date = start_date
309 | , end_date = end_date)
310 | ## Subset Phoenix data to only keep key columns
311 | phoenix_data <- phoenix_data[, list(date, paste3(sourceactorentity
312 | , sourceactorrole, sep = '')
313 | , paste3(targetactorentity
314 | , targetactorrole, sep = '')
315 | , rootcode, eventcode, goldstein)]
316 | setnames(phoenix_data, c('V2', 'V3')
317 | , c('sourceactorentity', 'targetactorentity'))
318 | phoenix_data[, source := 'phoenix']
319 | ## Drop any missing data
320 | phoenix_data <- phoenix_data[!is.na(rootcode)]
321 | phoenix_data <- phoenix_data[!is.na(eventcode)]
322 | phoenix_data <- phoenix_data[!is.na(goldstein)]
323 | ######
324 | #
325 | # Combine ICEWS and Phoenix data
326 | #
327 | ######
328 | try({
329 | master_data <- rbind(icews_data, phoenix_data)
330 | }, silent = T)
331 | if(class(master_data)[1] == 'try-error'){
332 | message('Specified range does not include Phoenix data.')
333 | master_data <- icews_data
334 | }
335 | setnames(master_data, c('sourceactorentity', 'targetactorentity')
336 | , c('actora', 'actorb'))
337 | ## Subset events: if a subset of EVENTCODES are specified, keep only that
338 | ##  set of events and aggregate up from there.
339 | if(!any('all' %in% code_subset)){
340 | master_data <- master_data[eventcode %in% code_subset]
341 | }
342 | ## Create new variable: Pentaclass (0-4)
343 | master_data[rootcode %in% c(1, 2), pentaclass := 0L]
344 | master_data[rootcode %in% c(3, 4, 5), pentaclass := 1L]
345 | master_data[rootcode %in% c(6, 7, 8), pentaclass := 2L]
346 | master_data[rootcode %in% c(9, 10, 11, 12, 13, 16), pentaclass := 3L]
347 | master_data[rootcode %in% c(14, 15, 17, 18, 19, 20), pentaclass := 4L]
348 | ######################################
349 | ## IMPORTANT ASSUMPTION HERE:
350 | ## I am *ASSUMING* that NULL/NA entries after a state code
351 | ##  implies that the actor is the GOVERNMENT. As such I am replacing
352 | ##  all such missing entries with 'GOV'.
353 | ######################################
354 | master_data[actora %in%  countrycode::countrycode_data$iso3c
355 | , actora := paste0(actora, 'GOV')]
356 | master_data[actorb %in%  countrycode::countrycode_data$iso3c
357 | , actorb := paste0(actorb, 'GOV')]
358 | ######
359 | #
360 | # Pre-format data by de-duplicating, cleaning dates and actors,
361 | # and dropping unused columns
362 | #
363 | ######
364 | ## Subset events and columns: only events that:
365 | ##  1. involve specified actor set on both side (as ENTITIES)
366 | ##  2. involve TWO DIFFERENT actors (i.e. no self-interactions
367 | ##      as specified by user)
368 | if(('states' %in% actorset)){
369 | master_data <- master_data[(actora %in% paste0(actors, 'GOV')
370 | & actorb %in% paste0(actors, 'GOV'))
371 | | (actora %in% paste0(actors, 'MIL')
372 | & actorb %in% paste0(actors, 'MIL'))]
373 | master_data <- master_data[substr(actora, 1, 3) != substr(actorb, 1, 3)]
374 | ## Set actor codes to state-level factors
375 | master_data[, actora := factor(substr(actora, 1, 3), levels = levels(actors))]
376 | master_data[, actorb := factor(substr(actorb, 1, 3), levels = levels(actors))]
377 | } else{
378 | master_data <- master_data[(actora %in% actors
379 | & actorb %in% actors)]
380 | master_data <- master_data[actora != actorb]
381 | master_data[, actora := factor(actora, levels = levels(actors))]
382 | master_data[, actorb := factor(actorb, levels = levels(actors))]
383 | }
384 | ## Subset columns: drop unused event column
385 | if(level == 'rootcode'){
386 | master_data[, eventcode := NULL]
387 | master_data[, goldstein := NULL]
388 | master_data[, pentaclass := NULL]
389 | } else if(level == 'eventcode') {
390 | master_data[, rootcode := NULL]
391 | master_data[, goldstein := NULL]
392 | master_data[, pentaclass := NULL]
393 | } else if(level == 'goldstein') {
394 | master_data[, eventcode := NULL]
395 | master_data[, rootcode := NULL]
396 | master_data[, pentaclass := NULL]
397 | } else if(level == 'pentaclass') {
398 | master_data[, eventcode := NULL]
399 | master_data[, rootcode := NULL]
400 | master_data[, goldstein := NULL]
401 | setcolorder(master_data, c(1,2,3,5,4))
402 | }
403 | ## Set names to generic
404 | setnames(master_data, c('date', 'actora', 'actorb', 'code', 'source'))
405 | ## Set CAMEO coded event/root codes to factors
406 | master_data[, code := factor(code, levels = codes)]
407 | ## Set keys
408 | setkeyv(master_data, c('date', 'actora', 'actorb', 'code', 'source'))
409 | ######
410 | #
411 | # Export : how much overlap between Phoenix and ICEWS reporting?
412 | #
413 | ######
414 | ## Create some temporary flag variables
415 | master_data[, dup_fromtop := duplicated(
416 | master_data[, list(date, actora, actorb, code)])]
417 | master_data[, dup_frombot := duplicated(
418 | master_data[, list(date, actora, actorb, code)], fromLast = T)]
419 | ## Export data on reporting overlap
420 | # Phoenix reporting only
421 | dates_tab <- data.table(date = dates)
422 | phoenix_only <- master_data[, sum(dup_fromtop == F
423 | & source == 'phoenix'), by = date]
424 | phoenix_only <- merge(dates_tab, phoenix_only, by = 'date', all.x = T)
425 | phoenix_only[is.na(V1), V1 := 0]
426 | sources_overlap$phoenix_only <- phoenix_only$V1
427 | # ICEWS reporting only
428 | icews_only <- master_data[, sum(dup_frombot == F
429 | & source == 'icews'), by = date]
430 | icews_only <- merge(dates_tab, icews_only, by = 'date', all.x = T)
431 | icews_only[is.na(V1), V1 := 0]
432 | sources_overlap$icews_only <- icews_only$V1
433 | # Both sources report
434 | both_sources <- master_data[, sum(dup_fromtop == T), by = date]
435 | both_sources <- merge(dates_tab, both_sources, by = 'date', all.x = T)
436 | both_sources[is.na(V1), V1 := 0]
437 | sources_overlap$both_sources <- both_sources$V1
438 | ## Drop flags and source variable
439 | master_data[, dup_fromtop := NULL]
440 | master_data[, dup_frombot := NULL]
441 | master_data[, source := NULL]
442 | master_data
443 | sources_overlap
444 | master_data <- unique(master_data)
445 | master_data[, date := lubridate::floor_date(date, time_window)]
446 | master_data <- master_data[date %in% dates]
447 | master_data
448 | master_data[, .N, by = list(date, actora, actorb, code)]
449 | tie_type = 'count'
450 | ## Subset events
451 | if(tie_type == 'binary'){
452 | ## Subset events: drop duplicated events/days/actors
453 | master_data <- master_data[!duplicated(master_data)]
454 | } else if(tie_type == 'count'){
455 | ## Subset events: drop duplicated events/days/actors
456 | master_data <- master_data[, .N, by = list(date, actora, actorb, code)]
457 | }
458 | master_data
459 | ## Format for networkDynamic creation
460 | master_data[, date := as.integer(format(date, '%Y%m%d'))]
461 | master_data[, end_date := date]
462 | setcolorder(master_data, c('date', 'end_date', 'actora', 'actorb', 'code'))
463 | ######
464 | #
465 | # For each time period in the specified range, subset the master data set,
466 | #  convert interactions to network ties, and turn the resulting edgelist
467 | #  into a network object. Save networks to a master list object.
468 | #
469 | ######
470 | dates <- c(dates, (max(dates) + 1))
471 | dates <- as.integer(format(dates, '%Y%m%d'))
472 | master_data[, date := as.integer(format(date, '%Y%m%d'))]
473 | master_data[, end_date := date]
474 | master_data
475 | this_code <- 1
476 | event_data <- master_data[code %in% this_code, list(date, end_date, actora, actorb)]
477 | this_code <- '4'
478 | if(tie_type == 'binary'){
479 | event_data <- master_data[code %in% this_code, list(date, end_date, actora, actorb)]
480 | } else if(tie_type == 'count'){
481 | event_data <- master_data[code %in% this_code, list(date, end_date, actora, actorb, N)]
482 | }
483 | event_data[, actora := as.integer(actora)]
484 | event_data[, actorb := as.integer(actorb)]
485 | event_net <- network::network.initialize(n = n, directed = T, loops = F)
486 | network.vertex.names(event_net) <- levels(actors)
487 | event_net
488 | event_data
489 | ?networkDynamic
490 | temporal_codenet <- networkDynamic(base.net = event_net
491 | , edge.spells = event_data
492 | , net.obs.period = net_period
493 | , edge.TEA.names = 'N'
494 | , verbose = F)
495 | temporal_codenet
496 | get.edge.attribute(temporal_codenet, 'N')
497 | get.edge.attribute(temporal_codenet, 'active')
498 | ?activate.edge.attribute
499 | temporal_codenet <- networkDynamic(base.net = event_net
500 | , edge.spells = event_data
501 | , net.obs.period = net_period
502 | , create.TEAs = T
503 | , edge.TEA.names = 'N'
504 | , verbose = F)
505 | temporal_codenet
506 | get.edge.attribute(temporal_codenet, 'N.active')
507 | get.edge.attribute(temporal_codenet, 'active')
508 | foo <- network.collapse(temporal_codenet, at = 20150101)
509 | foo
510 | get.edge.attribute(foo, 'N')
511 | plot(foo)
512 | library(phoenixNet)
513 | 


--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrhammond/EventNetworks/77d7b76763ef8214ab837e6d4bfd86b94bd22b21/.DS_Store


--------------------------------------------------------------------------------
/.Rapp.history:
--------------------------------------------------------------------------------
 1 | setwd("/Users/bkinne/Dropbox/Minerva/phoenixNet")
 2 | library(devtools)
 3 | remove.packages("phoenixNet")
 4 | getwd()
 5 | build()
 6 | install()
 7 | library(phoenixNet)
 8 | net.dat <- phoenix_net(20010101, 20021231, level="pentaclass", phoenix_loc="~/Dropbox/Minerva/phoenix", icews_loc="~/Dropbox/Minerva/icews", time_window="week")
 9 | ?phoenix_net
10 | net.dat <- phoenix_net(20010101, 20021231, level="pentaclass", phoenix_loc="~/Dropbox/Minerva/phoenix", icews_loc="~/Dropbox/Minerva/icews", time_window="week", update=F)
11 | net.dat <- phoenix_net(20010101, 20021231, level="pentaclass", icews_loc="~/Dropbox/Minerva/icews", time_window="week", update=F)
12 | ?phoenix_net
13 | net.dat <- phoenix_net("20010101", "20021231", level="pentaclass", icews_loc="~/Dropbox/Minerva/icews", time_window="week", update=F)
14 | remove.packages("phoxy")
15 | devtools::install_github("ahalterman/phoxy")
16 | net.dat <- phoenix_net("20010101", "20021231", level="pentaclass", icews_loc="~/Dropbox/Minerva/icews", time_window="week", update=F)
17 | net.dat <- phoenix_net(20010101, 20021231, level="pentaclass", phoenix_loc="~/Dropbox/Minerva/phoenix", icews_loc="~/Dropbox/Minerva/icews", time_window="week", update=F)
18 | traceback()
19 | net.dat <- phoenix_net(20100101, 20121231, level="pentaclass", phoenix_loc="~/Dropbox/Minerva/phoenix", icews_loc="~/Dropbox/Minerva/icews", time_window="week", update=F)
20 | net.dat <- phoenix_net(20150101, 20151031, level="pentaclass", phoenix_loc="~/Dropbox/Minerva/phoenix", icews_loc="~/Dropbox/Minerva/icews", time_window="week", update=F)
21 | remove.packages("phoenixNet")
22 | build()
23 | install()
24 | net.dat <- phoenix_net(20010101, 20021231, level="pentaclass", phoenix_loc="~/Dropbox/Minerva/phoenix", icews_loc="~/Dropbox/Minerva/icews", time_window="week", update=F)
25 | remove.packages("phoenixNet")
26 | devtools::install_github("jrhammond/phoenixNet")
27 | library(phoenixNet)
28 | net.dat <- phoenix_net(20010101, 20021231, level="pentaclass", phoenix_loc="~/Dropbox/Minerva/phoenix", icews_loc="~/Dropbox/Minerva/icews", time_window="week", update=F)
29 | remove.packages("phoenixNet")
30 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | phoenix_processing.R
5 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: EventNetworks
 2 | Title: Gather, munge, and convert event data into daily event-networks.
 3 | Version: 0.0.0.9000
 4 | Author: person("Jesse", "Hammond", email = "jesse.hammond1@gmail.com", role =
 5 |     c("aut", "cre"))
 6 | Description: This package downloads daily event data (time range set
 7 |     by user) into a set of temp files. Event data are merged and de-duplicated,
 8 |     then turned into daily network objects by event-code or event-root-code. The
 9 |     resulting set of networks is saved as a (very) large list object, organized by
10 |     event-code and then by date.
11 | Depends:
12 |     R (>= 3.1.2),
13 |     data.table,
14 |     countrycode,
15 |     reshape2,
16 |     statnet,
17 |     plyr,
18 |     knitr
19 | VignetteBuilder: knitr
20 | Suggests:
21 |     knitr
22 | License: GPL-3
23 | LazyData: true
24 | RoxygenNote: 6.0.1
25 | 


--------------------------------------------------------------------------------
/EventNetworks.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | exportPattern("^[^\\.]")
2 | 


--------------------------------------------------------------------------------
/R/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrhammond/EventNetworks/77d7b76763ef8214ab837e6d4bfd86b94bd22b21/R/.DS_Store


--------------------------------------------------------------------------------
/R/.Rapp.history:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrhammond/EventNetworks/77d7b76763ef8214ab837e6d4bfd86b94bd22b21/R/.Rapp.history


--------------------------------------------------------------------------------
/R/EventNetworks.R:
--------------------------------------------------------------------------------
  1 | #' Convert Phoenix event data to daily event-networks.
  2 | #'
  3 | #'  Take event-level data and convert it into
  4 | #'  networks of interaction by time period. Output is in
  5 | #'  the form of a nested list object where each element is
  6 | #'  an R network object. These networks can then be processed
  7 | #'  and analyzed.
  8 | #'
  9 | #' @param start_date start date of time period as Ymd-format integer (ex:
 10 | #'          June 1, 2014 as 20140601).
 11 | #' @param end_date end date of time period as Ymd-format integer (ex:
 12 | #'          June 1, 2014 as 20140601).
 13 | #' @param dv_server Dataverse server address from which to download
 14 | #'          up-to-date ICEWS data. Defaults to Harvard Dataverse at
 15 | #'          harvard.dataverse.edu.
 16 | #' @param dv_key Unique user key to access SWORD API and automatically find and
 17 | #'           download up-to-date ICEWS data.
 18 | #' @param level level of event granularity ('eventcode', 'rootcode',
 19 | #'           'pentaclass', or 'goldstein'). 'Eventcode' creates a network for
 20 | #'           each of the 226 sub-codes in CAMEO. 'Rootcode' creates a network
 21 | #'           for each of the 20 event root codes in CAMEO. 'Pentaclass' creates
 22 | #'           a network for each of the 0-4 pentaclass codes in CAMEO.
 23 | #'           'Goldstein' creates one or two networks denoting mean Goldstein
 24 | #'           scores, either aggregated (positive - negative) or separated into
 25 | #'           two separate networks for positive and negative Goldstein scores.
 26 | #' @param phoenix_loc folder containing Phoenix data sets as daily .csv
 27 | #'          data tables. Automatically checks for new data sets each time
 28 | #'          the function is run, and downloads new daily data as it becomes
 29 | #'          available. Currently in 'one-and-done' format
 30 | #'          where it downloads the first time, and checks thereafter.
 31 | #' @param icews_loc folder containing ICEWS data sets as daily .tab data
 32 | #'          tables. Because I don't know how to work a SWORD API, these will
 33 | #'          need to be manually downloaded and updated.
 34 | #' @param histphoenix_loc folder containing historic Phoenix data from
 35 | #'          UIUC's Cline Center for Democracy. Leave empty if you don't
 36 | #'          want to use these data.
 37 | #' @param dv_server location of the ICEWS Dataverse server. Defaults to
 38 | #'          "harvard.dataverse.edu" and probably won't change anytime soon.
 39 | #' @param update should phoenixNet attempt to download new data? This will attempt
 40 | #'          to download any Phoenix data files that 'should' be present in the
 41 | #'          Phoenix data directory (one data file per day, from 2014-06-20 through
 42 | #'          the present day) and denote whether or not any of these files
 43 | #'          come up missing in the process.
 44 | #' @param actorset set of actors for which to create event-networks. Defaults
 45 | #'          to the 255 ISO-coded states in the international system. Specifying
 46 | #'          a specific state or set of states (as 3-character ISO codes) will
 47 | #'          extract all the 'major' domestic entities within that state/states.
 48 | #' @param codeset subset of event codes as specified by 'level'. This is useful
 49 | #'          if you desire to extract only a portion of interactions recorded
 50 | #'          by CAMEO, but has to align with the code aggregation specified
 51 | #'          in the 'level' argument. For example, if you specify 'rootcode',
 52 | #'          the 'codeset' you specify has to be one or more root codes between
 53 | #'          1 and 20. Entering a subset of root code values would return a
 54 | #'          smaller number of network layers. Defaults to 'all'.
 55 | #' @param code_subset subset of EVENTCODES that can be aggregated up to higher
 56 | #'          order interactions. For example, you might want to only look at
 57 | #'          event codes below 100, but then aggregate those event codes to
 58 | #'          rootcode or pentaclass.
 59 | #' @param time_window temporal window to build event-networks. Valid
 60 | #'          entries are 'day', 'week', 'month', 'quarter', or 'year'.
 61 | #' @param tie_type type of ties to return. Default is binarized ties where
 62 | #'          a tie represents the presence of one OR MORE interactions in the
 63 | #'          time period specified. Valid entries are 'binary', 'count'
 64 | #'          (count of events), 'meangoldstein' (mean Goldstein score),
 65 | #'          'sepgoldstein' (mean positive/negative Goldstein scores separated).
 66 | #'          NOTE: choosing a Goldstein score as tie type negates the "level"
 67 | #'          argument.
 68 | #' @param sources use only Phoenix or ICEWS data in creating event networks.
 69 | #'          Valid entries are 'phoenix', 'icews', 'histphoenix' or 'all' (default).
 70 | #'
 71 | #' @return master_networks a LIST object containing temporally referenced event-networks.
 72 | #'
 73 | #' @rdname eventNetworks
 74 | #'
 75 | #' @author Jesse Hammond
 76 | #'
 77 | #' @note This function is still in early development and may contain significant errors.
 78 | #'        Don't trust it.
 79 | #'
 80 | 
 81 | #' @export
 82 | #'
 83 | #' @import data.table
 84 | #' @import countrycode
 85 | #' @import reshape2
 86 | #' @import statnet
 87 | #' @import tsna
 88 | #' @import plyr
 89 | #' @import lubridate
 90 | #' @import dataverse
 91 | #' @import bit64
 92 | eventNetworks <- function(start_date
 93 |                         , end_date
 94 |                         , level
 95 |                         , dv_key
 96 |                         , phoenix_loc = NULL
 97 |                         , icews_loc = NULL
 98 |                         , histphoenix_loc = NULL
 99 |                         , dv_server = 'harvard.dataverse.edu'
100 |                         , update = TRUE
101 |                         , actorset = 'states'
102 |                         , codeset = 'all'
103 |                         , time_window = 'day'
104 |                         , code_subset = 'all'
105 |                         , tie_type = 'binary'
106 |                         , sources = 'all'
107 |                         ){
108 |   ## Build one actor-by-actor tie array per time window from ICEWS and
109 |   ##  Phoenix event records, restricted to the requested actors and codes.
110 |   sources <- tolower(sources)  # 'ICEWS' and 'icews' are both accepted
111 | 
112 |   ## Goldstein tie types always use the Goldstein score column, so they
113 |   ##  override whatever 'level' was requested.
114 |   if(tie_type %in% c('meangoldstein', 'sepgoldstein')){
115 |     level <- 'goldstein'
116 |   }
117 | 
118 |   ######
119 |   #
120 |   # Set up some initial values: Time windows
121 |   #
122 |   ######
123 | 
124 |   ## Date objects: anything that is not already a Date (e.g. the integer
125 |   ##  20140620) is parsed as year-month-day.
126 |   if (!inherits(start_date, 'Date') || !inherits(end_date, 'Date')){
127 |     start_date <- as.Date(lubridate::ymd(start_date))
128 |     end_date <- as.Date(lubridate::ymd(end_date))
129 |   }
130 |   dates <- seq.Date(start_date, end_date, by = 'day')
131 |   dates <- unique(lubridate::floor_date(dates, time_window))
132 | 
133 |   ######
134 |   #
135 |   # Set up some initial values: Actors
136 |   #
137 |   ######
138 | 
139 |   ## Paste-function that can handle NA entries
140 |   ## (http://stackoverflow.com/questions/13673894/suppress-nas-in-paste)
141 |   paste3 <- function(...,sep=", ") {
142 |     L <- list(...)
143 |     L <- lapply(L,function(x) {x[is.na(x)] <- ""; x})
144 |     ret <-gsub(paste0("(^",sep,"|",sep,"$)"),"",
145 |                gsub(paste0(sep,sep),sep,
146 |                     do.call(paste,c(L,list(sep=sep)))))
147 |     is.na(ret) <- ret==""
148 |     ret
149 |   }
150 | 
151 |   ## Default actors: 255 ISO-coded countries
152 |   if('states' %in% actorset){
153 |     # Set up set of primary actor codes
154 |     statelist <- unique(countrycode::countrycode_data$iso3c)
155 |     statelist <- statelist[!is.na(statelist)]
156 |     statelist <- c(statelist, 'KSV', 'IGO')
157 |     statelist <- sort(statelist)
158 |     actors <- as.factor(statelist)
159 |     n <- length(actors)
160 | 
161 |   } else {
162 |     ## Set up set of secondary actor codes
163 |     secondary_actors <- c('GOV', 'MIL', 'REB', 'OPP', 'PTY', 'COP', 'JUD'
164 |                           , 'SPY', 'MED', 'EDU', 'BUS', 'CRM', 'CVL')
165 |     statelist <- countrycode::countrycode_data$iso3c
166 |     statelist <- statelist[!is.na(statelist)]
167 |     actors <- unique(statelist[statelist %in% actorset])
168 |     actors <- actors[!is.na(actors)]
169 |     actors <- unique(as.vector(outer(actors, secondary_actors, paste, sep = '')))
170 |     actors <- as.factor(sort(actors))
171 |     n <- length(actors)
172 |   }
173 | 
174 |   ######
175 |   #
176 |   # Set up some initial values: Event codes
177 |   #
178 |   ######
179 | 
180 |   ## Factor variables describing CAMEO categories
181 |   if(level == 'rootcode'){
182 |     codes <- factor(1:20)
183 |     levels(codes) <- as.character(1:20)
184 |   } else if(level == 'eventcode'){
185 |     codes <- factor(1:298)
186 |     levels(codes) <- as.character(
187 |       c(10:21, 211:214, 22:23, 231:234, 24, 241:244, 25, 251:256, 26:28, 30:31
188 |         , 311:314, 32:33, 331:334, 34, 341:344, 35, 351:356, 36:46, 50:57
189 |         , 60:64, 70:75, 80:81, 811:814, 82:83, 831:834, 84, 841:842, 85:86
190 |         , 861:863, 87, 871:874, 90:94, 100:101, 1011:1014, 102:103, 1031:1034
191 |         , 104, 1041:1044, 105, 1051:1056, 106:108, 110:112, 1121:1125, 113:116
192 |         , 120:121, 1211:1214, 122, 1221:1224, 123, 1231:1234, 124, 1241:1246
193 |         , 125:129, 130:131, 1311:1313, 132, 1321:1324, 133:138, 1381:1385
194 |         , 139:141, 1411:1414, 142, 1421:1424, 143, 1431:1434, 144, 1441:1444
195 |         , 145, 1451:1454, 150:155, 160:162, 1621:1623, 163:166, 1661:1663
196 |         , 170:171, 1711:1712, 172, 1721:1724, 173:176, 180:182, 1821:1823, 183
197 |         , 1831:1834, 184:186, 190:195, 1951:1952, 196, 200:204, 2041:2042)
198 |       )
199 |   } else if(level == 'pentaclass'){
200 |     codes <- factor(0:4)
201 |     levels(codes) <- as.character(0:4)
202 |   } else if(level == 'goldstein'){
203 |     ## 'meangoldstein' is the tie type handled further down; 'netgoldstein'
204 |     ##  is still accepted here because earlier code tested for that name.
205 |     if(tie_type %in% c('netgoldstein', 'meangoldstein')){
206 |       codes <- 1
207 |     } else if(tie_type == 'sepgoldstein'){
208 |       codes <- c('mean_neg_goldstein', 'mean_pos_goldstein')
209 |     }
210 |   }
211 | 
212 |   ## Subset of event codes
213 |   if(!any('all' %in% codeset)){
214 |     if(sum(!codeset %in% codes) > 0){
215 |       message('Warning: some event codes do not match specified event class.
216 |               Proceeding with valid event codes.')
217 |     }
218 |     codes <- codes[codes %in% codeset]
219 |     if(length(codes) == 0){
220 |       stop('Please enter a valid set of event codes or pentaclass values.')
221 |     }
222 |   }
223 | 
224 |   ######
225 |   #
226 |   # Download raw files from Phoenix data repo and ICEWS dataverse.
227 |   #
228 |   ######
229 | 
230 |   ## Download new Phoenix data tables. This will download the entire
231 |   ##  archive the first time this function is run and fully populate
232 |   ##  the destination folder.
233 | 
234 |   if(isTRUE(as.logical(update))){
235 |     message('Checking Phoenix data...')
236 |     EventNetworks::update_phoenix(destpath = phoenix_loc)
237 |     message('Checking ICEWS data...')
238 |     EventNetworks::update_icews(destpath = icews_loc)
239 |   }
240 | 
241 |   ######
242 |   #
243 |   # Read and parse ICEWS data for merging.
244 |   #
245 |   ######
246 | 
247 |   icews_data <- data.table(date = as.Date(character())
248 |                            , actora = character()
249 |                            , actorb = character()
250 |                            , rootcode = numeric()
251 |                            , eventcode = integer()
252 |                            , goldstein = numeric())
253 | 
254 |   if (sources %in% c('icews', 'all')){
255 | 
256 |     if(end_date < as.Date('1995-01-01')){
257 |       ## Only parse ICEWS data if it exists in that date range; otherwise
258 |       ##  icews_data stays the empty template built above.
259 |       message('Specified timespan ends before ICEWS data coverage begins.')
260 | 
261 |     } else {
262 |       ## Check to see if ICEWS folder exists and that it has at least one 'valid'
263 |       ##  ICEWS data table stored.
264 |       years <- format(unique(lubridate::floor_date(dates, 'year')), '%Y')
265 |       message('Checking ICEWS data...')
266 |       icews_files <- list.files(icews_loc)
267 |       icews_years <- ldply(strsplit(icews_files, '\\.'))$V2
268 |       access_years <- which(icews_years %in% years)
269 | 
270 |       if(length(access_years) == 0){
271 |         message('No ICEWS files found in the specified timespan.')
272 |       } else {
273 | 
274 |         ## Read and parse ICEWS data
275 |         message('Ingesting ICEWS data...')
276 |         icews_data <- ingest_icews(icews_loc, start_date, end_date)
277 | 
278 |         ## Clean ICEWS data and format to Phoenix-style CAMEO codes
279 |         ##  for actors and states
280 |         message('Munging ICEWS data...')
281 |         icews_data <- icews_cameo(icews_data)
282 | 
283 |         ## Subset ICEWS data to only keep key columns
284 |         icews_data <- icews_data[, list(date, sourceactorentity
285 |                                         , targetactorentity, rootcode
286 |                                         , eventcode, goldstein)]
287 |         setnames(icews_data, c('sourceactorentity', 'targetactorentity')
288 |                  , c('actora', 'actorb'))
289 |       }
290 |     }
291 |   }
292 | 
293 |   ######
294 |   #
295 |   # Read and parse live Phoenix data for merging.
296 |   #
297 |   ######
298 | 
299 |   phoenix_data <- data.table(date = as.Date(character())
300 |                            , actora = character()
301 |                            , actorb = character()
302 |                            , rootcode = numeric()
303 |                            , eventcode = integer()
304 |                            , goldstein = numeric())
305 | 
306 |   if (sources %in% c('phoenix', 'all')){
307 | 
308 |     if(end_date < as.Date('2014-06-20')){
309 |       ## Only parse Phoenix data if it exists in that date range
310 |       message('Specified timespan ends before live Phoenix data coverage begins.')
311 | 
312 |     } else {
313 | 
314 |       ## Read and parse Phoenix data
315 |       message('Ingesting Phoenix data...')
316 |       phoenix_data <- ingest_phoenix(phoenix_loc = phoenix_loc
317 |                                      , start_date = start_date
318 |                                      , end_date = end_date)
319 | 
320 |       ## Subset Phoenix data to only keep key columns
321 |       phoenix_data <- phoenix_data[, list(date, paste3(sourceactorentity
322 |                                                        , sourceactorrole, sep = '')
323 |                                           , paste3(targetactorentity
324 |                                                    , targetactorrole, sep = '')
325 |                                           , rootcode, eventcode, goldstein)]
326 |       setnames(phoenix_data, c('V2', 'V3')
327 |                , c('actora', 'actorb'))
328 | 
329 |       ## Drop any missing data
330 |       phoenix_data <- phoenix_data[!is.na(rootcode)]
331 |       phoenix_data <- phoenix_data[!is.na(eventcode)]
332 |       phoenix_data <- phoenix_data[!is.na(goldstein)]
333 |     }
334 |   }
335 | 
336 |   ######
337 |   #
338 |   # Read and parse historic Phoenix data for merging.
339 |   #
340 |   ######
341 | 
342 |   histphoenix_data <- data.table(date = as.Date(character())
343 |                            , actora = character()
344 |                            , actorb = character()
345 |                            , rootcode = numeric()
346 |                            , eventcode = integer()
347 |                            , goldstein = numeric())
348 | 
349 |   if (sources %in% c('histphoenix', 'all')){
350 | 
351 |     if(end_date < as.Date('1945-01-01')){
352 |       ## Only parse Phoenix data if it exists in that date range
353 |       message('Specified timespan ends before historic Phoenix data coverage begins.')
354 | 
355 |     } else {
356 | 
357 |       ## Read and parse Phoenix data
358 |       message('Ingesting historic Phoenix data...')
359 | 
360 |       ## Read in and pre-parse historic Phoenix data
361 |       histphoenix_data <- ingest_histphoenix(histphoenix_loc, start_date, end_date, actors)
362 | 
363 |       ## Subset historic Phoenix data to only keep key columns
364 |       histphoenix_data <- histphoenix_data[, list(date, source, target
365 |                                           , root_code, code, goldstein)]
366 |       setnames(
367 |         histphoenix_data
368 |         , c('date', 'actora', 'actorb', 'rootcode', 'eventcode', 'goldstein')
369 |       )
370 |     }
371 |   }
372 | 
373 | 
374 |   ######
375 |   #
376 |   # Combine data sets
377 |   #
378 |   ######
379 | 
380 |   master_data <- rbind(icews_data, phoenix_data, histphoenix_data)
381 |   if(nrow(master_data) == 0){
382 |     stop('No Phoenix or ICEWS data available for the specified timespan.')
383 |   }
384 | 
385 |   ## Subset events: if a subset of EVENTCODES are specified, keep only that
386 |   ##  set of events and aggregate up from there.
387 |   if(!any('all' %in% code_subset)){
388 |     master_data <- master_data[eventcode %in% code_subset]
389 |   }
390 | 
391 |   ## Create new variable: Pentaclass (0-4)
392 |   master_data[rootcode %in% c(1, 2), pentaclass := 0L]
393 |   master_data[rootcode %in% c(3, 4, 5), pentaclass := 1L]
394 |   master_data[rootcode %in% c(6, 7, 8), pentaclass := 2L]
395 |   master_data[rootcode %in% c(9, 10, 11, 12, 13, 16), pentaclass := 3L]
396 |   master_data[rootcode %in% c(14, 15, 17, 18, 19, 20), pentaclass := 4L]
397 | 
398 | 
399 |   ######################################
400 |   ## IMPORTANT ASSUMPTION HERE:
401 |   ## I am *ASSUMING* that NULL/NA entries after a state code
402 |   ##  implies that the actor is the GOVERNMENT. As such I am replacing
403 |   ##  all such missing entries with 'GOV'.
404 |   ######################################
405 |   master_data[actora %in%  countrycode::countrycode_data$iso3c
406 |               , actora := paste0(actora, 'GOV')]
407 |   master_data[actorb %in%  countrycode::countrycode_data$iso3c
408 |               , actorb := paste0(actorb, 'GOV')]
409 | 
410 | 
411 |   ######
412 |   ## Subset events and columns: only events that:
413 |   ##  1. involve specified actor set on both side (as ENTITIES)
414 |   ##  2. involve TWO DIFFERENT actors (i.e. no self-interactions
415 |   ##      as specified by user)
416 |   ######
417 |   if(('states' %in% actorset)){
418 |     master_data <- master_data[
419 |       substr(actora, 1, 3) %in% actors & substr(actora, 4, 6) %in% c('GOV', 'MIL', '')
420 |       & substr(actorb, 1, 3) %in% actors & substr(actorb, 4, 6) %in% c('GOV', 'MIL', '')
421 |       & actora != actorb
422 |       ]
423 |     master_data[, actora := substr(actora, 1, 3)]
424 |     master_data[, actorb := substr(actorb, 1, 3)]
425 |     master_data[, actora := factor(actora, levels = levels(actors))]
426 |     master_data[, actorb := factor(actorb, levels = levels(actors))]
427 | 
428 |   } else{
429 |     master_data[, actora := substr(actora, 1, 6)]
430 |     master_data[, actorb := substr(actorb, 1, 6)]
431 |     master_data <- master_data[(actora %in% actors
432 |                               & actorb %in% actors
433 |                               & actora != actorb)]
434 |     master_data[, actora := factor(actora, levels = actors)]
435 |     master_data[, actorb := factor(actorb, levels = actors)]
436 |   }
437 | 
438 | 
439 |   ######
440 |   #
441 |   # Format data by de-duplicating, separating by date,
442 |   # and dropping unused columns
443 |   #
444 |   ######
445 | 
446 |   ## Drop duplicated variables
447 |   master_data <- unique(master_data)
448 | 
449 |   ## Drop self-events
450 |   master_data <- master_data[actora != actorb]
451 | 
452 |   ## Subset columns: drop unused event column
453 |   if(level == 'rootcode'){
454 |     master_data[, eventcode := NULL]
455 |     master_data[, goldstein := NULL]
456 |     master_data[, pentaclass := NULL]
457 |   } else if(level == 'eventcode') {
458 |     master_data[, rootcode := NULL]
459 |     master_data[, goldstein := NULL]
460 |     master_data[, pentaclass := NULL]
461 |   } else if(level == 'goldstein') {
462 |     master_data[, eventcode := NULL]
463 |     master_data[, rootcode := NULL]
464 |     master_data[, pentaclass := NULL]
465 |   } else if(level == 'pentaclass') {
466 |     master_data[, eventcode := NULL]
467 |     master_data[, rootcode := NULL]
468 |     master_data[, goldstein := NULL]
469 |   }
470 | 
471 | 
472 |   ## Set names to generic
473 |   setnames(master_data, c('date', 'actora', 'actorb', 'code'))
474 | 
475 |   ## Set CAMEO coded event/root/pentaclass codes to factors
476 |   # if(!level == 'goldstein'){
477 |   #   master_data[, code := factor(code, levels = codes)]
478 |   # }
479 | 
480 |   ## Set keys
481 |   setkeyv(master_data, c('date', 'actora', 'actorb', 'code'))
482 | 
483 | 
484 |   ## Aggregate dates to specified time window
485 |   master_data[, date := lubridate::floor_date(date, time_window)]
486 | 
487 |   ## Subset events: keep only events within date range
488 |   master_data <- master_data[date %in% dates]
489 | 
490 |   ## Subset events
491 |   if(tie_type == 'binary'){
492 |     ## Subset events: drop duplicated events/days/actors
493 |     master_data <- unique(master_data)
494 |   } else if(tie_type == 'count'){
495 |     ## Count events per window/dyad/code; the count lands in column N
496 |     master_data <- master_data[, .N, by = list(date, actora, actorb, code)]
497 |   } else if(tie_type == 'meangoldstein'){
498 |     master_data <- master_data[, mean_goldstein := mean(code), by = list(date, actora, actorb)]
499 |   } else if(tie_type == 'sepgoldstein'){
500 |     master_data[, pos_goldstein := NA_real_]
501 |     master_data[code > 0, pos_goldstein := code]
502 |     master_data[, neg_goldstein := NA_real_]
503 |     master_data[code < 0, neg_goldstein := code]
504 |     master_data[, mean_pos_goldstein := mean(pos_goldstein, na.rm = TRUE), by = list(date, actora, actorb)]
505 |     master_data[is.na(mean_pos_goldstein), mean_pos_goldstein := 0]
506 |     master_data[, mean_neg_goldstein := mean(neg_goldstein, na.rm = TRUE), by = list(date, actora, actorb)]
507 |     master_data[is.na(mean_neg_goldstein), mean_neg_goldstein := 0]
508 |     master_data <- master_data[mean_pos_goldstein != 0 | mean_neg_goldstein != 0, ]
509 |     master_data <- unique(master_data, by = c('date', 'actora', 'actorb'))
510 |     master_data[, code := as.integer(mean_pos_goldstein > 0) + 1]
511 |   }
512 | 
513 |   ## Format for networkDynamic creation
514 |   master_data[, date := as.integer(format(date, '%Y%m%d'))]
515 |   master_data[, end_date := date]
516 |   # setcolorder(master_data, c('date', 'end_date', 'actora', 'actorb', 'code'))
517 | 
518 |   ######
519 |   #
520 |   # For each time period in the specified range, subset the master data set,
521 |   #  convert interactions to network ties, and turn the resulting edgelist
522 |   #  into a network object. Save networks to a master list object.
523 |   #
524 |   ######
525 | 
526 | 
527 |   ## Append the start of the window that follows the last one. seq() steps
528 |   ##  by calendar unit, so 'quarter' advances three months.
529 |   if(time_window %in% c('day', 'week', 'month', 'quarter', 'year')){
530 |     last_window <- dates[length(dates)]
531 |     dates <- c(dates, seq(last_window, by = time_window, length.out = 2L)[2L])
532 |   }
533 | 
534 |   ## final_dates therefore ends with one window start beyond the requested
535 |   ##  range; the loops below build an array for that entry as well.
536 |   final_dates <- as.integer(format(dates, '%Y%m%d'))
537 | 
538 | 
539 |   ######
540 |   ## Break out tie construction and decide on output format based on tie type.
541 |   #####
542 | 
543 |   ## Filled in by whichever tie_type branch below applies.
544 |   dated_arrays <- list()
545 | 
546 |   ###### Binary or count-weighted ties
547 |   if(tie_type %in% c('binary', 'count')){
548 | 
549 |     ## Binary ties get a constant weight of 1 so both types share one loop.
550 |     if(tie_type == 'binary'){
551 |       event_data <- master_data[, list(date, end_date, actora, actorb, code)]
552 |       event_data[, N := 1]
553 |     } else {
554 |       event_data <- master_data[, list(date, end_date, actora, actorb, code, N)]
555 |     }
556 | 
557 |     n_codes <- length(codes)
558 | 
559 |     for(i in seq_along(final_dates)){
560 | 
561 |       date_array <- array(
562 |         0
563 |         , dim = c(n, n, n_codes)
564 |         , dimnames = list(actors, actors, unique(codes))
565 |       )
566 |       this_date <- final_dates[i]
567 | 
568 |       for(j in seq_len(n_codes)){
569 |         this_code <- codes[j]
570 |         this_events <- event_data[date %in% this_date & code %in% this_code]
571 | 
572 |         if(nrow(this_events) > 0){
573 |           this_events <- this_events[order(actora, actorb)]
574 |           this_dyad_idx <- as.matrix(
575 |             this_events[, list(as.integer(actora), as.integer(actorb))]
576 |           )
577 |           ## Address the layer by its position j. cbind() reduces a factor
578 |           ##  to its level number, which no longer matches the layer once
579 |           ##  'codeset' has dropped some codes.
580 |           date_array[cbind(this_dyad_idx, j)] <- this_events[, N]
581 |         }
582 |       }
583 |       dated_arrays[[i]] <- date_array
584 |     }
585 |   }
586 | 
587 |   ###### Mean Goldstein score ties
588 |   if(tie_type %in% 'meangoldstein'){
589 | 
590 |     event_data <-  master_data[, list(date, end_date, actora, actorb, mean_goldstein)]
591 | 
592 |     for(i in seq_along(final_dates)){
593 | 
594 |       date_array <- matrix(
595 |         0
596 |         , nrow = n
597 |         , ncol = n
598 |         , dimnames = list(actors, actors)
599 |       )
600 |       this_date <- final_dates[i]
601 |       this_events <- event_data[date %in% this_date]
602 | 
603 |       if(nrow(this_events) > 0){
604 |         this_events <- unique(this_events)
605 |         this_events <- this_events[order(actora, actorb)]
606 |         this_dyad_idx <- as.matrix(
607 |           this_events[, list(as.integer(actora), as.integer(actorb))]
608 |         )
609 |         date_array[this_dyad_idx] <- this_events[, mean_goldstein]
610 |       }
611 | 
612 |       dated_arrays[[i]] <- date_array
613 |     }
614 | 
615 |   }
616 | 
617 |   ###### Separated pos/neg Goldstein score ties
618 |   if(tie_type %in% 'sepgoldstein'){
619 | 
620 |     ## Layer 1 holds mean positive scores, layer 2 mean negative scores.
621 |     event_data <-  master_data[
622 |       , list(date, end_date, actora, actorb, mean_pos_goldstein, mean_neg_goldstein)
623 |       ]
624 | 
625 |     for(i in seq_along(final_dates)){
626 | 
627 |       date_array <- array(
628 |         0
629 |         , dim = c(n, n, 2)
630 |         , dimnames = list(
631 |           actors
632 |           , actors
633 |           , c('mean_pos_goldstein', 'mean_neg_goldstein')
634 |           )
635 |       )
636 | 
637 |       this_date <- final_dates[i]
638 |       this_events <- event_data[date %in% this_date]
639 | 
640 |       if(nrow(this_events) > 0){
641 |         this_events <- unique(this_events)
642 |         this_events <- this_events[order(actora, actorb)]
643 |         this_dyad_pos_idx <- as.matrix(
644 |           this_events[mean_pos_goldstein > 0, list(as.integer(actora), as.integer(actorb))]
645 |         )
646 |         date_array[cbind(this_dyad_pos_idx, 1)] <- this_events[mean_pos_goldstein > 0, mean_pos_goldstein]
647 | 
648 |         this_dyad_neg_idx <- as.matrix(
649 |           this_events[mean_neg_goldstein < 0, list(as.integer(actora), as.integer(actorb))]
650 |         )
651 |         date_array[cbind(this_dyad_neg_idx, 2)] <- this_events[mean_neg_goldstein < 0, mean_neg_goldstein]
652 |       }
653 | 
654 |       dated_arrays[[i]] <- date_array
655 |     }
656 | 
657 | 
658 |   }
659 | 
660 |   ## One list element per entry of final_dates: an n x n x length(codes)
661 |   ##  array for 'binary'/'count', an n x n matrix for 'meangoldstein', an
662 |   ##  n x n x 2 array for 'sepgoldstein'. No branch fills the list for any
663 |   ##  other tie_type, so it is returned empty in that case.
664 |   return(dated_arrays)
665 | }
666 | 
667 | 


--------------------------------------------------------------------------------
/R/agents_doc.R:
--------------------------------------------------------------------------------
 1 | #'
 2 | #' ICEWS CAMEO actor codes
 3 | #'
 4 | #' Merge table to convert actors to CAMEO format using conversion tables created
 5 | #'    by Phil Schrodt (https://github.com/philip-schrodt/text_to_CAMEO)
 6 | #'
 7 | #' @docType data
 8 | #'
 9 | #' @usage data(agents)
10 | #'
11 | #' @keywords datasets
12 | #'
13 | #'
14 | "agents"
15 | 


--------------------------------------------------------------------------------
/R/convert_cameo.R:
--------------------------------------------------------------------------------
 1 | #' Convert CAMEO Codes
 2 | #'
 3 | #' Translates CAMEO event codes into their human-readable descriptions.
 4 | #' CAMEO is an event ontology used in event data projects, including Phoenix.
 5 | #'
 6 | #' @param cameo Vector of CAMEO event codes.
 7 | #' @return The \code{EventDescription} entries matching \code{cameo}, in the
 8 | #'   same order; codes absent from \code{convert_cameo_data} give \code{NA}.
 9 | #' @keywords event data
10 | #' @export
11 | #' @examples
12 | #' \dontrun{
13 | #' # Vector of values to be converted
14 | #' events$Description <- convert_cameo(events$EventCode)
15 | #' }
16 | #'
17 | convert_cameo <- function(cameo){
18 |   # Load the lookup table shipped with the package into this call's
19 |   #  environment only, so nothing is written to the caller's workspace.
20 |   data(convert_cameo_data, envir=environment())
21 | 
22 |   # Keep only rows where both the code and its description are present.
23 |   dict <- na.omit(convert_cameo_data[,c("CAMEOcode", "EventDescription")])
24 | 
25 |   # match() gives NA for unknown codes; indexing with NA then yields NA.
26 |   matches <- match(cameo, dict[, "CAMEOcode"])
27 |   destination_vector <- dict[matches, "EventDescription"]
28 | 
29 |   return(destination_vector)
30 | }
31 | 
32 | 


--------------------------------------------------------------------------------
/R/convert_cameo_data.R:
--------------------------------------------------------------------------------
 1 | #' CAMEO code translation data frame
 2 | #'
 3 | #' A data frame with 310 rows and 2 columns.
 4 | #' Used internally by the \code{convert_cameo()} function.
 5 | #'
 6 | #' \itemize{
 7 | #'   \item CAMEOcode: the 310 different low-level CAMEO codes.
 8 | #'   \item EventDescription: Human-readable descriptions of the codes.
 9 | #' }
10 | #'
11 | #' @note The current CAMEO codebook is located here: \url{http://eventdata.parusanalytics.com/data.dir/cameo.html}.
12 | #' 
13 | #' @docType data
14 | #' @keywords datasets
15 | #' @name convert_cameo_data
16 | #' @usage convert_cameo_data
17 | #' @format A data frame with 310 rows and 2 columns
18 | NULL
19 | 


--------------------------------------------------------------------------------
/R/download_icews.R:
--------------------------------------------------------------------------------
 1 | #' Download the ICEWS Dataset
 2 | #'
 3 | #' Download and unzip all of the data files for the ICEWS dataset from the
 4 | #' Harvard Dataverse into a given directory.
 5 | #'
 6 | #' @param destpath The path to the directory where ICEWS should go.
 7 | #'
 8 | #' @return NULL
 9 | #' @author Original code and concept: Tony Boyles
10 | #' @note This function is still in development and may contain errors and change quickly.
11 | #' @examples
12 | #'
13 | #' download_icews("~/ICEWS/")
14 | #'
15 | #' @rdname download_icews
16 | 
17 | #' @export
18 | #' @import RCurl
19 | #' @importFrom plyr l_ply progress_text
20 | #' @import dataverse
21 | #'
22 | 
23 | ## Get ICEWS links
24 | get_icewslinks <- function(dv_server, dv_key){
25 |   ## Return a data.table (label, id, url) with one row per file of the ICEWS
26 |   ##  dataset whose name contains '.tab'. dv_server and dv_key are accepted
27 |   ##  but not used: get_dataset() is called with its default connection
28 |   ##  settings (presumably the DATAVERSE_SERVER / DATAVERSE_KEY environment
29 |   ##  variables -- confirm). Never store a real API key in this file.
30 | 
31 |   ## Get ICEWS event data information
32 |   icews_data <- dataverse::get_dataset('doi:10.7910/DVN/28075')
33 |   icews_repos <- data.table(
34 |     label = icews_data$files$filename
35 |     , id = icews_data$files$id
36 |   )
37 |   ## fixed = TRUE matches a literal '.tab' instead of "any character + tab"
38 |   icews_repos <- icews_repos[grep('.tab', icews_repos$label, fixed = TRUE), ]
39 | 
40 |   baseURL <- "https://dataverse.harvard.edu/api/access/datafile/"
41 |   icews_repos[, url := paste0(baseURL, icews_repos$id)]
42 |   return(icews_repos)
43 | }
44 | 
45 | 
46 | # given a list of links, download them and write to specified directory
47 | dw_icewsfile <- function(link, destpath, metadata = link_data){
48 |   ## Fetch the ICEWS file whose id is `link` into `destpath` and try to
49 |   ##  unzip it in place. `metadata` supplies the id/label/url lookup.
50 |   ## NOTE: the default `link_data` is not defined in this function, so it
51 |   ##  has to be visible from an enclosing environment when `metadata` is
52 |   ##  omitted.
53 | 
54 |   src_url <- metadata[id %in% link, url]
55 |   dest_file <- paste0(destpath, '/', metadata[id %in% link, label])
56 | 
57 |   ## Windows lets R choose the transfer method; elsewhere curl is used.
58 |   fetch_method <- if (.Platform$OS.type == 'windows') 'auto' else 'curl'
59 |   download.file(src_url, dest_file, method = fetch_method, quiet = TRUE)
60 | 
61 |   ## Extraction is attempted for every download; an error raised by
62 |   ##  unzip() is swallowed by try().
63 |   try(
64 |     {unzip(dest_file, exdir = destpath, unzip = "internal", setTimes = FALSE)}
65 |     , silent = TRUE
66 |   )
67 | 
68 | }
69 | 


--------------------------------------------------------------------------------
/R/download_phoenix.R:
--------------------------------------------------------------------------------
 1 | #' Download the Phoenix Dataset
 2 | #'
 3 | #' Download and unzip all of the data files for the Phoenix dataset from the
 4 | #' Phoenix data website into a given directory.
 5 | #'
 6 | #' @param destpath The path to the directory where Phoenix should go.
 7 | #' @param phoenix_version. Download a specific version of Phoenix ("v0.1.0" or the current version by default).
 8 | #'
 9 | #' @return NULL
10 | #' @author Original code credit: Andy Halterman
11 | #' @note This function, like Phoenix, is still in development and may contain errors and change quickly.
12 | #' @examples
13 | #'
14 | #' download_phoenix("~/OEDA/phoxy_test/", phoenix_version = "current")
15 | #'
16 | #' @rdname download_phoenix
17 | 
18 | 
19 | ## Function 1:
20 | ##    Process the start/end dates desired, and generate a list of
21 | ##    data links to try and download.
22 | get_phoenixlinks <- function(
23 |   start_date = as.Date('2014-06-20')
24 |   , end_date = Sys.Date()
25 | ) {
26 |   # Builds one download URL per calendar day in [start_date, end_date].
27 |   # Both bounds are handed to seq.Date(), so they must be Date objects;
28 |   # nothing is downloaded here and no check is made that a file exists.
29 | 
30 |   # Every day in the requested window, inclusive of both endpoints.
31 |   day_seq <- seq.Date(start_date, end_date, by = 'day')
32 | 
33 |   # File names embed the day as YYYYMMDD; the integer round-trip is kept
34 |   # so the stamp is formatted exactly as before.
35 |   day_stamp <- as.integer(format(day_seq, '%Y%m%d'))
36 | 
37 |   # Prefix/suffix of the daily archives in the Phoenix S3 bucket.
38 |   url_head <- 'https://s3.amazonaws.com/oeda/data/current/events.full.'
39 |   url_tail <- '.txt.zip'
40 | 
41 |   # paste0() vectorises over day_stamp, yielding one URL per day.
42 |   paste0(url_head, day_stamp, url_tail)
43 | }
44 | 
45 | ### Function 2:
46 | ##    Given a single link, try to download that specific Phoenix data file.
47 | ##    If that day's data is not available, notify the user with an error message.
48 | dw_phoenixfile <- function(link, destpath) {
49 |   # Download one zipped Phoenix file and extract it into `destpath`.
50 |   #   link:     URL ending in '.zip'.
51 |   #   destpath: directory the archive is extracted into.
52 |   # A failed extraction produces a message rather than an error. The
53 |   # value returned is that of unlink() on the temporary download.
54 | 
55 |   # Last path component of the link without '.zip'; only used in the message
56 |   m <- regexpr('[^/]*(?=\\.zip$)', link, perl = TRUE)
57 |   filename <- regmatches(link, m)
58 | 
59 |   # Windows gets R's default download method, everything else uses curl
60 |   download_method <- if (.Platform$OS.type == 'windows') 'auto' else 'curl'
61 | 
62 |   # Download to a temporary file
63 |   temp <- tempfile()
64 |   download.file(link, temp, method = download_method, quiet = TRUE)
65 | 
66 |   # Promote warnings to errors while unzipping so that problems unzip()
67 |   # reports as warnings also reach the handler. The caller's own `warn`
68 |   # setting is put back afterwards (it used to be overwritten with 1).
69 |   old_opts <- options(warn = 2)
70 | 
71 |   tryCatch(
72 |     unzip(temp, exdir = destpath)
73 |     , error = function(e){
74 |       message(paste0('Unable to download file ', filename
75 |                      , '. It appears that Phoenix data for this date is missing.'))
76 |     }
77 |     , finally = options(old_opts)
78 |   )
79 | 
80 |   unlink(temp)
81 | }
82 | 


--------------------------------------------------------------------------------
/R/extract_dyadstats.R:
--------------------------------------------------------------------------------
 1 | #'
 2 | #' Extract dyad-level statistics from a given event-network.
 3 | #'
 4 | #' INTERNAL FUNCTION: Intakes a given network object and returns a set
 5 | #'  of dyad-level statistics for output.
 6 | #'
 7 | #' @param input_date A date in integer %Y%m%d format.
 8 | #' @param event_dnet network object containing a set of interactions.
 9 | #'
10 | #' @return net_stats Table of dyad-level statistics.
11 | #'
12 | #' @keywords phoenix, event data
13 | #'
14 | #' @import data.table
15 | #' @import countrycode
16 | #' @import reshape2
17 | #' @import statnet
18 | #' @import tsna
19 | #' @import plyr
20 | #' @import lubridate
21 | #' @import igraph
22 | #' @import intergraph
23 | #'
24 | #' @export
25 | 
26 | 
27 | extract_dyadstats <- function(input_date = this_date, event_dnet = tsna_obj){
28 |   # Returns a data.table (date, nodea, nodeb), keyed on nodea/nodeb, with
29 |   # one row per ordered pair of distinct vertices that infomap assigns to
30 |   # the same community on `input_date`. Vertices are identified by their
31 |   # index in the collapsed network.
32 |   # The defaults `this_date` and `tsna_obj` are not defined here and must
33 |   # exist in the calling environment when the arguments are omitted.
34 | 
35 |   ## Collapse to daily network
36 |   net_obj <- network.collapse(event_dnet, at = input_date)
37 | 
38 |   ## Convert input date to an actual date object
39 |   input_date <- as.Date(as.character(input_date), format = '%Y%m%d')
40 | 
41 |   ## Convert to igraph object via 'intergraph' for additional metrics
42 |   daily_graph <- intergraph::asIgraph(net_obj)
43 | 
44 |   ######
45 |   #
46 |   # Extract a set of DYAD-LEVEL statistics
47 |   #
48 |   ######
49 | 
50 |   ## Community detection
51 |   ic <- igraph::infomap.community(daily_graph)
52 | 
53 |   ## Community membership: one community id per vertex
54 |   ic_membership <- igraph::membership(ic)
55 | 
56 |   ## Sizes of the communities that have more than one member
57 |   ic_sizes <- igraph::sizes(ic)
58 |   size_ic <- ic_sizes[ic_sizes > 1]
59 | 
60 |   ## Convert to edgelist of (community index, vertex index)
61 |   comm_members <- which(ic_membership %in% names(size_ic))
62 |   comm_ids <- as.integer(as.factor(ic_membership[comm_members]))
63 |   comm_edgelist <- cbind(comm_ids, comm_members)
64 | 
65 |   ## Convert to bimodal adjacency matrix. The number of columns follows
66 |   ##  the vertex count of this network instead of a hard-coded 255.
67 |   n_nodes <- length(ic_membership)
68 |   comm_membership <- matrix(0, length(unique(comm_ids)), n_nodes)
69 |   rownames(comm_membership) <- sort(unique(comm_ids))
70 |   colnames(comm_membership) <- seq_len(n_nodes)
71 |   comm_membership[comm_edgelist] <- 1
72 | 
73 |   ## Matrix multiply to get shared membership matrix
74 |   comm_adj <-  t(comm_membership) %*% comm_membership
75 | 
76 |   ## Convert to daily edgelist; if that fails, fall back to one all-NA row
77 |   comm_ties <- tryCatch(
78 |     data.table(input_date, which(comm_adj == 1, arr.ind = TRUE))
79 |     , error = function(e){
80 |       data.table('input_date' = NA, 'nodea' = NA, 'nodeb' = NA)
81 |     }
82 |   )
83 | 
84 |   setnames(comm_ties, c('date', 'nodea', 'nodeb'))
85 |   comm_ties <- comm_ties[nodea != nodeb]
86 |   setkeyv(comm_ties, c('nodea', 'nodeb'))
87 | 
88 |   return(comm_ties)
89 | }
90 | 


--------------------------------------------------------------------------------
/R/extract_netstats.R:
--------------------------------------------------------------------------------
  1 | #'
  2 | #' Extract network-level statistics from a given event-network.
  3 | #'
  4 | #' INTERNAL FUNCTION: Intakes a given network object and returns a set
  5 | #'  of network-level statistics for output.
  6 | #'
  7 | #' @param input_date A date in integer %Y%m%d format.
  8 | #' @param event_dnet network object containing a set of interactions.
  9 | #'
 10 | #' @return net_stats Table of network-level statistics.
 11 | #'
 12 | #' @keywords phoenix, event data
 13 | #'
 14 | #' @import data.table
 15 | #' @import countrycode
 16 | #' @import reshape2
 17 | #' @import statnet
 18 | #' @import tsna
 19 | #' @import plyr
 20 | #' @import lubridate
 21 | #' @import igraph
 22 | #' @import intergraph
 23 | #'
 24 | #' @export
 25 | 
 26 | 
 27 | extract_netstats <- function(input_date = this_date, event_dnet = event_dnet, datelist = dates){
 28 |   # Returns a one-row data.table of network-level statistics for one day.
 29 |   #   input_date: the day to summarise, parsed below with '%Y%m%d'.
 30 |   #   event_dnet: dynamic network handed to network.collapse().
 31 |   #   datelist:   vector of dates; the element just before `input_date`
 32 |   #               is used as the previous period.
 33 |   # The defaults `this_date` and `dates` are not defined here and must
 34 |   # exist in the calling environment when those arguments are omitted.
 35 |   # NOTE(review): the default `event_dnet = event_dnet` refers to itself,
 36 |   # so in practice this argument has to be supplied -- confirm callers do.
 37 | 
 38 |   ## Collapse to daily network
 39 |   net_obj <- network.collapse(event_dnet, at = input_date)
 40 | 
 41 |   ## Find the previous period first, then convert input date to a Date
 42 |   prev_date <- datelist[which(datelist %in% input_date) - 1]
 43 |   input_date <- as.Date(as.character(input_date), format = '%Y%m%d')
 44 | 
 45 |   ## A day without any ties reports zero for every statistic
 46 |   if(network::network.edgecount(net_obj) == 0){
 47 |     return(data.table(date = input_date
 48 |                       , net_jaccard = 0, net_hamming = 0
 49 |                       , net_degree = 0, net_density = 0
 50 |                       , net_trans = 0, net_modularity = 0
 51 |                       , num_communities = 0, comm_meansize = 0
 52 |                       , xcomm_ties = 0
 53 |                       , dyads_mut = 0, dyads_asym = 0
 54 |                       , dyads_null = 0
 55 |                       , triads_003 = 0, triads_012 = 0
 56 |                       , triads_102 = 0, triads_021D = 0
 57 |                       , triads_021U = 0, triads_021C = 0
 58 |                       , triads_111D = 0, triads_111U = 0
 59 |                       , triads_030T = 0, triads_030C = 0
 60 |                       , triads_201 = 0, triads_120D = 0
 61 |                       , triads_120U = 0, triads_120C = 0
 62 |                       , triads_210 = 0, triads_300 = 0))
 63 |   }
 64 | 
 65 |   ## Convert to igraph object via 'intergraph' for additional metrics
 66 |   daily_graph <- intergraph::asIgraph(net_obj)
 67 | 
 68 |   ## Adjacency matrix of the daily network, built once and reused below
 69 |   net_mat <- as.matrix.network(net_obj)
 70 | 
 71 |   ######
 72 |   #
 73 |   # Extract a set of NETWORK-LEVEL statistics
 74 |   #
 75 |   ######
 76 | 
 77 |   #### Changes from previous time period. Any error in this step leaves
 78 |   ####  both change measures as NA.
 79 |   change_stats <- tryCatch({
 80 |     ## Previous period as a matrix
 81 |     net_mat_t1 <- as.matrix.network(network.collapse(event_dnet, at = prev_date))
 82 | 
 83 |     ## Cell-wise sum of the two matrices
 84 |     net_overlap <- net_mat_t1 + net_mat
 85 |     net_intersect <- sum(net_overlap == 2)
 86 |     net_union <- sum(net_overlap >= 1)
 87 |     net_difference <- sum(net_overlap == 0)
 88 | 
 89 |     ## Jaccard index, and the share of cells that agree across periods
 90 |     list(jaccard = net_intersect / net_union
 91 |          , hamming = (net_intersect + net_difference) / length(net_mat))
 92 |   }, error = function(e){
 93 |     list(jaccard = NA, hamming = NA)
 94 |   })
 95 |   net_jaccard <- change_stats$jaccard
 96 |   net_hamming <- change_stats$hamming
 97 | 
 98 |   ## Mean degree
 99 |   # Since it's a mean, in- vs out-degree doesn't matter
100 |   net_degree <- mean(sna::degree(net_mat, gmode = 'digraph'))
101 | 
102 |   ## Density
103 |   net_density <- network.density(net_obj)
104 | 
105 |   ## Transitivity
106 |   net_trans <- gtrans(net_obj, diag = FALSE, mode = 'digraph')
107 | 
108 |   ## Dyad census (read below as mutual, asymmetric, null)
109 |   net_dyads <- sna::dyad.census(net_mat)
110 | 
111 |   ## Triad census (read below in the order of the triads_* columns)
112 |   net_triads <- sna::triad.census(net_mat, mode = 'digraph')
113 | 
114 |   ## Community detection
115 |   ic <- igraph::infomap.community(daily_graph)
116 | 
117 |   ## Network community modularity
118 |   ic_mod <- igraph::modularity(ic)
119 | 
120 |   ## Number and mean size of N>1 communities detected
121 |   ic_sizes <- igraph::sizes(ic)
122 |   size_ic <- ic_sizes[ic_sizes > 1]
123 |   num_ic <- length(size_ic)
124 |   meansize_ic <- mean(size_ic)
125 | 
126 |   ## Share of total ties that connect different communities
127 |   is_crossing <- igraph::crossing(ic, daily_graph)
128 |   share_crossings <- sum(is_crossing == TRUE) / length(is_crossing)
129 | 
130 |   ## Output network stats
131 |   return(data.table(date = input_date
132 |                     , net_jaccard = net_jaccard, net_hamming = net_hamming
133 |                     , net_degree = net_degree, net_density = net_density
134 |                     , net_trans = net_trans, net_modularity = ic_mod
135 |                     , num_communities = num_ic, comm_meansize = meansize_ic
136 |                     , xcomm_ties = share_crossings
137 |                     , dyads_mut = net_dyads[1], dyads_asym = net_dyads[2]
138 |                     , dyads_null = net_dyads[3]
139 |                     , triads_003 = net_triads[1], triads_012 = net_triads[2]
140 |                     , triads_102 = net_triads[3], triads_021D = net_triads[4]
141 |                     , triads_021U = net_triads[5], triads_021C = net_triads[6]
142 |                     , triads_111D = net_triads[7], triads_111U = net_triads[8]
143 |                     , triads_030T = net_triads[9], triads_030C = net_triads[10]
144 |                     , triads_201 = net_triads[11], triads_120D = net_triads[12]
145 |                     , triads_120U = net_triads[13], triads_120C = net_triads[14]
146 |                     , triads_210 = net_triads[15], triads_300 = net_triads[16]))
147 | }
148 | 


--------------------------------------------------------------------------------
/R/extract_nodestats.R:
--------------------------------------------------------------------------------
  1 | #'
  2 | #' Extract node-level statistics from a given event-network.
  3 | #'
  4 | #' INTERNAL FUNCTION: Intakes a given network object and returns a set
  5 | #'  of node-level statistics for output.
  6 | #'
  7 | #' @param input_date A date in integer %Y%m%d format.
  8 | #' @param event_dnet network object containing a set of interactions.
  9 | #'
 10 | #' @return net_stats Table of node-level statistics.
 11 | #'
 12 | #' @keywords phoenix, event data
 13 | #'
 14 | #' @import data.table
 15 | #' @import countrycode
 16 | #' @import reshape2
 17 | #' @import statnet
 18 | #' @import tsna
 19 | #' @import plyr
 20 | #' @import lubridate
 21 | #' @import igraph
 22 | #' @import intergraph
 23 | #'
 24 | #' @export
 25 | 
 26 | extract_nodestats <- function(input_date = this_date, event_dnet = tsna_obj){
 27 |   # Returns a data.table with one row per node-level statistic on
 28 |   # `input_date` ('trans', 'indegree', 'outdegree', 'between', 'recip',
 29 |   # 'combined1', 'combined2'). Columns are `date`, `node_stat` and one
 30 |   # column per vertex name of the collapsed network.
 31 |   # The defaults `this_date` and `tsna_obj` are not defined here and must
 32 |   # exist in the calling environment when the arguments are omitted.
 33 | 
 34 |   ## Collapse to daily network
 35 |   net_obj <- network.collapse(event_dnet, at = input_date)
 36 | 
 37 |   ## Convert input date to an actual date object
 38 |   input_date <- as.Date(as.character(input_date), format = '%Y%m%d')
 39 | 
 40 |   ## Disabled workaround for the final day of an empty tsna object, which
 41 |   ##  by default is a zero-node network. Kept below for reference only.
 42 | #   if(network.size(net_obj) == 0){
 43 | #     filler <- matrix(rep(0, 255), nrow = 1)
 44 | #     dimnames(filler)[[2]] <- paste0('node', 1:255)
 45 | #     return(rbind(as.data.table(cbind(date = input_date
 46 | #                                       , node_stat = 'trans', filler))
 47 | #                  , as.data.table(cbind(date = input_date
 48 | #                                       , node_stat = 'indegree', filler))
 49 | #                  , as.data.table(cbind(date = input_date
 50 | #                                       , node_stat = 'outdegree', filler))
 51 | #                  , as.data.table(cbind(date = input_date
 52 | #                                       , node_stat = 'between', filler))))
 53 | #   }
 54 | 
 55 |   nodes <- network.vertex.names(net_obj)
 56 |   ## Convert to igraph object via 'intergraph' for additional metrics
 57 |   daily_graph <- intergraph::asIgraph(net_obj)
 58 | 
 59 |   ######
 60 |   # Extract a set of NODE-LEVEL statistics
 61 |   # Each statistic becomes a one-row table: date, a label in node_stat,
 62 |   # then one column per vertex (named from `nodes`).
 63 |   ######
 64 |   # NOTE(review): cbind() mixes Date/text/numbers; rows look all-character until converted below -- confirm.
 65 |   ## Local transitivity per vertex; isolates are given zero
 66 |   trans_dist <- matrix(igraph::transitivity(daily_graph, type = 'local'
 67 |                                             , isolates = 'zero'), nrow = 1)
 68 |   dimnames(trans_dist)[[2]] <- nodes
 69 |   trans_dist <- as.data.table(cbind(date = input_date
 70 |                                     , node_stat = 'trans', trans_dist))
 71 | 
 72 |   ## Degree
 73 |   # Indegree (rescaled); NaN entries are set to 0
 74 |   indegree_dist <- matrix(sna::degree(as.matrix.network(net_obj)
 75 |                                       , cmode = 'indegree'
 76 |                                       , rescale = T), nrow = 1)
 77 |   indegree_dist[is.nan(indegree_dist)] <- 0
 78 |   dimnames(indegree_dist)[[2]] <- nodes
 79 |   indegree_dist <- as.data.table(cbind(date = input_date
 80 |                                        , node_stat = 'indegree', indegree_dist))
 81 | 
 82 |   # Outdegree (rescaled); NaN entries are set to 0
 83 |   outdegree_dist <- matrix(sna::degree(as.matrix.network(net_obj)
 84 |                                        , cmode = 'outdegree'
 85 |                                        , rescale = T), nrow = 1)
 86 |   outdegree_dist[is.nan(outdegree_dist)] <- 0
 87 |   dimnames(outdegree_dist)[[2]] <- nodes
 88 |   outdegree_dist <- as.data.table(cbind(date = input_date
 89 |                                         , node_stat = 'outdegree', outdegree_dist))
 90 | 
 91 |   ## Betweenness (rescaled); NaN entries are set to 0
 92 |   between_dist <- matrix(sna::betweenness(as.matrix.network(net_obj)
 93 |                                           , gmode = 'digraph'
 94 |                                           , rescale = T), nrow = 1)
 95 |   between_dist[is.nan(between_dist)] <- 0
 96 |   dimnames(between_dist)[[2]] <- nodes
 97 |   between_dist <- as.data.table(cbind(date = input_date
 98 |                                       , node_stat = 'between', between_dist))
 99 | 
100 |   ## Reciprocity: per vertex, the share of positions where its row and its column of the adjacency matrix hold the same value
101 |   recip_mat <- as.matrix.network(net_obj)
102 |   recip_fun <- function(position, x){
103 |     return(sum(x[position, ] == x[, position]))
104 |   }
105 |   recip_dist <- matrix(sapply(1:nrow(recip_mat), recip_fun, recip_mat) / nrow(recip_mat)
106 |                        , nrow = 1)
107 |   dimnames(recip_dist)[[2]] <- nodes
108 |   recip_dist <- as.data.table(cbind(date = input_date
109 |                               , node_stat = 'recip', recip_dist))
110 | 
111 |   ## Combined metrics, built from the five statistics above
112 |   out_data <- data.table(rbind(trans_dist, indegree_dist
113 |                                , outdegree_dist, between_dist, recip_dist))
114 |   # Numeric copy of the table; node_stat is overwritten with the original labels further down
115 |   dtnew <- out_data[, lapply(.SD, as.numeric)]
116 |   # dtnew2 <- dtnew[, lapply(.SD, scale)]
117 |   dtnew2 <- copy(dtnew)
118 |   dtnew2[, date := NULL]
119 |   dtnew2[, node_stat := NULL]
120 |   dtnew2 <- data.frame(dtnew2)
121 |   dtnew2 <- abs(dtnew2)  # absolute values of the vertex columns only
122 |   for(i in 1:nrow(dtnew2)){  # scale() each row (statistic) across vertices
123 |     dtnew2[i, ] <- scale(as.matrix(dtnew2)[i,])
124 |   }
125 |   dtnew[, node_stat := out_data$node_stat]
126 |   out_data <- dtnew
127 |   combined <- as.data.table(cbind(date = input_date, node_stat = 'combined1'  # column sums of the scaled rows
128 |                                   , matrix(colSums(dtnew2), nrow = 1)))
129 |   combined2 <- as.data.table(cbind(date = input_date, node_stat = 'combined2'  # column sums of (scaled + 1)^2
130 |                                    , matrix(colSums(((dtnew2)+1)^2), nrow = 1)))
131 |   setnames(combined, names(combined)[-c(1:2)], names(between_dist)[-c(1:2)])  # reuse the vertex column names
132 |   setnames(combined2, names(combined2)[-c(1:2)], names(between_dist)[-c(1:2)])
133 |   out_data <- rbind(out_data, combined, combined2)
134 |   dtnew <- out_data[, lapply(.SD, as.numeric)]  # second numeric pass after the rbind
135 |   dtnew[, node_stat := out_data$node_stat]
136 |   out_data <- dtnew
137 |   return(out_data)
138 | }
139 | 


--------------------------------------------------------------------------------
/R/icews_cameo.R:
--------------------------------------------------------------------------------
  1 | #'
  2 | #' Convert ICEWS state/actor codes into CAMEO format,
  3 | #'  and extract root codes from specific CAMEO event codes.
  4 | #'
  5 | #'  Intake a set of ICEWS data (read in after some pre-processing)
  6 | #'    and convert entries to CAMEO format using conversion tables created
  7 | #'    by Phil Schrodt (https://github.com/philip-schrodt/text_to_CAMEO)
  8 | #'
  9 | #'  @param icews ICEWS data as one large data.table
 10 | #'
 11 | #'  @return icews ICEWS data with several new CAMEO code columns.
 12 | #'
 13 | #'  @keywords phoenix, event data
 14 | #'
 15 | #'  @import data.table
 16 | #'  @import plyr
 17 | #'
 18 | #'  @export
 19 | 
 20 | icews_cameo <- function(icews){
 21 |   # Adds CAMEO-style columns to an ICEWS data.table and returns it:
 22 |   #   source_codes / target_codes: three-character agent code looked up
 23 |   #     from the first entry of Source.Sectors / Target.Sectors;
 24 |   #   source_isoc, source_cown, target_isoc, target_cown: state codes
 25 |   #     merged in by country name;
 26 |   #   sourceactorentity / targetactorentity: state code + agent code;
 27 |   #   rootcode: root of `eventcode`.
 28 | 
 29 |   ## Conversion tables (created by Phil Schrodt), loaded into this
 30 |   ##  function's own environment
 31 |   data(states, envir = environment())
 32 |   data(agents, envir = environment())
 33 | 
 34 |   ######
 35 |   #
 36 |   # Helpers
 37 |   #
 38 |   ######
 39 | 
 40 |   ## paste() variant that treats NA pieces as empty and yields NA when
 41 |   ##  nothing is left
 42 |   ## (http://stackoverflow.com/questions/13673894/suppress-nas-in-paste)
 43 |   paste3 <- function(..., sep = ", ") {
 44 |     pieces <- lapply(list(...), function(piece) {
 45 |       piece[is.na(piece)] <- ""
 46 |       piece
 47 |     })
 48 |     glued <- do.call(paste, c(pieces, list(sep = sep)))
 49 |     ## collapse doubled separators, then strip leading/trailing ones
 50 |     glued <- gsub(paste0(sep,sep), sep, glued)
 51 |     glued <- gsub(paste0("(^",sep,"|",sep,"$)"), "", glued)
 52 |     is.na(glued) <- glued == ""
 53 |     glued
 54 |   }
 55 | 
 56 |   ## Look up the agent code for the first sector listed in each entry
 57 |   ##  and keep its first three characters
 58 |   cameo_convert <- function(in_data){
 59 |     lead_sector <- sapply(strsplit(in_data, ','), '[', 1)
 60 |     matched <- merge(data.table(actor = lead_sector), agents
 61 |                      , sort = FALSE, all.x = TRUE, by = 'actor')
 62 |     substr(matched$code1, 1, 3)
 63 |   }
 64 | 
 65 |   ######
 66 |   #
 67 |   # Set up data storage objects
 68 |   #
 69 |   ######
 70 | 
 71 |   ## Ordered list of CAMEO agent codes (not referenced again in this function)
 72 |   agentcodes <- c('GOV','MIL','REB','OPP', 'PTY', 'COP','JUD','SPY'
 73 |                   ,'IGO','MED','EDU','BUS','CRM','CVL','---')
 74 | 
 75 |   ## One row per distinct sector string, so each is converted only once
 76 |   source_table <- data.table(Source.Sectors = unique(icews$Source.Sectors)
 77 |                              , source_codes = NA_character_)
 78 |   target_table <- data.table(Target.Sectors = unique(icews$Target.Sectors)
 79 |                              , target_codes = NA_character_)
 80 |   source_table[, source_codes := cameo_convert(Source.Sectors)]
 81 |   target_table[, target_codes := cameo_convert(Target.Sectors)]
 82 | 
 83 |   ######
 84 |   #
 85 |   # Convert ICEWS codes to CAMEO codes
 86 |   #
 87 |   ######
 88 | 
 89 |   ## Attach the agent codes to the events
 90 |   icews <- merge(icews, source_table, by = 'Source.Sectors', all.x = TRUE, sort = FALSE)
 91 |   icews <- merge(icews, target_table, by = 'Target.Sectors', all.x = TRUE, sort = FALSE)
 92 | 
 93 |   ## Attach state codes twice: the states table is renamed in place so its
 94 |   ##  columns line up first with the source, then with the target country
 95 |   setnames(states, c('Source.Country', 'source_isoc', 'source_cown'))
 96 |   icews <- merge(icews, states, by = 'Source.Country', all.x = TRUE, sort = FALSE)
 97 |   setnames(states, c('Target.Country', 'target_isoc', 'target_cown'))
 98 |   icews <- merge(icews, states, by = 'Target.Country', all.x = TRUE, sort = FALSE)
 99 | 
100 |   ## Entity codes a la CAMEO: state code followed by agent code
101 |   icews[, sourceactorentity := paste3(source_isoc, source_codes, sep = '')]
102 |   icews[, targetactorentity := paste3(target_isoc, target_codes, sep = '')]
103 | 
104 |   ## Root codes. For each base b = 10, 20, ..., 200 the event codes
105 |   ##  b..b+9 and 10b+11..10b+90 receive root b/10. The ranges of
106 |   ##  different bases overlap and later bases overwrite earlier ones,
107 |   ##  so the loop has to run in ascending order.
108 |   for(code_base in seq(10, 200, by = 10)){
109 |     short_codes <- code_base:(code_base + 9)
110 |     long_codes <- (code_base * 10 + 11):(code_base * 10 + 90)
111 |     icews[eventcode %in% c(short_codes, long_codes), rootcode := code_base / 10]
112 |   }
113 | 
114 |   ######
115 |   # Write out cleaned ICEWS data
116 |   ######
117 | 
118 |   return(icews)
119 | }
120 | 


--------------------------------------------------------------------------------
/R/ingest_histphoenix.R:
--------------------------------------------------------------------------------
 1 | #' Ingest the historic Phoenix Dataset
 2 | #'
 3 | #' Given a directory with the historic Phoenix dataset files, quickly read
 4 | #' them all in, name them correctly, and combine them into one dataframe.
 5 | #'
 6 | #' @param histphoenix_loc The path to the Phoenix folder.
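 7 | #' @param .start_date,.end_date Date range; accepted but not currently used when reading the files.
 8 | #' @param .statelist State codes; actors whose root code is in this list are labelled <root>GOV when their agent is blank or 'GOV'.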
 9 | #'
10 | #' @return A single dataframe with all the historic Phoenix events in the folder.
11 | #' @note This function, like Phoenix, is still in development and may contain errors and change quickly.
12 | #' @examples
13 | #'
14 | #' events <- ingest_histphoenix("~/histphoenix")
15 | #'
16 | #' @import data.table
17 | #' @import countrycode
18 | #' @import bit64
19 | #' @rdname ingest_histphoenix
20 | #' @export
21 | 
22 | ingest_histphoenix <- function(
23 |   histphoenix_loc
24 |   , .start_date = start_date
25 |   , .end_date = end_date
26 |   , .statelist = statelist
27 |   ){
28 |   # Read every historic Phoenix CSV found in `histphoenix_loc` and return
29 |   # them as one data.table with a parsed `date` column.
30 |   #   histphoenix_loc: path to the folder holding the files.
31 |   #   .start_date, .end_date: accepted but not used in this function.
32 |   #   .statelist: state codes; decides which actors get a 'GOV' label.
33 |   # The defaults (start_date, end_date, statelist) are not defined here
34 |   # and must exist in the calling environment when arguments are omitted.
35 | 
36 |   ## Identify appropriate files in the historic Phoenix folder: only names
37 |   ##  containing both 'Phoenix' and 'csv' are read, which skips PDF docs
38 |   folder_files <- list.files(histphoenix_loc)
39 |   histphoenix_files <- folder_files[grepl('Phoenix', folder_files)
40 |                                     & grepl('csv', folder_files)]
41 | 
42 |   ## Read in Phoenix files from historic sources. Each file's `aid` is
43 |   ##  converted to integer64, then all tables are stacked in one call
44 |   ##  rather than growing a table inside a loop.
45 |   read_hist <- function(filename){
46 |     this_phoenix <- fread(paste(histphoenix_loc, filename, sep = '/'))
47 |     this_phoenix$aid <- bit64::as.integer64(this_phoenix$aid)
48 |     this_phoenix
49 |   }
50 |   histphoenix_data <- rbindlist(lapply(histphoenix_files, read_hist)
51 |                                 , use.names = TRUE)
52 | 
53 |   ######################################
54 |   ## IMPORTANT ASSUMPTION HERE:
55 |   ## I am *ASSUMING* that a blank agent entry after a state code
56 |   ##  implies that the actor is the GOVERNMENT. As such, blank and 'GOV'
57 |   ##  agents are both recoded as <state>GOV.
58 |   ######################################
59 |   histphoenix_data[source_root %in% .statelist
60 |                    & (source_agent == ''
61 |                       | source_agent == 'GOV')
62 |                    , source := paste0(source_root, 'GOV')]
63 |   ## The target rule checks target_agent. It used to check source_agent,
64 |   ##  which recoded targets according to the source's agent.
65 |   histphoenix_data[target_root %in% .statelist
66 |                    & (target_agent == ''
67 |                       | target_agent == 'GOV')
68 |                    , target := paste0(target_root, 'GOV')]
69 | 
70 |   ## Drop any rows with missing codes or scores
71 |   histphoenix_data <- histphoenix_data[!is.na(code)]
72 |   histphoenix_data <- histphoenix_data[!is.na(root_code)]
73 |   histphoenix_data <- histphoenix_data[!is.na(goldstein)]
74 |   histphoenix_data <- histphoenix_data[!is.na(quad_class)]
75 | 
76 |   ## Parse dates
77 |   histphoenix_data$date <- as.Date(histphoenix_data$story_date, format = '%m/%d/%Y')
78 | 
79 |   return(histphoenix_data)
80 | }
81 | 
82 | 
83 | 
84 | 


--------------------------------------------------------------------------------
/R/ingest_icews.R:
--------------------------------------------------------------------------------
 1 | #' Ingest the ICEWS Event Dataset
 2 | #'
 3 | #' Given a directory with individual ICEWS dataset files, quickly read
 4 | #' them all in, name them correctly, and combine them into one dataframe.
 5 | #'
 6 | #' @param dir The path to the ICEWS folder.
 7 | #' @param start_date Start of date range as YYYYMMDD integer format.
 8 | #' @param end_date End of date range as YYYYMMDD integer format.
 9 | #'
10 | #' @return A single dataframe with all the ICEWS events in the folder.
11 | #' @author Andy Halterman, forked by Jesse Hammond
12 | #' @note This function is still in development and may contain errors and change quickly.
13 | #' @examples
14 | #'
15 | #' events <- ingest_icews("~/ICEWS/study_28075/Data/", 20101201, 20140101)
16 | #'
17 | #' @rdname ingest_icews
18 | #' @export
19 | 
20 | ingest_icews <- function(dir, start_date, end_date){
21 |   # Read the ICEWS '.tab' files in `dir` whose year falls between the
22 |   # years of `start_date` and `end_date`, and return them as one table.
23 |   # Only the first four characters (the year) of each date are used.
24 |   # If no rows could be read, a one-row all-NA placeholder is returned.
25 | 
26 |   # Handle messy file paths
27 |   if (stringr::str_sub(dir, -1, -1) != "/"){
28 |     dir <- paste0(dir, "/")
29 |   }
30 | 
31 |   ## List files; quick regex in case of zips still there
32 |   files <- list.files(dir)
33 |   files <- files[grep("\\.tab$", files)]
34 | 
35 |   ## Pull files that fall in the date range provided. The year is the
36 |   ##  second dot-separated piece of a name; names without one are skipped
37 |   startyear <- as.integer(substr(start_date, 1, 4))
38 |   endyear <- as.integer(substr(end_date, 1, 4))
39 |   filesyears <- as.integer(
40 |     vapply(stringr::str_split(files, '\\.'), '[', character(1), 2))
41 |   has_year <- !is.na(filesyears)
42 |   if(any(has_year) && endyear > max(filesyears[has_year])){
43 |     message('Note: specified range exceeds the most recent ICEWS entries.')
44 |   }
45 |   files <- files[has_year & filesyears >= startyear & filesyears <= endyear]
46 |   ## Build paths only when something was selected: paste0() would turn an empty selection into the bare directory
47 |   files <- if(length(files) > 0) paste0(dir, files) else character(0)
48 | 
49 |   ## Set column dtypes
50 |   coltypes <- c('integer', rep('character', 5), 'integer', 'numeric'
51 |                 , rep('character', 3), 'integer', 'integer'
52 |                 , rep('character', 5), 'numeric', 'numeric')
53 | 
54 |   ## Quick and dirty: fread all files. A file that cannot be read is
55 |   ##  reported with a message and contributes NULL (no rows).
56 |   read_one <- function(file){
57 |     t <- tryCatch(fread(file, stringsAsFactors = FALSE, sep = '\t'
58 |                         , colClasses = coltypes, na.strings = '')
59 |                   , error = function(e) message(paste0('error reading ', file)))
60 |     if(is.null(t)){
61 |       message('object is not a data.frame')
62 |     }
63 |     t
64 |   }
65 |   message("Reading in files...")
66 |   event_list  <- plyr::llply(files, read_one, .progress = plyr::progress_text(char = '='))
67 | 
68 |   # Bind everything together
69 |   events <- rbindlist(event_list)
70 | 
71 |   if(nrow(events) == 0){
72 |     # Nothing was read: return the all-NA placeholder table
73 |     events <- data.table(date = structure(NA_real_, class="Date")
74 |                          , sourceactorentity = NA_character_
75 |                          , targetactorentity = NA_character_
76 |                          , rootcode = NA_integer_
77 |                          , eventcode = NA_integer_
78 |                          , goldstein = NA_real_)
79 |   } else {
80 |     # Set names in place, then use lubridate and de-POSIX the date
81 |     setnames(events, c("event_id", "date", "Source.Name", "Source.Sectors",
82 |                        "Source.Country", "Event.Text", "eventcode", "goldstein", "Target.Name",
83 |                        "Target.Sectors", "Target.Country", "Story.ID", "Sentence.Number",
84 |                        "Publisher", "City", "District", "Province", "Country", "Latitude",
85 |                        "Longitude"))
86 |     events$date <- as.Date(lubridate::ymd(events$date))
87 |   }
88 |   message("Process complete")
89 |   return(events)
90 | }
91 | 


--------------------------------------------------------------------------------
/R/ingest_phoenix.R:
--------------------------------------------------------------------------------
 1 | #' Ingest the Phoenix Dataset
 2 | #'
 3 | #' Given a directory with individual Phoenix dataset files, quickly read
 4 | #' them all in, name them correctly, and combine them into one dataframe.
 5 | #'
 6 | #' @param phoenix_loc The path to the Phoenix folder.
 7 | #' @param start_date Start of date range as YYYYMMDD integer format.
 8 | #' @param end_date End of date range as YYYYMMDD integer format.
 9 | #'
10 | #' @return A single dataframe with all the Phoenix events in the folder.
11 | #' @author Andy Halterman, forked by Jesse Hammond
12 | #' @note This function, like Phoenix, is still in development and may contain errors and change quickly.
13 | #' @examples
14 | #'
15 | #' events <- ingest_phoenix("~/OEDA/phoxy_test/", 20140620, 20150101)
16 | #'
17 | #' @import data.table
18 | #' @rdname ingest_phoenix
19 | #' @export
20 | 
21 | ingest_phoenix <- function(phoenix_loc, start_date, end_date){
22 |   # Read the daily Phoenix files in `phoenix_loc` dated from `start_date`
23 |   # to `end_date` (inclusive) and return them as one data.table. If no
24 |   # rows could be read, a one-row all-NA placeholder table is returned.
25 | 
26 |   # Handle messy file paths
27 |   if (stringr::str_sub(phoenix_loc, -1, -1) != "/"){
28 |     phoenix_loc <- paste0(phoenix_loc, "/")
29 |   }
30 | 
31 |   ## Dates given as YYYYMMDD numbers (e.g. 20140620) are converted to Date
32 |   ##  so they compare correctly with file dates; other inputs pass through
33 |   as_day <- function(x){
34 |     if(is.numeric(x) && x >= 10000101) as.Date(as.character(as.integer(x)), format = '%Y%m%d') else x
35 |   }
36 |   start_date <- as_day(start_date)
37 |   end_date <- as_day(end_date)
38 | 
39 |   ## List files. The date is the third dot-separated piece of each name;
40 |   ##  names without a parsable date get NA and are never selected
41 |   files <- list.files(phoenix_loc)
42 |   filesdates <- as.Date(
43 |     vapply(stringr::str_split(files, '\\.'), '[', character(1), 3)
44 |     , format = '%Y%m%d')
45 |   has_date <- !is.na(filesdates)
46 | 
47 |   ## Pull files in the date range provided; paths are only built when something was selected
48 |   if(any(has_date) && start_date < min(filesdates[has_date])){
49 |     message('Note: specified range precedes the earliest Phoenix data.')
50 |   }
51 |   if(any(has_date) && end_date > max(filesdates[has_date])){
52 |     message('Note: specified range exceeds the latest Phoenix data. IT\'S NOT A CRYSTAL BALL PEOPLE')
53 |   }
54 |   files <- files[has_date & filesdates >= start_date & filesdates <= end_date]
55 |   files <- if(length(files) > 0) paste0(phoenix_loc, files) else character(0)
56 | 
57 |   ## Set column dtypes and names
58 |   coltypes <- c('character', rep('integer', 4), rep('character', 10)
59 |                 , 'integer',  'numeric', 'character', 'numeric'
60 |                 , 'numeric', rep('character', 6))
61 |   phoenix_names <- c('eventid', 'date', 'year', 'month', 'day'
62 |                      , 'sourceactorfull', 'sourceactorentity', 'sourceactorrole'
63 |                      , 'sourceactorattribute', 'targetactorfull', 'targetactorentity'
64 |                      , 'targetactorrole', 'targetactorattribute', 'eventcode'
65 |                      , 'rootcode', 'pentaclass', 'goldstein', 'issues'
66 |                      , 'lat', 'long', 'locationname', 'statename', 'countrycode'
67 |                      , 'sentenceid', 'urls', 'newssources')
68 | 
69 |   ## Quick and dirty: fread all files; an unreadable file contributes NULL
70 |   read_one <- function(file){
71 |     t <- tryCatch(data.table::fread(file, stringsAsFactors = FALSE, sep = '\t'
72 |                         , colClasses = coltypes, na.strings = '')
73 |                   , error = function(e) message(paste0('error reading ', file)))
74 |     if(is.null(t)){
75 |       message('object is not a data.frame')
76 |     }
77 |     t
78 |   }
79 |   message("Reading in files...")
80 |   event_list  <- plyr::llply(files, read_one, .progress = plyr::progress_text(char = '='))
81 |   ## Bind everything together
82 |   events <- data.table::rbindlist(event_list)
83 |   if(nrow(events) == 0){
84 |     events <- data.table(date = structure(NA_real_, class="Date")
85 |                          , sourceactorentity = NA_character_
86 |                          , targetactorentity = NA_character_
87 |                          , rootcode = NA_integer_
88 |                          , eventcode = NA_integer_
89 |                          , goldstein = NA_real_)
90 |   } else {
91 |     data.table::setnames(events, phoenix_names)
92 |     ## Convert codes to INTEGER type and dates to DATE objects (lubridate, then de-POSIX)
93 |     suppressWarnings(events$eventcode <- as.integer(events$eventcode))
94 |     suppressWarnings(events$rootcode <- as.integer(events$rootcode))
95 |     events$date <- as.Date(lubridate::ymd(events$date))
96 |   }
97 |   message("Process complete")
98 |   return(events)
99 | }


--------------------------------------------------------------------------------
/R/phoenix_stats.R:
--------------------------------------------------------------------------------
  1 | #'
  2 | #' Extract statistics from daily Phoenix event-networks.
  3 | #'
  4 | #'  Take a set of daily networks generated by the phoenix_net function,
  5 | #'  and extract a variety of daily statistics at the network and nodal
  6 | #'  levels.
  7 | #'
  8 | #'
  9 | #' @param dailynets named collection of networkDynamic objects containing
 10 | #'          daily event-nets produced via phoenix_net function, one element
 11 | #'          per code (elements are looked up by names of the form 'code<x>').
 12 | #' @param time_window time interval of aggregate event-network objects. Valid
 13 | #'          entries are 'day', 'week', 'month', 'year'.
 14 | #' @param codes string of event codes, root codes, or pentaclass codes, or
 15 | #'          'all' (the default) to use every element of dailynets.
 16 | #'          Note: these codes have to be in the same format as the original
 17 | #'          network layers created via 'phoenix_net'. If you specify rootcodes
 18 | #'          in the creation step, entering pentaclass codes in this step
 19 | #'          will produce an error.
 20 | #' @param do_parallel Logical TRUE-FALSE. Whether to use parallel backend
 21 | #'          'doMC' when extracting network statistics by code. Considerably
 22 | #'          faster than single-core, but less reliable.
 23 | #' @param n_cores Number of cores registered with 'doMC' when do_parallel
 24 | #'          is TRUE. Defaults to 4.
 25 | #'
 26 | #'
 27 | #' @return phoenix_out a LIST object with one entry per code, each holding
 28 | #'          the tables 'netstats', 'dyadstats' and 'nodestats' of descriptive
 29 | #'          statistics for the event-networks.
 30 | #'
 31 | #' @keywords phoenix, event data
 32 | #'
 33 | #' @import data.table
 34 | #' @import countrycode
 35 | #' @import reshape2
 36 | #' @import statnet
 37 | #' @import tsna
 38 | #' @import plyr
 39 | #' @import lubridate
 40 | #' @import igraph
 41 | #' @import intergraph
 42 | #' @import doMC
 43 | #'
 44 | #' @export
 45 | 
 46 | phoenix_stats <- function(dailynets, time_window = 'day'
 47 |                           , codes = 'all', do_parallel = FALSE, n_cores = 4){
 48 | 
 49 |   ######
 50 |   #
 51 |   # Set up some initial values
 52 |   #
 53 |   ######
 54 | 
 55 |   ## Initialize parallel cores
 56 |   if(isTRUE(do_parallel)){
 57 |     doMC::registerDoMC(cores = n_cores)
 58 |   }
 59 | 
 60 |   ## Subset codes. identical() keeps the test a single TRUE/FALSE even when
 61 |   ## several codes are supplied; `codes == 'all'` would then be a vector,
 62 |   ## which if() rejects.
 63 |   if(identical(codes, 'all')){
 64 |     codes <- names(dailynets)
 65 |   } else{
 66 |     codes <- paste0('code', codes)
 67 |   }
 68 | 
 69 |   ## Set up dates: the observation period stored on the first network gives
 70 |   ## the first and last date as YYYYMMDD values
 71 |   start_end <- get.network.attribute(
 72 |     dailynets[[1]],'net.obs.period')$observations[[1]]
 73 |   start_date <- as.Date(as.character(start_end[1]), format = '%Y%m%d')
 74 |   end_date <- as.Date(as.character(start_end[2]), format = '%Y%m%d')
 75 |   dates <- as.integer(format(seq.Date(start_date, end_date, time_window), '%Y%m%d'))
 76 |   ## The final element of the sequence is not used as a period start
 77 |   dates <- dates[-length(dates)]
 78 | 
 79 |   ######
 80 |   #
 81 |   # Set up some empty storage objects
 82 |   #
 83 |   ######
 84 | 
 85 |   # Storage for daily network outputs
 86 |   master_data <- vector('list', length(codes))
 87 |   names(master_data) <- as.character(codes)
 88 | 
 89 |   for(code in codes){
 90 | 
 91 |     ## Extract one set of daily event-networks
 92 |     event_dnet <- dailynets[[code]]
 93 |     ## Label used in progress messages: the name without its 'code' prefix
 94 |     code_label <- substr(code, 5, nchar(code))
 95 | 
 96 |     ## Extract network-level statistics
 97 |     message(paste0('Extracting network statistics for code '
 98 |                    , code_label, ' ...'))
 99 |     master_data[[code]]$netstats <- data.table(
100 |       plyr::ldply(dates, extract_netstats, event_dnet = event_dnet
101 |                   , datelist = dates
102 |                   # , .progress = 'text'
103 |                   , .parallel = do_parallel))
104 |     ## Blank out both change measures for the first period
105 |     master_data[[code]]$netstats$net_jaccard[1] <- NA
106 |     master_data[[code]]$netstats$net_hamming[1] <- NA
107 | 
108 |     ## Extract dyad-level statistics
109 |     message(paste0('Extracting dyadic shared-community statistics for code '
110 |                    , code_label, ' ...'))
111 |     master_data[[code]]$dyadstats <-  data.table(
112 |       plyr::ldply(dates, extract_dyadstats, event_dnet = event_dnet
113 |                   # , .progress = 'text'
114 |                   , .parallel = do_parallel
115 |                   ))
116 | 
117 |     ## Extract node-level statistics
118 |     message(paste0('Extracting nodal centrality and transitivity statistics for code '
119 |                    , code_label, ' ...'))
120 |     master_data[[code]]$nodestats <- data.table(
121 |       plyr::ldply(dates, extract_nodestats, event_dnet = event_dnet
122 |                   # , .progress = 'text'
123 |                   , .parallel = do_parallel))
124 | 
125 |   }
126 | 
127 |   return(master_data)
128 | }
129 | 
130 | 


--------------------------------------------------------------------------------
/R/phoenix_tables.R:
--------------------------------------------------------------------------------
  1 | #' Scrape, merge, and process Phoenix and ICEWS data into
  2 | #' a large data table for aggregation and subsetting.
  3 | #'
  4 | #'
  5 | #' @param phoenix_loc folder containing Phoenix data sets as daily .csv
  6 | #'          data tables. Automatically checks for new data sets each time
  7 | #'          the function is run, and downloads new daily data as it becomes
  8 | #'          available. Currently in 'one-and-done' format
  9 | #'          where it downloads the first time, and checks thereafter.
 10 | #' @param icews_loc folder containing ICEWS data sets as daily .tab data
 11 | #'          tables. Because I don't know how to work a SWORD API, these will
 12 | #'          need to be manually downloaded and updated.
 13 | #' @param update Logical. If TRUE (the default), update_phoenix() is run on
 14 | #'          phoenix_loc before the data are ingested.
 15 | #'
 16 | #' @return A list with three elements: 'diagnostics', a data.table of daily
 17 | #'          counts of events reported by Phoenix only, by ICEWS only, and by
 18 | #'          both; 'netdata', a data.table of the merged/processed Phoenix and
 19 | #'          ICEWS events with pentaclass, root and event codes expanded into
 20 | #'          dummy columns; and 'actorlist', the retained actor codes that are
 21 | #'          not bare state codes.
 22 | #'
 23 | #' @rdname phoenix_tables
 24 | #'
 25 | #' @author Jesse Hammond
 26 | #'
 27 | #' @note This function is still in early development and may contain significant errors.
 28 | #'        Don't trust it.
 29 | #'
 30 | #' @export
 31 | #'
 32 | #' @import data.table
 33 | #' @import countrycode
 34 | #' @import lubridate
 35 | #' @import dummies
 36 | phoenix_tables <- function(phoenix_loc, icews_loc, update = TRUE){
 37 | 
 38 |   ######
 39 |   #
 40 |   # Set up some initial values: Time windows
 41 |   #
 42 |   ######
 43 | 
 44 |   ## Date objects
 45 |   start_date <- as.Date('1995-01-01')
 46 |   end_date <- Sys.Date()
 47 |   dates <- seq.Date(start_date, end_date, by = 'day')
 48 | 
 49 |   ######
 50 |   #
 51 |   # Set up some initial values: Actors
 52 |   #
 53 |   ######
 54 | 
 55 |   ## Paste-function that can handle NA entries
 56 |   ## (http://stackoverflow.com/questions/13673894/suppress-nas-in-paste)
 57 |   paste3 <- function(...,sep=", ") {
 58 |     L <- list(...)
 59 |     L <- lapply(L,function(x) {x[is.na(x)] <- ""; x})
 60 |     ret <-gsub(paste0("(^",sep,"|",sep,"$)"),"",
 61 |                gsub(paste0(sep,sep),sep,
 62 |                     do.call(paste,c(L,list(sep=sep)))))
 63 |     is.na(ret) <- ret==""
 64 |     ret
 65 |   }
 66 | 
 67 |   ## Set up set of secondary actor codes
 68 |   secondary_actors <- c('GOV', 'MIL', 'REB', 'OPP', 'PTY', 'COP', 'JUD'
 69 |                         , 'SPY', 'MED', 'EDU', 'BUS', 'CRM', 'CVL')
 70 |   statelist <- countrycode::countrycode_data$iso3c
 71 |   actors <- unique(statelist[statelist %in% states$isoc])
 72 |   actors <- actors[!is.na(actors)]
 73 |   ## Actor set = state codes plus every state/secondary-actor combination
 74 |   actors <- c(actors, unique(as.vector(outer(actors, secondary_actors, paste, sep = ''))))
 75 |   actors <- as.factor(sort(actors))
 76 | 
 77 |   ######
 78 |   #
 79 |   # Set up some initial values: Event codes
 80 |   #
 81 |   ######
 82 | 
 83 |   ## Factor variables describing CAMEO categories
 84 |   rootcodes <- factor(1:20)
 85 |   levels(rootcodes) <- as.character(1:20)
 86 | 
 87 |   eventcodes <- factor(1:298)
 88 |   levels(eventcodes) <- as.character(
 89 |     c(10:21, 211:214, 22:23, 231:234, 24, 241:244, 25, 251:256, 26:28, 30:31
 90 |       , 311:314, 32:33, 331:334, 34, 341:344, 35, 351:356, 36:46, 50:57
 91 |       , 60:64, 70:75, 80:81, 811:814, 82:83, 831:834, 84, 841:842, 85:86
 92 |       , 861:863, 87, 871:874, 90:94, 100:101, 1011:1014, 102:103, 1031:1034
 93 |       , 104, 1041:1044, 105, 1051:1056, 106:108, 110:112, 1121:1125, 113:116
 94 |       , 120:121, 1211:1214, 122, 1221:1224, 123, 1231:1234, 124, 1241:1246
 95 |       , 125:129, 130:131, 1311:1313, 132, 1321:1324, 133:138, 1381:1385
 96 |       , 139:141, 1411:1414, 142, 1421:1424, 143, 1431:1434, 144, 1441:1444
 97 |       , 145, 1451:1454, 150:155, 160:162, 1621:1623, 163:166, 1661:1663
 98 |       , 170:171, 1711:1712, 172, 1721:1724, 173:176, 180:182, 1821:1823, 183
 99 |       , 1831:1834, 184:186, 190:195, 1951:1952, 196, 200:204, 2041:2042)
100 |   )
101 | 
102 |   pentaclasses <- factor(0:4)
103 |   levels(pentaclasses) <- as.character(0:4)
104 | 
105 |   ######
106 |   #
107 |   # Set up some empty storage objects
108 |   #
109 |   ######
110 | 
111 |   # Storage for comparison of Phoenix and ICEWS reporting overlap
112 |   filler <- rep(NA, length(dates))
113 |   sources_overlap <- data.table(date = dates
114 |                                 , phoenix_only = filler
115 |                                 , icews_only = filler
116 |                                 , both_sources = filler)
117 | 
118 |   ######
119 |   #
120 |   # Download raw files from Phoenix data repo and ICEWS dataverse.
121 |   #
122 |   ######
123 | 
124 |   ## Download new Phoenix data tables. This will download the entire
125 |   ##  archive the first time this function is run and fully populate
126 |   ##  the destination folder.
127 | 
128 |   if(update == TRUE){
129 |     message('Checking Phoenix data...')
130 |     ## Only the destination path is passed: every update_phoenix()
131 |     ##  signature accepts it.
132 |     update_phoenix(destpath = phoenix_loc)
133 |   }
134 | 
135 | 
136 |   ## Check to see if ICEWS folder exists and that it has at least one 'valid'
137 |   ##  ICEWS data table stored.
138 |   message('Checking ICEWS data...')
139 |   icews_checkfile <- 'events.2000.20150313082808.tab'
140 |   icews_files <- list.files(icews_loc)
141 |   if(!icews_checkfile %in% icews_files){
142 |     stop('Please enter a valid path that contains the ICEWS yearly files.')
143 |   } else {
144 |     message('ICEWS file location is valid.')
145 |   }
146 | 
147 |   ######
148 |   #
149 |   # Read and parse ICEWS data for merging.
150 |   #
151 |   ######
152 | 
153 |   ## Read and parse ICEWS data
154 |   message('Ingesting ICEWS data...')
155 |   icews_data <- ingest_icews(icews_loc, start_date, end_date)
156 | 
157 |   ## Clean ICEWS data and format to Phoenix-style CAMEO codes
158 |   ##  for actors and states
159 |   message('Munging ICEWS data...')
160 |   icews_data <- icews_cameo(icews_data)
161 | 
162 |   ## Subset ICEWS data to only keep key columns
163 |   icews_data <- icews_data[, list(date, sourceactorentity
164 |                                   , targetactorentity, rootcode
165 |                                   , eventcode, goldstein)]
166 |   icews_data[, source := 'icews']
167 | 
168 |   ## Modify more complex ICEWS actor codes: a 9-character code is reduced
169 |   ##  to its first three and last three characters
170 |   icews_data[nchar(sourceactorentity) == 9, sourceactorentity :=
171 |                paste0(substr(sourceactorentity, 1, 3)
172 |                       , substr(sourceactorentity, 7, 9))
173 |              ]
174 | 
175 |   icews_data[nchar(targetactorentity) == 9, targetactorentity :=
176 |                paste0(substr(targetactorentity, 1, 3)
177 |                       , substr(targetactorentity, 7, 9))
178 |              ]
179 | 
180 |   ######
181 |   #
182 |   # Read and parse Phoenix data for merging.
183 |   #
184 |   ######
185 | 
186 |   ## Read and parse Phoenix data
187 |   message('Ingesting Phoenix data...')
188 |   phoenix_data <- ingest_phoenix(phoenix_loc = phoenix_loc
189 |                                         , start_date = start_date
190 |                                         , end_date = end_date)
191 | 
192 |   ## Subset Phoenix data to only keep key columns
193 |   if(all(c('sourceactorrole', 'targetactorrole') %in% names(phoenix_data))){
194 |     phoenix_data <- phoenix_data[, list(date, paste3(sourceactorentity
195 |                                                      , sourceactorrole, sep = '')
196 |                                         , paste3(targetactorentity
197 |                                                  , targetactorrole, sep = '')
198 |                                         , rootcode, eventcode, goldstein)]
199 |     setnames(phoenix_data, c('V2', 'V3')
200 |              , c('sourceactorentity', 'targetactorentity'))
201 |   } else {
202 |     ## ingest_phoenix() found no events and returned its placeholder table,
203 |     ##  which has no role columns to paste onto the entity codes
204 |     phoenix_data <- phoenix_data[, list(date, sourceactorentity
205 |                                         , targetactorentity, rootcode
206 |                                         , eventcode, goldstein)]
207 |   }
208 |   phoenix_data[, source := 'phoenix']
209 | 
210 |   ######
211 |   #
212 |   # Combine ICEWS and Phoenix data
213 |   #
214 |   ######
215 | 
216 |   ## Fall back to ICEWS alone if the two tables cannot be combined. The
217 |   ##  result of the attempt is captured directly, so master_data is always
218 |   ##  defined afterwards.
219 |   master_data <- tryCatch(
220 |     rbind(icews_data, phoenix_data)
221 |     , error = function(e){
222 |       message('Specified range does not include Phoenix data.')
223 |       icews_data
224 |     })
225 |   setnames(master_data, c('sourceactorentity', 'targetactorentity')
226 |            , c('actora', 'actorb'))
227 | 
228 |   ## Drop any missing data
229 |   master_data <- master_data[complete.cases(master_data), ]
230 | 
231 |   ## Create new variable: Pentaclass (0-4)
232 |   master_data[rootcode %in% c(1, 2), pentaclass := 0L]
233 |   master_data[rootcode %in% c(3, 4, 5), pentaclass := 1L]
234 |   master_data[rootcode %in% c(6, 7, 8), pentaclass := 2L]
235 |   master_data[rootcode %in% c(9, 10, 11, 12, 13, 16), pentaclass := 3L]
236 |   master_data[rootcode %in% c(14, 15, 17, 18, 19, 20), pentaclass := 4L]
237 | 
238 |   ######################################
239 |   ## IMPORTANT ASSUMPTION HERE:
240 |   ## I am *ASSUMING* that NULL/NA entries after a state code
241 |   ##  implies that the actor is the GOVERNMENT. As such I am replacing
242 |   ##  all such missing entries with 'GOV'.
243 |   ######################################
244 |   master_data[actora %in%  countrycode::countrycode_data$iso3c
245 |               , actora := paste0(actora, 'GOV')]
246 |   master_data[actorb %in%  countrycode::countrycode_data$iso3c
247 |               , actorb := paste0(actorb, 'GOV')]
248 | 
249 |   ######
250 |   #
251 |   # Pre-format data by de-duplicating, cleaning dates and actors,
252 |   # and dropping unused columns
253 |   #
254 |   ######
255 | 
256 |   ## De-duplicate
257 |   master_data <- unique(master_data)
258 | 
259 |   ## Subset events and columns: only events that:
260 |   ##  1. involve specified actor set on both side (as ENTITIES)
261 |   ##  2. involve TWO DIFFERENT actors (i.e. no self-interactions
262 |   ##      as specified by user)
263 |   master_data <- master_data[(actora %in% actors
264 |                               & actorb %in% actors)]
265 |   master_data <- master_data[actora != actorb]
266 |   master_data[, actora := factor(actora, levels = levels(actors))]
267 |   master_data[, actorb := factor(actorb, levels = levels(actors))]
268 | 
269 |   ## Set CAMEO coded event/root codes to factors
270 |   master_data[, rootcode := factor(rootcode, levels = rootcodes)]
271 |   master_data$eventcode <- gsub('!', '', master_data$eventcode)
272 |   master_data[, eventcode := factor(as.integer(eventcode), levels = eventcodes)]
273 |   master_data[, pentaclass := factor(pentaclass, levels = pentaclasses)]
274 | 
275 |   ## Set keys. Sorting by source last puts the 'icews' copy of an event
276 |   ##  ahead of its 'phoenix' copy, which the duplicate flags below rely on.
277 |   setkeyv(master_data, c('date', 'actora', 'actorb', 'eventcode', 'source'))
278 | 
279 | 
280 |   ######
281 |   #
282 |   # Export : how much overlap between Phoenix and ICEWS reporting?
283 |   #
284 |   ######
285 | 
286 |   ## Create some temporary flag variables
287 |   master_data[, dup_fromtop := duplicated(
288 |     master_data[, list(date, actora, actorb, rootcode, eventcode)])]
289 |   master_data[, dup_frombot := duplicated(
290 |     master_data[, list(date, actora, actorb, rootcode, eventcode)], fromLast = TRUE)]
291 | 
292 |   ## Export data on reporting overlap
293 |   # Phoenix reporting only
294 |   dates_tab <- data.table(date = dates)
295 |   phoenix_only <- master_data[, sum(dup_fromtop == FALSE
296 |                                     & source == 'phoenix'), by = date]
297 |   phoenix_only <- merge(dates_tab, phoenix_only, by = 'date', all.x = TRUE)
298 |   phoenix_only[is.na(V1), V1 := 0]
299 |   sources_overlap$phoenix_only <- phoenix_only$V1
300 | 
301 |   # ICEWS reporting only
302 |   icews_only <- master_data[, sum(dup_frombot == FALSE
303 |                                   & source == 'icews'), by = date]
304 |   icews_only <- merge(dates_tab, icews_only, by = 'date', all.x = TRUE)
305 |   icews_only[is.na(V1), V1 := 0]
306 |   sources_overlap$icews_only <- icews_only$V1
307 | 
308 |   # Both sources report
309 |   both_sources <- master_data[, sum(dup_fromtop == TRUE), by = date]
310 |   both_sources <- merge(dates_tab, both_sources, by = 'date', all.x = TRUE)
311 |   both_sources[is.na(V1), V1 := 0]
312 |   sources_overlap$both_sources <- both_sources$V1
313 | 
314 |   ## Drop flags and source variable
315 |   master_data[, dup_fromtop := NULL]
316 |   master_data[, dup_frombot := NULL]
317 |   master_data[, source := NULL]
318 | 
319 |   ## Drop duplicated variables
320 |   master_data <- unique(master_data)
321 | 
322 |   ## Subset events: keep only events within date range
323 |   master_data <- master_data[date %in% dates]
324 | 
325 |   ## Create list of all actors in data set for output
326 |   main_actors <- actors[!actors %in% statelist]
327 | 
328 |   ## BIG DUMMY SECTION: dummy out all categorical event/root/pentaclass codes
329 |   master_data <- data.table(dummy.data.frame(master_data, names = c('pentaclass', 'rootcode', 'eventcode')))
330 | 
331 |   return(list(diagnostics = sources_overlap, netdata = master_data, actorlist = main_actors))
332 | }
333 | 


--------------------------------------------------------------------------------
/R/states_doc.R:
--------------------------------------------------------------------------------
 1 | #'
 2 | #' State-to-CAMEO conversion table
 3 | #'
 4 | #' Merge table to convert states to CAMEO format using conversion tables created
 5 | #'    by Phil Schrodt (https://github.com/philip-schrodt/text_to_CAMEO).
 6 | #'
 7 | #' @docType data
 8 | #'
 9 | #' @usage data(states)
10 | #'
11 | #' @keywords datasets
12 | #'
13 | #'
14 | "states"
15 | 


--------------------------------------------------------------------------------
/R/update_icews.R:
--------------------------------------------------------------------------------
 1 | #' Update a local directory of ICEWS dataset files with new files from the server
 2 | #'
 3 | #' Checks the contents of a directory containing ICEWS event data files, checks whether the
 4 | #' server has new events, and downloads them to that directory. (It'll have some version handling ability,
 5 | #' too, either from the file names or by reading in the events.) When new files are downloaded,
 6 | #' local files that are not in the server listing are deleted first.
 7 | #'
 8 | #' @param destpath The path to download ICEWS into.
 9 | #'
10 | #' @return NULL
11 | #' @author Original concept and code for Phoenix: Andy Halterman
12 | #' @note This function, like Phoenix, is still in development and may contain errors and change quickly.
13 | #'
14 | #' @import RCurl
15 | #' @import dataverse
16 | #' @export
17 | #'
18 | update_icews <- function(destpath){
19 |   # pulls all the links from the ICEWS dataverse
20 |   link_data <- EventNetworks::get_icewslinks()
21 |   link_labels <- as.character(link_data[, label])
22 |   ## File names are the first 30 characters of each label; the full label is
23 |   ## kept as the element name so the matching ids can be looked up below.
24 |   link_filelist <- stats::setNames(substr(link_labels, 1, 30), link_labels)
25 | 
26 |   ## Identify whether local ICEWS data exists
27 |   icews_files <- list.files(destpath)
28 | 
29 |   ## Determine what needs to be updated/downloaded
30 |   icews_delete <- icews_files[!icews_files %in% link_filelist]
31 |   icews_download <- link_filelist[!link_filelist %in% icews_files]
32 | 
33 |   if(length(icews_download) == 0){
34 |     message('ICEWS data is current through the most recent month.')
35 |   }
36 |   else{
37 |     message('Updating ICEWS with most recent data release...')
38 | 
39 |     ## Delete out-of-date ICEWS files
40 |     if(length(icews_delete) > 0){
41 |       file.remove(paste0(destpath, '/', icews_delete))
42 |     }
43 |     ids <- link_data[label %in% names(icews_download), id]
44 | 
45 |     message("Downloading and unzipping files.")
46 |     plyr::l_ply(
47 |       ids
48 |       , EventNetworks:::dw_icewsfile
49 |       , destpath = destpath
50 |       , metadata = link_data
51 |       , .progress = plyr::progress_text(char = '=')
52 |       )
53 |   }
54 | 
55 |   invisible(NULL)
56 | }
57 | 
58 | 


--------------------------------------------------------------------------------
/R/update_phoenix.R:
--------------------------------------------------------------------------------
 1 | #' Update a local directory of Phoenix dataset files with new files from the server
 2 | #'
 3 | #' Checks the contents of a directory containing Phoenix event data files, checks whether the
 4 | #' server has new events, and downloads them to that directory. (It'll have some version handling ability,
 5 | #' too, either from the file names or by reading in the events.)
 6 | #'
 7 | #' @param destpath The path to download Phoenix into.
 8 | #' @param phoenix_version Name of the data folder on the server that the
 9 | #'          download URLs point into. Defaults to 'current'.
10 | #'
11 | #' @return NULL
12 | #' @note This function, like Phoenix, is still in development and may contain errors and change quickly.
13 | #'
14 | #' @import RCurl
15 | #' @export
16 | #'
17 | update_phoenix <- function(destpath, phoenix_version = 'current'){
18 |   # pulls all the links from the OEDA Phoenix page
19 |   links <- EventNetworks::get_phoenixlinks()
20 |   ## V1 = matched file name, V2 = its date digits; dots are escaped so they
21 |   ## only match literal dots
22 |   file_pattern <- "events\\.full\\.(\\d+)\\.txt"
23 |   links_shortened <- as.data.frame(stringr::str_match(links, file_pattern), stringsAsFactors=FALSE)
24 |   ## Links that are not daily event files come back as NA rows; drop them so
25 |   ## they are never turned into a ".../NA.zip" download
26 |   links_shortened <- links_shortened[!is.na(links_shortened$V2), , drop = FALSE]
27 |   filelist <- list.files(destpath)
28 |   filelist_shortened <- as.data.frame(stringr::str_match(filelist, file_pattern), stringsAsFactors=FALSE)
29 |   # All rows in links_shortened that do not have a match in filelist_shortened.
30 |   new_files <- dplyr::anti_join(links_shortened, filelist_shortened, by = "V2")
31 |   if(nrow(new_files) == 0){
32 |     message('Phoenix data is current through today.')
33 |   }
34 |   else{
35 |     message("There are ", nrow(new_files), " undownloaded daily files. Downloading now...")
36 |     ll <- paste0("https://s3.amazonaws.com/oeda/data/", phoenix_version, "/", new_files$V1, ".zip")
37 |     message("Downloading and unzipping files.")
38 |     plyr::l_ply(ll, EventNetworks:::dw_phoenixfile, destpath = destpath, .progress = plyr::progress_text(char = '='))
39 |   }
40 |   invisible(NULL)
41 | }
42 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | `EventNetworks`
 2 | =====
 3 | 
 4 | Download, process, and transform historic and live-updated event data from the Phoenix and ICEWS repositories to temporal event-network structures for analysis.
 5 | 
 6 | Package description
 7 | ------------
 8 | `EventNetworks` includes a set of functions designed to gather event data, format and process these data, and convert them into daily networks of interactions between states in the international system.
 9 | 
10 | Data sources
11 | ------------
12 | The package can intake any or all of three different data sources:
13 | 
14 | 1. For events occurring between 1995 and one year behind the present day, it can use
15 | the public release of the [ICEWS data](https://dataverse.harvard.edu/dataverse/harvard?q=icews).
16 | 2. For events occurring from June 2014 through the present, it can use
17 | the [daily live-updated Phoenix data](http://phoenixdata.org/data/current) created and
18 | released through the [Open Event Data Project](http://openeventdata.org). 
19 | 3. For events occurring from January 1945 to December 2015, it can use the [historic Phoenix data](http://www.clinecenter.illinois.edu/data/speed/phoenix/) created and maintained by the Cline Center for Democracy at the University of Illinois Urbana-Champaign. The historic Phoenix is based on three data sets (the New York Times, the BBC Summary of World Broadcasts, and the CIA's Foreign Broadcast Information Service) with differing time spans, which means that the base content will vary depending on the time specified.
20 | 
21 | Note that this means there are some time periods where events will be constructed
22 | using more than one data source. For example, events in 2015 are drawn from ICEWS, the live-updated Phoenix, and the historic Phoenix BBC SWB records. In cases where sources overlap, event records are de-duplicated based on the event-dyad-day tuple. I advise careful comparison of time periods with partial multiple-source overlap, as even with de-duplication this can introduce some significant changes in the number and coverage of reported events.
23 | 
24 | The main function 'eventNetworks' intakes raw data files based on the provided folder locations for live Phoenix, historic Phoenix, and ICEWS data sets. If files are not found for live Phoenix and ICEWS (both of which are regularly updated), it will automatically attempt to download the full data sets. If the "Update" argument is set as TRUE, it will also compare the existing files to the online repositories for the dates requested.
25 | 
26 | More information on data sets
27 | ------------
28 | The current development of the Phoenix Data Project is a collaborative effort between Caerus Associates (Erin Simpson, Andrew Halterman, and John Beieler), Parus Analytics (Phil Schrodt), The University of Texas at Dallas (Patrick Brandt), The Cline Center for Democracy at the University of Illinois at Urbana-Champaign, and The University of Oklahoma. Visit the Phoenix website here: [http://phoenixdata.org/](http://phoenixdata.org/) and the website of the Open Event Data Alliance here: [http://openeventdata.org/](http://openeventdata.org).
29 | 
30 | The historic Phoenix data files are maintained by the Cline Center for Democracy at the University of Illinois Urbana-Champaign. Visit the Cline Center's website here: [http://www.clinecenter.illinois.edu/data/speed/phoenix/](http://www.clinecenter.illinois.edu/data/speed/phoenix/).
31 | 
32 | The Integrated Crisis Early Warning System (ICEWS) public-release data is an ongoing data-coding and analysis initiative hosted at Lockheed Martin, originally funded through DARPA, and more recently  funded through the Office of Naval Research. ICEWS data is being released monthly through the Harvard Dataverse, with a 12-month (give or take a few months) lag: as of 12/10/2017, ICEWS data is available through the end of November 2016. For more information, visit [The W-ICEWS site](http://www.lockheedmartin.com/us/products/W-ICEWS/W-ICEWS_Team/Publications.html) at Lockheed Martin.
33 | 
34 | Current status and updates
35 | ------------
36 | `EventNetworks` is still in a very early stage of development, and is likely to change significantly over time.
37 | 
38 | __Recent changes:__
39 | 1. I have integrated functionality for processing and converting the historic Phoenix data released by the Cline Center for Democracy at UIUC. These data can be used on their own or in conjunction with the live Phoenix and ICEWS data. This means it's now possible to generate event-networks for the international system going all the way back to 1945 (although at that point you're relying solely on the NYT digitized record, with all the accompanying geographic and substantive bias one would anticipate).
40 | 2. I have finally gotten around to fixing the major bottleneck in ICEWS data processing from complex actor descriptors to the standardized CAMEO actor set. This change significantly speeds up processing time.
41 | 3. EventNetworks now outputs data as a __list of array objects__. Each list entry corresponds to one time unit, and each array has dimensions (_k_ x _k_ x _m_) where _k_ = number of actors and _m_ = number of network layers, or number of unique tie types returned.
42 | 
43 | 
44 | Long-term To-Do List
45 | ------------
46 | - [x] Add support for specifying a subset of actors to examine, or a 'container' (e.g., a state) within which to examine all actors.
47 | - [x] Add support for specifying the level of temporal aggregation for event-networks (e.g., day/week/month/year).
48 | - [x] Add support for specifying a particular class of interactions to extract and examine (e.g., rootcodes 2,4,6).
49 | - [x] Increase efficiency of network-stats extraction module by enabling parallelization of plyr functions using doMC backend.
50 | - [x] Add support for using the Cline Center's recent historic Phoenix data releases (http://www.clinecenter.illinois.edu/data/speed/phoenix/)
51 | - [x] Speed up the internal functions, particularly the ICEWS-to-CAMEO-code conversion. This is a major bottleneck.
52 | - [ ] Circle back around to update and improve the network-stats functionality to extract and present more useful information from the generated networks.
53 | - [ ] Clean up the documentation and clarify arguments to make it easier for others to use.
54 | - [ ] Set up more informative error messages :)
55 | 
56 | 
57 | Installation
58 | ------------
59 | `devtools::install_github("jrhammond/EventNetworks")`
60 | ```
61 | > pacman::p_load(EventNetworks)
62 | > 
63 | > sample_data <- EventNetworks::eventNetworks(
64 | +   start_date = 20140101
65 | +   , end_date = 20150101
66 | +   , level = 'pentaclass'
67 | +   , dv_key = [personal Harvard dataverse redacted]
68 | +   , phoenix_loc = '/Users/localadmin/Box Sync/DataSets/phoenix'
69 | +   , icews_loc = '/Users/localadmin/Box Sync/DataSets/icews'
70 | +   , histphoenix_loc = '/Users/localadmin/Box Sync/DataSets/CCHPED_v2017_06_30'
71 | +   , dv_server = 'harvard.dataverse.edu'
72 | +   , update = F
73 | +   , actorset = 'states'
74 | +   , codeset = 'all'
75 | +   , time_window = 'month'
76 | +   , code_subset = 'all'
77 | +   , tie_type = 'count'
78 | +   , sources = 'all'
79 | +   )
80 | Checking ICEWS data...
81 | Ingesting ICEWS data...
82 | Reading in files...
83 |   |===============================================================| 100%
84 | Process complete
85 | Munging ICEWS data...
86 | Ingesting Phoenix data...
87 | Note: specified range precedes the earliest Phoenix data.
88 | Reading in files...
89 |   |===============================================================| 100%
90 | Process complete
91 | Ingesting historic Phoenix data...
92 | Read 817955 rows and 25 (of 25) columns from 0.096 GB file in 00:00:03
93 | Read 1092211 rows and 25 (of 25) columns from 0.133 GB file in 00:00:03
94 | Read 2906715 rows and 25 (of 25) columns from 0.373 GB file in 00:00:08
95 |  ```
96 | 


--------------------------------------------------------------------------------
/data/agentnames.txt~:
--------------------------------------------------------------------------------
  1 | Government	GOV	GOV
  2 | Executive	GOV	GOV1
  3 | Executive Office	GOV	GOV2
  4 | Cabinet	GOV	GOV3
  5 | Agriculture / Fishing / Forestry Ministry	GOVAGR	GOVAGR
  6 | Finance / Economy / Commerce / Trade Ministry	GOVBUS	GOVBUS
  7 | Defense / Security Ministry	GOVMIL	GOVMIL
  8 | Education Ministry	GOVEDU	GOVEDU
  9 | Energy Ministry	GOVENV	GOV4
 10 | Environment Ministry	GOVENV	GOVENV
 11 | Transportation Ministry	GOVENV	GOVENV5
 12 | Food Ministry	GOVENV	GOVENV6
 13 | Disaster Ministry	GOVENV	GOVENV7
 14 | Foreign Ministry	GOV	GOVENV8
 15 | Health Ministry	GOVHLH	GOVHLH
 16 | Interior / Home Ministry	GOV	GOV9
 17 | Industrial / Textiles / Mining Ministry	GOVBUS	GOVHLH10
 18 | Science / Tech / Knowledge / Innovation Ministry	GOVHLH	GOVHLH11
 19 | NGO Ministry	GOV	GOVHLH12
 20 | Labor Ministry	GOVLAB	GOVLAB
 21 | Post / Tecoms Ministry	GOVBUS	GOVLAB13
 22 | Science / Tech Ministry	GOVBUS	GOVLAB14
 23 | Water Ministry	GOVLAB	GOVLAB15
 24 | Women / Children / Social / Welfare / Development / Religion Ministry	GOVDEV	GOVDEV
 25 | Justice / Law Ministry	GOVJUD	GOVJUD
 26 | Tourism Ministry	GOVJUD	GOVJUD16
 27 | Drugs Ministry	GOVJUD	GOVJUD17
 28 | Human Rights Ministry	GOVHRI	GOVHRI
 29 | Elections Ministry	GOVHRI	GOVHRI18
 30 | Housing / Construction Ministry	GOVHRI	GOVHRI19
 31 | Intelligence Ministry	SPY	SPY
 32 | Information / Communication / Transparency Ministry	GOVMED	GOVMED
 33 | State Media	GOVMED	GOVMED1
 34 | Management / Budget / Planning / Organization Ministry	GOV	GOV2
 35 | State Owned Enterprises	GOVBUS	GOVBUS20
 36 | State-Owned Agricultural	GOVBUS	GOVBUS21
 37 | State-Owned Transportation	GOVBUS	GOVBUS22
 38 | State-Owned Utilities	GOVBUS	GOVBUS23
 39 | State-Owned Heavy Industrial / Chemical	GOVBUS	GOVBUS24
 40 | State-Owned Defense / Security	GOVBUS	GOVBUS25
 41 | State-Owned Durable Goods	GOVBUS	GOVBUS26
 42 | State-Owned Consumer Goods	GOVBUS	GOVBUS27
 43 | State-Owned Consumer Services	GOVBUS	GOVBUS28
 44 | State-Owned Consulting / Financial Services	GOVBUS	GOVBUS29
 45 | State-Owned Science / Tech / Knowledge / Innovation	GOVBUS	GOVBUS30
 46 | State-Owned Medical / Health / Pharmeceutical	GOVBUS	GOVBUS31
 47 | Police	COP	COP
 48 | National / Border Divisions	COP	COP32
 49 | Provincial Divisions	COP	COP33
 50 | Municipal Divisions	COP	COP34
 51 | Military	MIL	MIL
 52 | Military Intelligence	MILSPY	MILSPY
 53 | Military Intelligence Headquarters	MILSPY	MILSPY35
 54 | Military Intelligence Special Forces	MILSPY	MILSPY36
 55 | Military Intelligence Infantry / Regular	MILSPY	MILSPY37
 56 | Military Intelligence Mechanized (Ships, Tanks, Planes)	MILSPY	MILSPY38
 57 | Military Intelligence Education / Training	MILSPY	MILSPY39
 58 | Military Intelligence Support	MILSPY	MILSPY40
 59 | Military Intelligence Medical	MILSPY	MILSPY41
 60 | Research And Design Wings	MIL	MIL42
 61 | Research And Design Wings Headquarters	MIL	MIL43
 62 | Research And Design Wings Education / Training	MIL	MIL44
 63 | Research And Design Wings Support	MIL	MIL45
 64 | Research And Design Wings Medical	MIL	MIL46
 65 | Army	MIL	MIL47
 66 | Army Headquarters	MIL	MIL48
 67 | Army Special Forces	MIL	MIL49
 68 | Army Infantry / Regular	MIL	MIL50
 69 | Army Mechanized (Ships, Tanks, Planes)	MIL	MIL51
 70 | Army Education / Training	MIL	MIL52
 71 | Army Support	MIL	MIL53
 72 | Army Medical	MIL	MIL54
 73 | Navy	MIL	MIL55
 74 | Navy Headquarters	MIL	MIL56
 75 | Navy Special Forces	MIL	MIL57
 76 | Navy Infantry / Regular	MIL	MIL58
 77 | Navy Mechanized (Ships, Tanks, Planes)	MIL	MIL59
 78 | Navy Education / Training	MIL	MIL60
 79 | Navy Support	MIL	MIL61
 80 | Navy Medical	MIL	MIL62
 81 | Air Force	MIL	MIL63
 82 | Air Force Headquarters	MIL	MIL64
 83 | Air Force Special Forces	MIL	MIL65
 84 | Air Force Infantry / Regular	MIL	MIL66
 85 | Air Force Mechanized (Ships, Tanks, Planes)	MIL	MIL67
 86 | Air Force Education / Training	MIL	MIL68
 87 | Air Force Support	MIL	MIL69
 88 | Air Force Medical	MIL	MIL70
 89 | Marines	MIL	MIL71
 90 | Marines Headquarters	MIL	MIL72
 91 | Marines Special Forces	MIL	MIL73
 92 | Marines Infantry / Regular	MIL	MIL74
 93 | Marines Mechanized (Ships, Tanks, Planes)	MIL	MIL75
 94 | Marines Education / Training	MIL	MIL76
 95 | Marines Support	MIL	MIL77
 96 | Marines Medical	MIL	MIL78
 97 | Coast Guard	MIL	MIL79
 98 | Coast Guard Headquarters	MIL	MIL80
 99 | Coast Guard Special Forces	MIL	MIL81
100 | Coast Guard Infantry / Regular	MIL	MIL82
101 | Coast Guard Mechanized (Ships, Tanks, Planes)	MIL	MIL83
102 | Coast Guard Education / Training	MIL	MIL84
103 | Coast Guard Support	MIL	MIL85
104 | Coast Guard Medical	MIL	MIL86
105 | Legislative / Parliamentary	LEG	LEG
106 | Upper House	LEG	LEG87
107 | Lower House	LEG	LEG88
108 | Unicameral	LEG	LEG89
109 | Judicial	JUD	JUD
110 | National / Supreme Court	JUD	JUD90
111 | Provincial Court	JUD	JUD91
112 | Municipal / District Court	JUD	JUD92
113 | Civil Court	JUD	JUD93
114 | Religious Court	JUD	JUD94
115 | Military / Tribunal	JUD	JUD95
116 | Local	JUD	JUD96
117 | Provincial	JUD	JUD97
118 | Municipal	JUD	JUD98
119 | Government Religious	GOVREL	GOVREL
120 | Parties	PTY	PTY
121 | (National) Major Party	PTY	PTY99
122 | Opposition Major Party (Out Of Government)	OPPPTY	OPPPTY
123 | Government Major Party (In Government)	GOVPTY	GOVPTY
124 | (National) Minor Party	PTY	PTY100
125 | Opposition Minor Party (Out Of Government)	OPPPTY	OPPPTY101
126 | Government Minor Party (In Government)	GOVPTY	GOVPTY102
127 | Provincial Party	PTY	PTY103
128 | Opposition Provincial Party (Out Of Government)	OPPPTY	OPPPTY104
129 | Government Provincial Party (In Government)	GOVPTY	GOVPTY105
130 | Municipal Party	PTY	PTY106
131 | Opposition Municipal Party (Out Of Government)	OPPPTY	OPPPTY107
132 | Government Municipal Party (In Government)	GOVPTY	GOVPTY108
133 | Dissident	OPP	OPP109
134 | Criminals / Gangs	CRM	CRM
135 | Protestors / Popular Opposition / Mobs	OPP	OPP
136 | Exiles	OPP	OPP110
137 | Banned Parties	OPPPTY	OPP111
138 | Radicals / Extremists / Fundamentalists	REB	RAD
139 | Organized Violent	REB	RAD112
140 | Rebel	REB	REB
141 | Insurgents	REB	INS
142 | Separatists	REB	SEP
143 | Social	CVL	CIV113
144 | Agricultural	BUS	BUSAGR
145 | Business	BUS	BUS
146 | Agricultural Business	BUS	BUS122
147 | Transportation Business	BUS	BUS123
148 | Utilities Business	BUS	BUS124
149 | Heavy Industrial / Chemical Business	BUS	BUS125
150 | Defense / Security Business	BUS	BUS126
151 | Durable Goods Business	BUS	BUS127
152 | Consumer Goods Business	BUS	BUS128
153 | Consumer Services Business	BUS	BUS129
154 | Consulting / Financial Services Business	BUS	BUS130
155 | Science / Tech / Knowledge / Innovation Business	BUS	BUS131
156 | Medical / Health / Pharmeceutical Business	BUS	BUS132
157 | Education	EDU	EDU
158 | Student	EDU	EDU115
159 | National Ethnic	CVL	CVL116
160 | National Ethnic Majority	CVL	CVL117
161 | National Ethnic Minority	CVL	CVL118
162 | General Population / Civilian / Social	CVL	CVL
163 | Labor	LAB	LAB
164 | Legal	JUD	JUD114
165 | Media	MED	MED
166 | News	MED	MED133
167 | Print News	MED	MED134
168 | Radio News	MED	MED135
169 | Television News	MED	MED136
170 | Online News	MED	MED137
171 | Entertainment	MED	MED138
172 | Print Entertainment	MED	MED139
173 | Radio Entertainment	MED	MED140
174 | Television Entertainment	MED	MED141
175 | Online Entertainment	MED	MED142
176 | Medical / Health	MED	MED99e
177 | Refugees / Displaced	REF	REF
178 | National Religious	REF	REF119
179 | Religious Majority	REF	REF120
180 | Religious Minority	REF	REF121
181 | Nongovernmental Organizations / Activists	IGO	IGO143
182 | Agricultural NGOs	IGOAGR	IGO148
183 | Business NGOs	IGOBUS	IGO144
184 | Education NGOs	IGOEDU	IGO146
185 | Energy NGOs	IGO	IGO147
186 | Environment NGOs	IGOENV	IGOENV
187 | Ethnic NGOs	IGOEDU	IGO
188 | Development NGOs	IGODEV	IGODEV
189 | Human Rights NGOs	IGOHRI	IGOHRI
190 | Information / Communication / Transparency NGOs	IGO	IGO150
191 | Labor NGOs	IGOLAB	IGO154
192 | Legal NGOs	IGOHRI	IGOHRI185
193 | Media NGOs	IGO	IGO438
194 | Medical / Health NGOs	IGOHLH	IGOHLH
195 | Refugees / Displaced NGOs	IGOREF	IGO145
196 | Charity NGOs	IGOHLH	IGOHLH149
197 | Elite	CVL	ELI
198 | Unidentified Forces	UAF	UAF
199 | Unaffiliated Sectors	iSec	iSec
200 | International Religious	REL	REL
201 | Atheist	ATH	ATH
202 | Animist	PAG	PAG
203 | Shamanist	SHA	SHA
204 | Muslim	MOS	MOS
205 | Alewi	MOSALE	MOSALE
206 | Sunni	MOSSUN	MOSSUN
207 | Shia	MOSSHI	MOSSHI
208 | Sufi	MOSSFI	MOSSFI
209 | Druze	MOSDRZ	MOSDRZ
210 | Hindu	HIN	HIN
211 | Christian	CHR	CHR
212 | Catholic	CHRCTH	CHRCTH
213 | Protestant	CHRPRO	CHRPRO
214 | Coptic	CHRCPT	CHRCPT
215 | Jehovah's Witness	CHRJHW	CHRJHW
216 | Maronite	CHRMRN	CHRMRN
217 | Orthodox	CHRDOX	CHRDOX
218 | Buddhist	BUD	BUD
219 | Jewish	JEW	JEW
220 | Daoist	TAO	REL152
221 | Shinto	REL	REL153
222 | Sikh	SIK	SIK
223 | Ahmadiyya	SIK	SIK154
224 | Jain	JAN	JAN
225 | Mormon	LDS	LDS
226 | Baha'I	BAH	BAH
227 | Zoroastrian / Mazdi	ZRO	ZRO
228 | Confucian	CON	CON
229 | International Ethnic	REL	REL155
230 | Chechen	CNY	CNY
231 | Kashmiri	KAS	KAS
232 | Acehnese	KAS	KAS156
233 | Han	KAS	KAS157
234 | Tamil	TAM	TAM
235 | Sinhalese	SNL	SNL
236 | Bodo	SNL	SNL158
237 | Shan	SNL	SNL159
238 | Mizo	SNL	SNL160
239 | Chakma	CKM	CKM
240 | Kuki	CKM	CKM161
241 | Tripuri	CKM	CKM162
242 | Moro	CKM	CKM163
243 | Uyghur	UIG	UIG
244 | Hmong	UIG	UIG164
245 | Karen	UIG	UIG165
246 | Assamese	UIG	UIG167
247 | Tibetan	TIB	TIB
248 | Albanian (Ethnic Group)	ABN	ABN
249 | Arab	ARB	ARB
250 | Arab, Arabian	ARB	ARB16691
251 | Arab, Hassaniya	ARB	ARB16692
252 | Arab, Levant	ARB	ARB16693
253 | Arab, Libyan	ARB	ARB16694
254 | Arab, Maghreb	ARB	ARB16695
255 | Arab, Shuwa	ARB	ARB16696
256 | Arab, Sudan	ARB	ARB16697
257 | Arab, Yemeni	ARB	ARB16698
258 | Bedouin	BED	BED
259 | Bedouin, Arabian	BED	BED16716
260 | Bedouin, Saharan	BED	BED16717
261 | Croat (Ethnic Group)	CRO	CRO
262 | Gypsy	GYP	GYP
263 | Hausa	HAU	HAU
264 | Hutu	HUT	HUT
265 | Ibo	IBO	IBO
266 | Ljaw	IJW	IJW
267 | Krahn	KRH	KRH
268 | Kurd (Ethnic Group)	KUR	KUR
269 | Mandingoe	MAN	MAN
270 | Ogoni	OGO	OGO
271 | Palestinian	PAL	PAL
272 | Serb (Ethnic Group)	SER	SER
273 | Slav	SLA	SLA
274 | Slav, Eastern	SLA	SLA16862
275 | Slav, Southern	SLA	SLA16863
276 | Slav, Western	SLA	SLA16864
277 | Tuareg	TRG	TRG
278 | Turk (Ethnic Group)	TRK	TRK
279 | Tutsi	TUT	TUT
280 | Yoruba	YRB	YRB
281 | Josua Project Ethnic Groups	ETN	ETN
282 | Aborigine	ETN	ETN16680
283 | Aceh of Sumatra	ETN	ETN16681
284 | Adamawa-Ubangi	ETN	ETN16682
285 | Adi	ETN	ETN16683
286 | Afar	ETN	ETN16684
287 | Aimaq	ETN	ETN16685
288 | Albanian	ETN	ETN16686
289 | Altaic	ETN	ETN16687
290 | Amazon	ETN	ETN16688
291 | Anglo-American	ETN	ETN16689
292 | Anglo-Celt	ETN	ETN16690
293 | Armenian	ETN	ETN16699
294 | Assyrian / Aramaic	ETN	ETN16700
295 | Atlantic	ETN	ETN16701
296 | Atlantic-Jola	ETN	ETN16702
297 | Atlantic-Wolof	ETN	ETN16703
298 | Aymara	ETN	ETN16704
299 | Azerbaijani	ETN	ETN16705
300 | Aztec	ETN	ETN16706
301 | Bali-Sasak	ETN	ETN16707
302 | Baloch	ETN	ETN16708
303 | Baltic	ETN	ETN16709
304 | Banjar of Kalimantan	ETN	ETN16710
305 | Bantu	ETN	ETN16711
306 | Bantu, Makua-Yao	ETN	ETN16712
307 | Bantu, Cameroon-Bamileke	ETN	ETN16908
308 | Bantu, Central-Congo	ETN	ETN16909
309 | Bantu, Central-East	ETN	ETN16910
310 | Bantu, Central-Lakes	ETN	ETN16911
311 | Bantu, Central-Luba	ETN	ETN16912
312 | Bantu, Central-South	ETN	ETN16913
313 | Bantu, Central-Southeast	ETN	ETN16914
314 | Bantu, Central-Southwest	ETN	ETN16915
315 | Bantu, Central-Tanzania	ETN	ETN16916
316 | Bantu, Chewa-Sena	ETN	ETN16917
317 | Bantu, East-Coastal	ETN	ETN16918
318 | Bantu, Gikuyu-Kamba	ETN	ETN16919
319 | Bantu, Kongo	ETN	ETN16920
320 | Bantu, Nguni	ETN	ETN16921
321 | Bantu, Northwest	ETN	ETN16922
322 | Bantu, Shona	ETN	ETN16923
323 | Bantu, Sotho-Tswana	ETN	ETN16924
324 | Bantu, Southeastern	ETN	ETN16925
325 | Bantu, Swahili	ETN	ETN16926
326 | Makua-Yao	ETN	ETN16712
327 | Cameroon-Bamileke	ETN	ETN16908
328 | Central-Congo	ETN	ETN16909
329 | Central-East	ETN	ETN16910
330 | Central-Lakes	ETN	ETN16911
331 | Central-Luba	ETN	ETN16912
332 | Central-South	ETN	ETN16913
333 | Central-Southeast	ETN	ETN16914
334 | Central-Southwest	ETN	ETN16915
335 | Central-Tanzania	ETN	ETN16916
336 | Chewa-Sena	ETN	ETN16917
337 | East-Coastal	ETN	ETN16918
338 | Gikuyu-Kamba	ETN	ETN16919
339 | Kongo	ETN	ETN16920
340 | Nguni	ETN	ETN16921
341 | Northwest	ETN	ETN16922
342 | Shona	ETN	ETN16923
343 | Sotho-Tswana	ETN	ETN16924
344 | Southeastern	ETN	ETN16925
345 | Swahili	ETN	ETN16926
346 | Barito of Kalimantan	ETN	ETN16713
347 | Basque	ETN	ETN16714
348 | Batak-Nias of Sumatra	ETN	ETN16715
349 | Beja	ETN	ETN16718
350 | Bengali	ETN	ETN16719
351 | Benue	ETN	ETN16720
352 | Berber-Saharan	ETN	ETN16721
353 | Berber-Kabyle	ETN	ETN16722
354 | Berber-Riff	ETN	ETN16723
355 | Berber-Shawiya	ETN	ETN16724
356 | Berber-Shilha	ETN	ETN16725
357 | Bhil	ETN	ETN16726
358 | Bhojpur-Maithili	ETN	ETN16727
359 | Bhutanese	ETN	ETN16728
360 | Bihari	ETN	ETN16729
361 | Bouyei	ETN	ETN16730
362 | Brahui	ETN	ETN16731
363 | Bugi-Makassar of Sulawesi	ETN	ETN16732
364 | Bungku-Bajau	ETN	ETN16733
365 | Burmese	ETN	ETN16734
366 | Borneo-Kalimantan	ETN	ETN16735
367 | Afro-Caribbean	ETN	ETN16736
368 | Afro-Caribbean, Anglophone	ETN	ETN16737
369 | Afro-Caribbean, Dutch	ETN	ETN16906
370 | Afro-Caribbean, Francophone	ETN	ETN16907
371 | Anglophone	ETN	ETN16737
372 | Dutch	ETN	ETN16906
373 | Francophone	ETN	ETN16907
374 | Caucasus	ETN	ETN16738
375 | Chadic	ETN	ETN16739
376 | Kanuri-Saharan	ETN	ETN16740
377 | Cham	ETN	ETN16741
378 | Chinese	ETN	ETN16742
379 | Chinese-Hui	ETN	ETN16743
380 | Egyptian	ETN	ETN16744
381 | Ethiopian	ETN	ETN16745
382 | Fiji	ETN	ETN16746
383 | Filipino	ETN	ETN16747
384 | Filipino, Central	ETN	ETN16748
385 | Filipino, Muslim	ETN	ETN16749
386 | Filipino, Tribal	ETN	ETN16750
387 | Finno-Ugric	ETN	ETN16751
388 | Finno-Ugric, Saami	ETN	ETN16752
389 | Flores-Sumba-Alor	ETN	ETN16753
390 | French	ETN	ETN16754
391 | Fulani / Fulbe	ETN	ETN16755
392 | Garo-Tripuri	ETN	ETN16756
393 | Germanic	ETN	ETN16757
394 | Gond	ETN	ETN16758
395 | Gorontalo of Sulawesi	ETN	ETN16759
396 | Greek	ETN	ETN16760
397 | Guarani	ETN	ETN16761
398 | Guera-Naba of Chad	ETN	ETN16762
399 | Guinean	ETN	ETN16763
400 | Gujarati	ETN	ETN16764
401 | Gur	ETN	ETN16765
402 | Hani	ETN	ETN16766
403 | Hindi	ETN	ETN16767
404 | Hispanic	ETN	ETN16768
405 | Hungarian	ETN	ETN16769
406 | Igbo	ETN	ETN16770
407 | Ijaw	ETN	ETN16771
408 | Inuit	ETN	ETN16772
409 | Italian	ETN	ETN16773
410 | Japanese	ETN	ETN16774
411 | Jat	ETN	ETN16775
412 | Jawa	ETN	ETN16776
413 | Jews	ETN	ETN16777
414 | Kaili-Tomini of Sulawesi	ETN	ETN16778
415 | Kannada	ETN	ETN16779
416 | Kazakh	ETN	ETN16780
417 | Khoisan	ETN	ETN16781
418 | Kyrgyz	ETN	ETN16782
419 | Korean	ETN	ETN16783
420 | Kru	ETN	ETN16784
421 | Kuki-Chin-Naga	ETN	ETN16785
422 | Lampung of Sumatra	ETN	ETN16787
423 | Lao	ETN	ETN16788
424 | Li	ETN	ETN16789
425 | Lisu	ETN	ETN16790
426 | Madura of Java	ETN	ETN16791
427 | Malagasy	ETN	ETN16792
428 | Malay	ETN	ETN16793
429 | Malayali	ETN	ETN16794
430 | Maldivian	ETN	ETN16795
431 | Malinke	ETN	ETN16796
432 | Malinke-Bambara	ETN	ETN16797
433 | Malinke-Jula	ETN	ETN16798
434 | Maltese	ETN	ETN16799
435 | Maluku	ETN	ETN16800
436 | Maluku, Central	ETN	ETN16801
437 | Maluku, Northern	ETN	ETN16802
438 | Maluku, Southern	ETN	ETN16803
439 | Manchu	ETN	ETN16804
440 | Mande	ETN	ETN16805
441 | Marathi-Konkani	ETN	ETN16806
442 | Maya	ETN	ETN16807
443 | Melayu of Sumatra	ETN	ETN16808
444 | Miao / Hmong	ETN	ETN16809
445 | Micronesian	ETN	ETN16810
446 | Minahasa-Sangir of Sulawesi	ETN	ETN16811
447 | Minangkabau-Rejang of Sumatra	ETN	ETN16812
448 | Miri-Kachin	ETN	ETN16813
449 | Mixe	ETN	ETN16814
450 | Mixteco	ETN	ETN16815
451 | Mizo-Lushai	ETN	ETN16816
452 | Mongolian	ETN	ETN16817
453 | Mon-Khmer	ETN	ETN16818
454 | Munda-Santal	ETN	ETN16819
455 | Musi of Sumatra	ETN	ETN16820
456 | Nepali-Pahari	ETN	ETN16821
457 | New Caledonia	ETN	ETN16822
458 | New Guinea	ETN	ETN16823
459 | Nilotic	ETN	ETN16824
460 | North American Indigenous	ETN	ETN16825
461 | Nosu	ETN	ETN16826
462 | Nuba Mountains	ETN	ETN16827
463 | Nubian	ETN	ETN16828
464 | Nupe	ETN	ETN16829
465 | Nuristan	ETN	ETN16830
466 | Ogan of Sumatra	ETN	ETN16831
467 | Omotic	ETN	ETN16832
468 | Oraon	ETN	ETN16833
469 | Oriya	ETN	ETN16834
470 | Oromo	ETN	ETN16835
471 | Other Central American Indigenous	ETN	ETN16836
472 | Other Hispanic American	ETN	ETN16837
473 | Other Pacific Islanders	ETN	ETN16838
474 | Other Southeast Asian	ETN	ETN16839
475 | Other South Asian	ETN	ETN16840
476 | Other Sub-Saharan African	ETN	ETN16841
477 | Otomi	ETN	ETN16842
478 | Ouaddai-Fur	ETN	ETN16843
479 | Parsee	ETN	ETN16844
480 | Pasemah of Sumatra	ETN	ETN16845
481 | Pashtun	ETN	ETN16846
482 | Persian	ETN	ETN16847
483 | Polynesian	ETN	ETN16848
484 | Portuguese	ETN	ETN16849
485 | Portuguese, Brazilian	ETN	ETN16850
486 | Portuguese, European	ETN	ETN16851
487 | Punjabi	ETN	ETN16852
488 | Pygmy	ETN	ETN16853
489 | Quechua	ETN	ETN16854
490 | Rajasthan	ETN	ETN16855
491 | Romanian	ETN	ETN16856
492 | South Himalaya	ETN	ETN16857
493 | Sara-Bagirmi	ETN	ETN16858
494 | Scandinavian	ETN	ETN16859
495 | Sindhi	ETN	ETN16860
496 | Sinhala	ETN	ETN16861
497 | Solomons	ETN	ETN16865
498 | Somali	ETN	ETN16866
499 | Songhai	ETN	ETN16867
500 | Soninke	ETN	ETN16868
501 | South American Indigenous	ETN	ETN16869
502 | Spanish	ETN	ETN16870
503 | Sudanic	ETN	ETN16871
504 | Sunda-Betawi of Java	ETN	ETN16872
505 | Susu	ETN	ETN16873
506 | Tai	ETN	ETN16874
507 | Tai Dam	ETN	ETN16875
508 | Tai-Kadai	ETN	ETN16876
509 | Taiwan Indigenous	ETN	ETN16877
510 | Tajik	ETN	ETN16878
511 | Talysh	ETN	ETN16879
512 | Telugu	ETN	ETN16880
513 | Thai	ETN	ETN16881
514 | Timor	ETN	ETN16882
515 | Toraja of Sulawesi	ETN	ETN16883
516 | Tukangbesi of Sulawesi	ETN	ETN16884
517 | Turkish	ETN	ETN16885
518 | Turkmen	ETN	ETN16886
519 | Ural-Siberian	ETN	ETN16887
520 | Urdu Muslim	ETN	ETN16888
521 | Uzbek	ETN	ETN16889
522 | Vanuatu	ETN	ETN16890
523 | Vietnamese	ETN	ETN16891
524 | West China / Lolo	ETN	ETN16892
525 | West Malaysia Indigenous	ETN	ETN16893
526 | Yao-Mien	ETN	ETN16894
527 | Zapoteco	ETN	ETN16895
528 | Zhuang	ETN	ETN16896
529 | Undefined	ETN	ETN16897
530 | Arab World	ETN	ETN16898
531 | Caucasian Peoples	ETN	ETN16899
532 | Sub-Saharan African	ETN	ETN16900
533 | Luri-Bakhtiari	ETN	ETN16901
534 | Deaf	ETN	ETN16902
535 | Afro-American	ETN	ETN16903
536 | Afro-American, Hispanic	ETN	ETN16904
537 | Afro-American, Northern	ETN	ETN16905
538 | Banda	ETN	ETN533
539 | Ideological	---	OPP
540 | Nationalist	---	OPP173
541 | Fundamentalist	--	OPP174
542 | Secular	---	OPP175
543 | Tribalist	---	OPP176
544 | Communist	---	OPP177
545 | Center Left	---	OPP178
546 | Center Right	---	OPP179
547 | Far Left	---	OPP180
548 | Far Right	---	OPP181
549 | Centrist	---	OPP182
550 | Libertarian	---	OPP183
551 | Anarchist	---	OPP184
552 | Nongovernmental Organization (International)	IGO	IGO
553 | Medical / Health IGOs	IGOHLH	IGOHLH
554 | Business IGOs	IGOBUS	IGOBUS
555 | Refugees IGOs	IGOREF	IGOREF
556 | Education IGOs	IGOEDU	IGOEDU
557 | Development IGOs	IGODEV	IGODEV
558 | Energy IGOs	IGOBUS	IGOBUS
559 | Agricultural IGOs	IGOAGR	IGOAGR
560 | Human Rights IGOs	IGOHRI	IGOHRI
561 | Charity IGOs	IGO	IGO
562 | Information / Communication / Transparency IGOs	IGO	IGO
563 | Environment IGOs	IGO	IGO196
564 | Legal IGOs	IGO	IGO9f7
565 | International Government Organization	IGO	IGO
566 | Global	IGO	IGO
567 | Global Information / Communication / Transparency IGOs	IGOMED	IGOMED
568 | Global Energy IGOs	IGOBUS	IGOBUS
569 | Global Diplomatic IGOs	IGO	IGO
570 | Global Defense / Security IGOs	IGOMIL	IGOMIL
571 | Global Law / Justice / Judicial IGOs	IGOJUD	IGOJUD
572 | Global Environment IGOs	IGOENV	IGOENV
573 | Global Economic/Financial/Trade IGOs	IGOBUS	IGOBUS
574 | Global Development IGOs	IGODEV	IGODEV
575 | Global Health IGOs	IGOHLH	IGOHLH
576 | Global Human Rights IGOs	IGOHRI	IGOHRI
577 | Global Agricultural IGOs	IGOAGR	IGOAGR
578 | Global Refugees IGOs	IGOREF	IGOREF
579 | Regional	IGO	IGO
580 | Regional Information / Communication / Transparency IGOs	IGOMED	IGOMED
581 | Regional Energy IGOs	IGOBUS	IGOBUS
582 | Regional Diplomatic IGOs	IGO	IGO
583 | Regional Defense / Security IGOs	IGOMIL	IGOMIL
584 | Regional Law / Justice / Judicial IGOs	IGOJUD	IGOJUD
585 | Regional Environment IGOs	IGOENV	IGOENV
586 | Regional Economic/Financial/Trade IGOs	IGOBUS	IGOBUS
587 | Regional Development IGOs	IGODEV	IGODEV
588 | Regional Health IGOs	IGOHLH	IGOHLH
589 | Regional Human Rights IGOs	IGOHRI	IGOHRI
590 | Regional Agricultural IGOs	IGOAGR	IGOAGR
591 | Regional Refugees IGOs	IGOREF	IGOREF
592 | International Dissident	INT	INT
593 | International Criminals / Gangs	INT	INT205
594 | International Protestors / Popular Opposition / Mobs	INT	INT206
595 | International Banned Parties	INT	INT207
596 | International Exiles	INT	INT208
597 | International Radicals / Extremists / Fundamentalists	IMG	IMG
598 | International Terrorists	IMG	IMG210
599 | International Rebels	IMG	IMG211
600 | International Insurgents	IMG	IMG212
601 | International Separatists	IMG	IMG213
602 | Multinational Corporation	MNC	MNC
603 | Agricultural MNCs	MNC	MNC215
604 | Transportation MNCs	MNC	MNC216
605 | Utilities MNCs	MNC	MNC217
606 | Heavy Industrial / Chemical MNCs	MNC	MNC218
607 | Defense / Security MNCs	MNC	MNC219
608 | Durable Goods MNCs	MNC	MNC220
609 | Consumer Goods MNCs	MNC	MNC221
610 | Consumer Services MNCs	MNC	MNC222
611 | Consulting / Financial Services MNCs	MNC	MNC223
612 | Science / Tech / Knowledge / Innovation MNCs	MNC	MNC224
613 | Medical / Health / Pharmeceutical MNCs	MNC	MNC225
614 | NULL	——— 


--------------------------------------------------------------------------------
/data/agents.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrhammond/EventNetworks/77d7b76763ef8214ab837e6d4bfd86b94bd22b21/data/agents.RData


--------------------------------------------------------------------------------
/data/convert_cameo_data.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrhammond/EventNetworks/77d7b76763ef8214ab837e6d4bfd86b94bd22b21/data/convert_cameo_data.RData


--------------------------------------------------------------------------------
/data/states.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrhammond/EventNetworks/77d7b76763ef8214ab837e6d4bfd86b94bd22b21/data/states.RData


--------------------------------------------------------------------------------
/man/agents.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/agents_doc.R
 3 | \docType{data}
 4 | \name{agents}
 5 | \alias{agents}
 6 | \title{ICEWS CAMEO actor codes}
 7 | \format{An object of class \code{data.table} (inherits from \code{data.frame}) with 614 rows and 3 columns.}
 8 | \usage{
 9 | data(agents)
10 | }
11 | \description{
12 | Merge table to convert actors to CAMEO format using conversion tables created
13 |    by Phil Schrodt (https://github.com/philip-schrodt/text_to_CAMEO)
14 | }
15 | \keyword{datasets}
16 | 


--------------------------------------------------------------------------------
/man/convert_cameo.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/convert_cameo.R
 3 | \name{convert_cameo}
 4 | \alias{convert_cameo}
 5 | \alias{countrycode}
 6 | \title{Convert CAMEO Codes}
 7 | \usage{
 8 | convert_cameo(cameo)
 9 | }
10 | \arguments{
11 | \item{cameo}{Vector of CAMEO event codes.}
12 | }
13 | \description{
14 | Converts CAMEO codes. CAMEO is an event ontology used in event data projects, including Phoenix.
15 | }
16 | \examples{
17 | events$Description <- phoxy::convert_cameo(events$EventCode) # Vector of values to be converted
18 | 
19 | }
20 | \keyword{data}
21 | \keyword{event}
22 | 


--------------------------------------------------------------------------------
/man/convert_cameo_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/convert_cameo_data.R
 3 | \docType{data}
 4 | \name{convert_cameo_data}
 5 | \alias{convert_cameo_data}
 6 | \title{CAMEO code translation data frame}
 7 | \format{A data frame with 310 rows and 2 columns}
 8 | \usage{
 9 | convert_cameo_data
10 | }
11 | \description{
12 | A data frame with 310 rows and 2 columns.
13 | Used internally by the \code{convert_cameo()} function.
14 | }
15 | \details{
16 | \itemize{
17 |   \item CAMEOcode: the 310 different low-level CAMEO codes.
18 |   \item EventDescription: Human-readable descriptions of the codes.
19 | }
20 | }
21 | \note{
22 | The current CAMEO codebook is located here: \url{http://eventdata.parusanalytics.com/data.dir/cameo.html}.
23 | }
24 | \keyword{datasets}
25 | 


--------------------------------------------------------------------------------
/man/download_icews.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/download_icews.R
 3 | \name{get_icewslinks}
 4 | \alias{get_icewslinks}
 5 | \title{Download the ICEWS Dataset}
 6 | \usage{
 7 | get_icewslinks(dv_server, dv_key)
 8 | }
 9 | \arguments{
10 | \item{destpath}{The path to the directory where ICEWS should go.}
11 | }
12 | \description{
13 | Download and unzip all of the data files for the ICEWS dataset from the
14 | Harvard Dataverse into a given directory.
15 | }
16 | \note{
17 | This function is still in development and may contain errors and change quickly.
18 | }
19 | \examples{
20 | 
21 | download_icews("~/ICEWS/")
22 | 
23 | }
24 | \author{
25 | Original code and concept: Tony Boyles
26 | }
27 | 


--------------------------------------------------------------------------------
/man/download_phoenix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/download_phoenix.R
 3 | \name{get_phoenixlinks}
 4 | \alias{get_phoenixlinks}
 5 | \title{Download the Phoenix Dataset}
 6 | \usage{
 7 | get_phoenixlinks(start_date = as.Date("2014-06-20"), end_date = Sys.Date())
 8 | }
 9 | \arguments{
10 | \item{destpath}{The path to the directory where Phoenix should go.}
11 | 
12 | \item{phoenix_version.}{Download a specific version of Phoenix ("v0.1.0" or the current version by default).}
13 | }
14 | \description{
15 | Download and unzip all of the data files for the Phoenix dataset from the
16 | Phoenix data website into a given directory.
17 | }
18 | \note{
19 | This function, like Phoenix, is still in development and may contain errors and change quickly.
20 | }
21 | \examples{
22 | 
23 | download_phoenix("~/OEDA/phoxy_test/", phoenix_version = "current")
24 | 
25 | }
26 | \author{
27 | Original code credit: Andy Halterman
28 | }
29 | 


--------------------------------------------------------------------------------
/man/eventNetworks.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/eventNetworks.R
  3 | \name{eventNetworks}
  4 | \alias{eventNetworks}
  5 | \title{Convert Phoenix event data to daily event-networks.}
  6 | \usage{
  7 | eventNetworks(start_date, end_date, level, dv_key, phoenix_loc = NULL,
  8 |   icews_loc = NULL, histphoenix_loc = NULL,
  9 |   dv_server = "harvard.dataverse.edu", update = TRUE, actorset = "states",
 10 |   codeset = "all", time_window = "day", code_subset = "all",
 11 |   tie_type = "binary", sources = "all")
 12 | }
 13 | \arguments{
 14 | \item{start_date}{start date of time period as Ymd-format integer (ex:
 15 | June 1, 2014 as 20140601).}
 16 | 
 17 | \item{end_date}{end date of time period as Ymd-format integer (ex:
 18 | June 1, 2014 as 20140601).}
 19 | 
 20 | \item{level}{level of event granularity ('eventcode', 'rootcode',
 21 | 'pentaclass', or 'goldstein'). 'Eventcode' creates a network for
 22 | each of the 226 sub-codes in CAMEO. 'Rootcode' creates a network
 23 | for each of the 20 event root codes in CAMEO. 'Pentaclass' creates
 24 | a network for each of the 0-4 pentaclass codes in CAMEO.
 25 | 'Goldstein' creates one or two networks denoting mean Goldstein
 26 | scores, either aggregated (positive - negative) or separated into
 27 | two separate networks for positive and negative Goldstein scores.}
 28 | 
 29 | \item{dv_key}{Unique user key to access SWORD API and automatically find and
 30 | download up-to-date ICEWS data.}
 31 | 
 32 | \item{phoenix_loc}{folder containing Phoenix data sets as daily .csv
 33 | data tables. Automatically checks for new data sets each time
 34 | the function is run, and downloads new daily data as it becomes
 35 | available. Currently in 'one-and-done' format
 36 | where it downloads the first time, and checks thereafter.}
 37 | 
 38 | \item{icews_loc}{folder containing ICEWS data sets as daily .tab data
 39 | tables. Because I don't know how to work a SWORD API, these will
 40 | need to be manually downloaded and updated.}
 41 | 
 42 | \item{histphoenix_loc}{folder containing historic Phoenix data from
 43 | UIUC's Cline Center for Democracy. Leave empty if you don't
 44 | want to use these data.}
 45 | 
 46 | \item{dv_server}{Dataverse server address from which to download
 47 | up-to-date ICEWS data. Defaults to Harvard Dataverse at
 48 | harvard.dataverse.edu.}
 49 | 
 50 | \item{update}{should phoenixNet attempt to download new data? This will attempt
 51 | to download any Phoenix data files that 'should' be present in the
 52 | Phoenix data directory (one data file per day, from 2014-06-20 through
 53 | the present day) and denote whether or not any of these files
 54 | come up missing in the process.}
 55 | 
 56 | \item{actorset}{set of actors for which to create event-networks. Defaults
 57 | to the 255 ISO-coded states in the international system. Specifying
 58 | a specific state or set of states (as 3-character ISO codes) will
 59 | extract all the 'major' domestic entities within that state/states.}
 60 | 
 61 | \item{codeset}{subset of event codes as specified by 'level'. This is useful
 62 | if you desire to extract only a portion of interactions recorded
 63 | by CAMEO, but has to align with the code aggregation specified
 64 | in the 'level' argument. For example, if you specify 'rootcode',
 65 | the 'codeset' you specify has to be one or more root codes between
 66 | 1 and 20. Entering a subset of root code values would return a
 67 | smaller number of network layers. Defaults to 'all'.}
 68 | 
 69 | \item{time_window}{temporal window to build event-networks. Valid
 70 | entries are 'day', 'week', 'month', or 'year'.}
 71 | 
 72 | \item{code_subset}{subset of EVENTCODES that can be aggregated up to higher
 73 | order interactions. For example, you might want to only look at
 74 | event codes below 100, but then aggregate those event codes to
 75 | rootcode or pentaclass.}
 76 | 
 77 | \item{tie_type}{type of ties to return. Default is binarized ties where
 78 | a tie represents the presence of one OR MORE interactions in the
 79 | time period specified. Valid entries are 'binary', 'count'
 80 | (count of events), 'meangoldstein' (mean Goldstein score),
 81 | 'sepgoldstein' (mean positive/negative Goldstein scores separated).
 82 | NOTE: choosing a Goldstein score as tie type negates the "level"
 83 | argument.}
 84 | 
 85 | \item{sources}{use only Phoenix or ICEWS data in creating event networks.
 86 | Valid entries are 'phoenix', 'icews', 'histphoenix' or 'all' (default).}
 87 | 
 88 | \item{dv_server}{location of the ICEWS Dataverse server. Defaults to
 89 | "harvard.dataverse.edu" and probably won't change anytime soon.}
 90 | }
 91 | \value{
 92 | master_networks a LIST object containing temporally referenced event-networks.
 93 | }
 94 | \description{
 95 | Take event-level data and convert it into
 96 |  networks of interaction by time period. Output is in
 97 |  the form of a nested list object where each element is
 98 |  an R network object. These networks can then be processed
 99 |  and analyzed.
100 | }
101 | \note{
102 | This function is still in early development and may contain significant errors.
103 |        Don't trust it.
104 | }
105 | \author{
106 | Jesse Hammond
107 | }
108 | 


--------------------------------------------------------------------------------
/man/extract_dyadstats.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/extract_dyadstats.R
 3 | \name{extract_dyadstats}
 4 | \alias{extract_dyadstats}
 5 | \title{Extract dyad-level statistics from a given event-network.}
 6 | \usage{
 7 | extract_dyadstats(input_date = this_date, event_dnet = tsna_obj)
 8 | }
 9 | \arguments{
10 | \item{input_date}{A date in integer %Y%m%d format.}
11 | 
12 | \item{event_dnet}{network object containing a set of interactions.}
13 | }
14 | \value{
15 | net_stats Table of dyad-level statistics.
16 | }
17 | \description{
18 | INTERNAL FUNCTION: Intakes a given network object and returns a set
19 |  of dyad-level statistics for output.
20 | }
21 | \keyword{data}
22 | \keyword{event}
23 | \keyword{phoenix}
24 | 


--------------------------------------------------------------------------------
/man/extract_netstats.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/extract_netstats.R
 3 | \name{extract_netstats}
 4 | \alias{extract_netstats}
 5 | \title{Extract network-level statistics from a given event-network.}
 6 | \usage{
 7 | extract_netstats(input_date = this_date, event_dnet = event_dnet,
 8 |   datelist = dates)
 9 | }
10 | \arguments{
11 | \item{input_date}{A date in integer %Y%m%d format.}
12 | 
13 | \item{event_dnet}{network object containing a set of interactions.}
14 | }
15 | \value{
16 | net_stats Table of network-level statistics.
17 | }
18 | \description{
19 | INTERNAL FUNCTION: Intakes a given network object and returns a set
20 |  of network-level statistics for output.
21 | }
22 | \keyword{data}
23 | \keyword{event}
24 | \keyword{phoenix}
25 | 


--------------------------------------------------------------------------------
/man/extract_nodestats.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/extract_nodestats.R
 3 | \name{extract_nodestats}
 4 | \alias{extract_nodestats}
 5 | \title{Extract node-level statistics from a given event-network.}
 6 | \usage{
 7 | extract_nodestats(input_date = this_date, event_dnet = tsna_obj)
 8 | }
 9 | \arguments{
10 | \item{input_date}{A date in integer %Y%m%d format.}
11 | 
12 | \item{event_dnet}{network object containing a set of interactions.}
13 | }
14 | \value{
15 | net_stats Table of node-level statistics.
16 | }
17 | \description{
18 | INTERNAL FUNCTION: Intakes a given network object and returns a set
19 |  of node-level statistics for output.
20 | }
21 | \keyword{data}
22 | \keyword{event}
23 | \keyword{phoenix}
24 | 


--------------------------------------------------------------------------------
/man/icews_cameo.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/icews_cameo.R
 3 | \name{icews_cameo}
 4 | \alias{icews_cameo}
 5 | \title{Convert ICEWS state/actor codes into CAMEO format,
 6 |  and extract root codes from specific CAMEO event codes.}
 7 | \usage{
 8 | icews_cameo(icews)
 9 | }
10 | \description{
11 | Intake a set of ICEWS data (read in after some pre-processing)
12 |    and convert entries to CAMEO format using conversion tables created
13 |    by Phil Schrodt (https://github.com/philip-schrodt/text_to_CAMEO)
14 | }
15 | \arguments{
16 | \item{icews}{ICEWS data as one large data.table.}
17 | }
18 | \value{
19 | icews ICEWS data with several new CAMEO code columns.
20 | }
21 | % The roxygen block in R/icews_cameo.R also declares these tags, which were
22 | % previously rendered verbatim here: @keywords phoenix, event data;
23 | % @import data.table; @import plyr; @export.
24 | 


--------------------------------------------------------------------------------
/man/ingest_histphoenix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ingest_histphoenix.R
 3 | \name{ingest_histphoenix}
 4 | \alias{ingest_histphoenix}
 5 | \title{Ingest the historic Phoenix Dataset}
 6 | \usage{
 7 | ingest_histphoenix(histphoenix_loc, start_date = start_date,
 8 |   end_date = end_date, statelist = statelist)
 9 | }
10 | \arguments{
11 | \item{histphoenix_loc}{The path to the Phoenix folder.}
12 | 
13 | \item{end_date}{End of the date range of events to read in.}
14 | }
15 | \value{
16 | A single dataframe with all the historic Phoenix events in the folder.
17 | }
18 | \description{
19 | Given a directory with the historic Phoenix dataset files, quickly read
20 | them all in, name them correctly, and combine them into one dataframe.
21 | }
22 | \note{
23 | This function, like Phoenix, is still in development and may contain errors and change quickly.
24 | }
25 | \examples{
26 | 
27 | events <- ingest_histphoenix("~/histphoenix")
28 | 
29 | }
30 | 


--------------------------------------------------------------------------------
/man/ingest_icews.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ingest_icews.R
 3 | \name{ingest_icews}
 4 | \alias{ingest_icews}
 5 | \title{Ingest the ICEWS Event Dataset}
 6 | \usage{
 7 | ingest_icews(dir, start_date, end_date)
 8 | }
 9 | \arguments{
10 | \item{dir}{The path to the ICEWS folder.}
11 | 
12 | \item{start_date}{Start of date range as YYYYMMDD integer format.}
13 | 
14 | \item{end_date}{End of date range as YYYYMMDD integer format.}
15 | }
16 | \value{
17 | A single dataframe with all the ICEWS events in the folder.
18 | }
19 | \description{
20 | Given a directory with individual ICEWS dataset files, quickly read
21 | them all in, name them correctly, and combine them into one dataframe.
22 | }
23 | \note{
24 | This function is still in development and may contain errors and change quickly.
25 | }
26 | \examples{
27 | 
28 | events <- ingest_icews("~/ICEWS/study_28075/Data/", 20101201, 20140101)
29 | 
30 | }
31 | \author{
32 | Andy Halterman, forked by Jesse Hammond
33 | }
34 | 


--------------------------------------------------------------------------------
/man/ingest_phoenix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ingest_phoenix.R
 3 | \name{ingest_phoenix}
 4 | \alias{ingest_phoenix}
 5 | \title{Ingest the Phoenix Dataset}
 6 | \usage{
 7 | ingest_phoenix(phoenix_loc, start_date, end_date)
 8 | }
 9 | \arguments{
10 | \item{phoenix_loc}{The path to the Phoenix folder.}
11 | 
12 | \item{start_date}{Start of date range as YYYYMMDD integer format.}
13 | 
14 | \item{end_date}{End of date range as YYYYMMDD integer format.}
15 | }
16 | \value{
17 | A single dataframe with all the Phoenix events in the folder.
18 | }
19 | \description{
20 | Given a directory with individual Phoenix dataset files, quickly read
21 | them all in, name them correctly, and combine them into one dataframe.
22 | }
23 | \note{
24 | This function, like Phoenix, is still in development and may contain errors and change quickly.
25 | }
26 | \examples{
27 | 
28 | events <- ingest_phoenix("~/OEDA/phoxy_test/", 20140620, 20150101)
29 | 
30 | }
31 | \author{
32 | Andy Halterman, forked by Jesse Hammond
33 | }
34 | 


--------------------------------------------------------------------------------
/man/phoenix_stats.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/phoenix_stats.R
 3 | \name{phoenix_stats}
 4 | \alias{phoenix_stats}
 5 | \title{Extract statistics from daily Phoenix event-networks.}
 6 | \usage{
 7 | phoenix_stats(dailynets, time_window = "day", codes = "all",
 8 |   do_parallel = F, n_cores = 4)
 9 | }
10 | \arguments{
11 | \item{dailynets}{networkDynamic object containing daily event-nets
12 | produced via phoenix_net function.}
13 | 
14 | \item{time_window}{time interval of aggregate event-network objects. Valid
15 | entries are 'day', 'week', 'month', 'year'.}
16 | 
17 | \item{codes}{string of event codes, root codes, or pentaclass codes.
18 | Note: these codes have to be in the same format as the original
19 | network layers created via 'phoenix_net'. If you specify rootcodes
20 | in the creation step, entering pentaclass codes in this step
21 | will produce an error.}
22 | 
23 | \item{do_parallel}{Logical TRUE-FALSE. Whether to use parallel backend
24 | 'doMC' when extracting network statistics by code. Considerably
25 | faster than single-core, but less reliable.}
26 | }
27 | \value{
28 | phoenix_out a LIST object of tables containing descriptive
29 |          statistics for daily event-networks.
30 | }
31 | \description{
32 | Take a set of daily networks generated by the phoenix_net function,
33 |  and extract a variety of daily statistics at the network and nodal
34 |  levels.
35 | }
36 | \keyword{data}
37 | \keyword{event}
38 | \keyword{phoenix}
39 | 


--------------------------------------------------------------------------------
/man/phoenix_tables.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/phoenix_tables.R
 3 | \name{phoenix_tables}
 4 | \alias{phoenix_tables}
 5 | \title{Scrape, merge, and process Phoenix and ICEWS data into
 6 | a large data table for aggregation and subsetting.}
 7 | \usage{
 8 | phoenix_tables(phoenix_loc, icews_loc, update = T)
 9 | }
10 | \arguments{
11 | \item{phoenix_loc}{folder containing Phoenix data sets as daily .csv
12 | data tables. Automatically checks for new data sets each time
13 | the function is run, and downloads new daily data as it becomes
14 | available. Currently in 'one-and-done' format
15 | where it downloads the first time, and checks thereafter.}
16 | 
17 | \item{icews_loc}{folder containing ICEWS data sets as daily .tab data
18 | tables. Because I don't know how to work a SWORD API, these will
19 | need to be manually downloaded and updated.}
20 | }
21 | \value{
22 | master_table a data.table object containing ALL merged/processed
23 |          Phoenix and ICEWS data. One row per event-dyad-day.
24 | }
25 | \description{
26 | Scrape, merge, and process Phoenix and ICEWS data into
27 | a large data table for aggregation and subsetting.
28 | }
29 | \note{
30 | This function is still in early development and may contain significant errors.
31 |        Don't trust it.
32 | }
33 | \author{
34 | Jesse Hammond
35 | }
36 | 


--------------------------------------------------------------------------------
/man/states.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/states_doc.R
 3 | \docType{data}
 4 | \name{states}
 5 | \alias{states}
 6 | \title{ICEWS CAMEO actor codes}
 7 | \format{An object of class \code{data.table} (inherits from \code{data.frame}) with 260 rows and 3 columns.}
 8 | \usage{
 9 | data(states)
10 | }
11 | \description{
12 | Merge table to convert states to CAMEO format using conversion tables created
13 |    by Phil Schrodt (https://github.com/philip-schrodt/text_to_CAMEO)
14 | }
15 | \keyword{datasets}
16 | 


--------------------------------------------------------------------------------
/man/update_icews.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/update_icews.R
 3 | \name{update_icews}
 4 | \alias{update_icews}
 5 | \title{Update a local directory of ICEWS dataset files with new files from the server}
 6 | \usage{
 7 | update_icews(destpath)
 8 | }
 9 | \arguments{
10 | \item{destpath}{The path to download ICEWS into.}
11 | }
12 | \description{
13 | Checks the contents of a directory containing ICEWS event data files, checks whether the
14 | server has new events, and downloads them to that directory. (It'll have some version handling ability,
15 | too, either from the file names or by reading in the events.)
16 | }
17 | \note{
18 | This function, like Phoenix, is still in development and may contain errors and change quickly.
19 | }
20 | \examples{
21 | 
22 | }
23 | \author{
24 | Original concept and code for Phoenix: Andy Halterman
25 | }
26 | 


--------------------------------------------------------------------------------
/man/update_phoenix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/update_phoenix.R
 3 | \name{update_phoenix}
 4 | \alias{update_phoenix}
 5 | \title{Update a local directory of Phoenix dataset files with new files from the server}
 6 | \usage{
 7 | update_phoenix(destpath)
 8 | }
 9 | \arguments{
10 | \item{destpath}{The path to download Phoenix into.}
11 | }
12 | \description{
13 | Checks the contents of a directory containing Phoenix event data files, checks whether the
14 | server has new events, and downloads them to that directory. (It'll have some version handling ability,
15 | too, either from the file names or by reading in the events.)
16 | }
17 | \note{
18 | This function, like Phoenix, is still in development and may contain errors and change quickly.
19 | }
20 | \examples{
21 | 
22 | }
23 | 


--------------------------------------------------------------------------------
/testing_script.R:
--------------------------------------------------------------------------------
 1 | # Manual smoke test for EventNetworks::eventNetworks().
 2 | # To install the package first, run:
 3 | # devtools::install_github('jrhammond/EventNetworks')
 4 | pacman::p_load(EventNetworks)
 5 | ?eventNetworks
 6 | 
 7 | # Read the Dataverse API key from the environment rather than committing
 8 | # it to the repository. Set DATAVERSE_KEY before running this script.
 9 | # NOTE(review): the key is presumably only needed when update = TRUE;
10 | # confirm against eventNetworks().
11 | dv_key <- Sys.getenv('DATAVERSE_KEY')
12 | 
13 | # The data locations below are machine-specific; adjust before running.
14 | test <- EventNetworks::eventNetworks(
15 |   start_date = 20140101,
16 |   end_date = 20150101,
17 |   level = 'pentaclass',
18 |   dv_key = dv_key,
19 |   phoenix_loc = 'C:\\Users\\Jesse\\Box Sync\\DataSets\\phoenix',
20 |   icews_loc = 'C:\\Users\\Jesse\\Box Sync\\DataSets\\icews',
21 |   histphoenix_loc = 'C:\\Users\\Jesse\\Box Sync\\DataSets\\CCHPED_v2017_06_30',
22 |   dv_server = 'harvard.dataverse.edu',
23 |   # Use FALSE rather than F: F is an ordinary variable and can be reassigned.
24 |   update = FALSE,
25 |   actorset = 'states',
26 |   codeset = 'all',
27 |   time_window = 'month',
28 |   code_subset = 'all',
29 |   tie_type = 'meangoldstein',
30 |   sources = 'all'
31 | )
32 | 


--------------------------------------------------------------------------------