├── (Jesse Hammond's conflicted copy 2015-11-26).Rhistory ├── .DS_Store ├── .Rapp.history ├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── EventNetworks.Rproj ├── NAMESPACE ├── R ├── .DS_Store ├── .Rapp.history ├── EventNetworks.R ├── agents_doc.R ├── convert_cameo.R ├── convert_cameo_data.R ├── download_icews.R ├── download_phoenix.R ├── extract_dyadstats.R ├── extract_netstats.R ├── extract_nodestats.R ├── icews_cameo.R ├── ingest_histphoenix.R ├── ingest_icews.R ├── ingest_phoenix.R ├── phoenix_stats.R ├── phoenix_tables.R ├── states_doc.R ├── update_icews.R └── update_phoenix.R ├── README.md ├── data ├── agentnames.txt~ ├── agents.RData ├── convert_cameo_data.RData └── states.RData ├── man ├── agents.Rd ├── convert_cameo.Rd ├── convert_cameo_data.Rd ├── download_icews.Rd ├── download_phoenix.Rd ├── eventNetworks.Rd ├── extract_dyadstats.Rd ├── extract_netstats.Rd ├── extract_nodestats.Rd ├── icews_cameo.Rd ├── ingest_histphoenix.Rd ├── ingest_icews.Rd ├── ingest_phoenix.Rd ├── phoenix_stats.Rd ├── phoenix_tables.Rd ├── states.Rd ├── update_icews.Rd └── update_phoenix.Rd └── testing_script.R / (Jesse Hammond's conflicted copy 2015-11-26).Rhistory: -------------------------------------------------------------------------------- 1 | # Download raw files from Phoenix data repo and ICEWS dataverse. 2 | # 3 | ###### 4 | ## Download new Phoenix data tables. This will download the entire 5 | ## archive the first time this function is run and fully populate 6 | ## the destination folder. 7 | ## NOTE: This currently requires a clumsy step where it reinstalls phoxy 8 | ## every time the code is run. This should be cleaned up, but I'm not 9 | ## 100% sure how to do so in a way that's both accurate and polite. 10 | message('Checking Phoenix data...') 11 | # library(phoxy) 12 | phoxy::update_phoenix(destpath = phoenix_loc, phoenix_version = 'current') 13 | ## Check to see if ICEWS folder exists and that it has at least one 'valid' 14 | ## ICEWS data table stored. 
15 | message('Checking ICEWS data...') 16 | icews_checkfile <- 'events.2000.20150313082808.tab' 17 | icews_files <- list.files(icews_loc) 18 | if(!icews_checkfile %in% icews_files){ 19 | stop('Please enter a valid path that contains the ICEWS yearly files.') 20 | } else { 21 | message('ICEWS file location is valid.') 22 | } 23 | ###### 24 | # 25 | # Read and parse ICEWS data for merging. 26 | # 27 | ###### 28 | ## Read and parse ICEWS data 29 | message('Ingesting ICEWS data...') 30 | icews_data <- phoxy::ingest_icews(icews_loc, start_date, end_date) 31 | ## Clean ICEWS data and format to Phoenix-style CAMEO codes 32 | ## for actors and states 33 | message('Munging ICEWS data...') 34 | icews_data <- icews_cameo(icews_data) 35 | ## Subset ICEWS data to only keep key columns 36 | icews_data <- icews_data[, list(date, sourceactorentity 37 | , targetactorentity, rootcode 38 | , eventcode, goldstein)] 39 | icews_data[, source := 'icews'] 40 | ###### 41 | # 42 | # Read and parse Phoenix data for merging. 
43 | # 44 | ###### 45 | ## Read and parse Phoenix data 46 | message('Ingesting Phoenix data...') 47 | phoenix_data <- phoxy::ingest_phoenix(phoenix_loc = phoenix_loc 48 | , start_date = start_date 49 | , end_date = end_date) 50 | ## Subset Phoenix data to only keep key columns 51 | phoenix_data <- phoenix_data[, list(date, paste3(sourceactorentity 52 | , sourceactorrole, sep = '') 53 | , paste3(targetactorentity 54 | , targetactorrole, sep = '') 55 | , rootcode, eventcode, goldstein)] 56 | setnames(phoenix_data, c('V2', 'V3') 57 | , c('sourceactorentity', 'targetactorentity')) 58 | phoenix_data[, source := 'phoenix'] 59 | ## Drop any missing data 60 | phoenix_data <- phoenix_data[!is.na(rootcode)] 61 | phoenix_data <- phoenix_data[!is.na(eventcode)] 62 | phoenix_data <- phoenix_data[!is.na(goldstein)] 63 | ###### 64 | # 65 | # Combine ICEWS and Phoenix data 66 | # 67 | ###### 68 | try({ 69 | master_data <- rbind(icews_data, phoenix_data) 70 | }, silent = T) 71 | if(class(master_data)[1] == 'try-error'){ 72 | message('Specified range does not include Phoenix data.') 73 | master_data <- icews_data 74 | } 75 | setnames(master_data, c('sourceactorentity', 'targetactorentity') 76 | , c('actora', 'actorb')) 77 | ## Subset events: if a subset of EVENTCODES are specified, keep only that 78 | ## set of events and aggregate up from there. 
79 | if(!any('all' %in% code_subset)){ 80 | master_data <- master_data[eventcode %in% code_subset] 81 | } 82 | ## Create new variable: Pentaclass (0-4) 83 | master_data[rootcode %in% c(1, 2), pentaclass := 0L] 84 | master_data[rootcode %in% c(3, 4, 5), pentaclass := 1L] 85 | master_data[rootcode %in% c(6, 7, 8), pentaclass := 2L] 86 | master_data[rootcode %in% c(9, 10, 11, 12, 13, 16), pentaclass := 3L] 87 | master_data[rootcode %in% c(14, 15, 17, 18, 19, 20), pentaclass := 4L] 88 | ###################################### 89 | ## IMPORTANT ASSUMPTION HERE: 90 | ## I am *ASSUMING* that NULL/NA entries after a state code 91 | ## implies that the actor is the GOVERNMENT. As such I am replacing 92 | ## all such missing entries with 'GOV'. 93 | ###################################### 94 | master_data[actora %in% countrycode::countrycode_data$iso3c 95 | , actora := paste0(actora, 'GOV')] 96 | master_data[actorb %in% countrycode::countrycode_data$iso3c 97 | , actorb := paste0(actorb, 'GOV')] 98 | ###### 99 | # 100 | # Pre-format data by de-duplicating, cleaning dates and actors, 101 | # and dropping unused columns 102 | # 103 | ###### 104 | ## Aggregate dates to specified time window 105 | master_data[, date := lubridate::floor_date(date, time_window)] 106 | ## Subset events: keep only events within date range 107 | master_data <- master_data[date %in% dates] 108 | ## Subset events and columns: only events that: 109 | ## 1. involve specified actor set on both side (as ENTITIES) 110 | ## 2. involve TWO DIFFERENT actors (i.e. 
no self-interactions 111 | ## as specified by user) 112 | if(('states' %in% actorset)){ 113 | master_data <- master_data[(actora %in% paste0(actors, 'GOV') 114 | & actorb %in% paste0(actors, 'GOV')) 115 | | (actora %in% paste0(actors, 'MIL') 116 | & actorb %in% paste0(actors, 'MIL'))] 117 | master_data <- master_data[substr(actora, 1, 3) != substr(actorb, 1, 3)] 118 | ## Set actor codes to state-level factors 119 | master_data[, actora := factor(substr(actora, 1, 3), levels = levels(actors))] 120 | master_data[, actorb := factor(substr(actorb, 1, 3), levels = levels(actors))] 121 | } else{ 122 | master_data <- master_data[(actora %in% actors 123 | & actorb %in% actors)] 124 | master_data <- master_data[actora != actorb] 125 | master_data[, actora := factor(actora, levels = levels(actors))] 126 | master_data[, actorb := factor(actorb, levels = levels(actors))] 127 | } 128 | ## Subset columns: drop unused event column 129 | if(level == 'rootcode'){ 130 | master_data[, eventcode := NULL] 131 | master_data[, goldstein := NULL] 132 | master_data[, pentaclass := NULL] 133 | } else if(level == 'eventcode') { 134 | master_data[, rootcode := NULL] 135 | master_data[, goldstein := NULL] 136 | master_data[, pentaclass := NULL] 137 | } else if(level == 'goldstein') { 138 | master_data[, eventcode := NULL] 139 | master_data[, rootcode := NULL] 140 | master_data[, pentaclass := NULL] 141 | } else if(level == 'pentaclass') { 142 | master_data[, eventcode := NULL] 143 | master_data[, rootcode := NULL] 144 | master_data[, goldstein := NULL] 145 | setcolorder(master_data, c(1,2,3,5,4)) 146 | } 147 | ## Set names to generic 148 | setnames(master_data, c('date', 'actora', 'actorb', 'code', 'source')) 149 | ## Set CAMEO coded event/root codes to factors 150 | master_data[, code := factor(code, levels = codes)] 151 | master_data 152 | plyr::vaggregate(master_data, .group = 'code', .fun = 'count') 153 | plyr::vaggregate(master_data, group = 'code', fun = 'count') 154 | 
plyr::vaggregate(master_data, .group = 'code', .fun = 'count') 155 | plyr::vaggregate(master_data, .group = 'code', 'count') 156 | master_data[, .N, by = code] 157 | master_data[, .N, by = list(date, actora, code)] 158 | master_data[, .N, by = list(date, actora, code, source)] 159 | master_data[, .N, by = list(date, actora, actorb, code, source)] 160 | ###### 161 | # 162 | # Set up some initial values: Time windows 163 | # 164 | ###### 165 | ## Date objects 166 | if (class(start_date) %in% c('numeric', 'integer') 167 | | class(end_date) %in% c('numeric', 'integer')){ 168 | start_date <- as.Date(lubridate::ymd(start_date)) 169 | end_date <- as.Date(lubridate::ymd(end_date)) 170 | } 171 | dates <- seq.Date(start_date, end_date, by = 'day') 172 | dates <- unique(lubridate::round_date(dates, time_window)) 173 | ###### 174 | # 175 | # Set up some initial values: Actors 176 | # 177 | ###### 178 | ## Paste-function that can handle NA entries 179 | ## (http://stackoverflow.com/questions/13673894/suppress-nas-in-paste) 180 | paste3 <- function(...,sep=", ") { 181 | L <- list(...) 
182 | L <- lapply(L,function(x) {x[is.na(x)] <- ""; x}) 183 | ret <-gsub(paste0("(^",sep,"|",sep,"$)"),"", 184 | gsub(paste0(sep,sep),sep, 185 | do.call(paste,c(L,list(sep=sep))))) 186 | is.na(ret) <- ret=="" 187 | ret 188 | } 189 | ## Default actors: 255 ISO-coded countries 190 | if('states' %in% actorset){ 191 | # Set up set of primary actor codes 192 | statelist <- unique(countrycode::countrycode_data$iso3c) 193 | statelist <- statelist[!is.na(statelist)] 194 | actors <- as.factor(sort(statelist)) 195 | n <- length(actors) 196 | } else { 197 | ## Set up set of secondary actor codes 198 | secondary_actors <- c('GOV', 'MIL', 'REB', 'OPP', 'PTY', 'COP', 'JUD' 199 | , 'SPY', 'MED', 'EDU', 'BUS', 'CRM', 'CVL') 200 | statelist <- countrycode::countrycode_data$iso3c 201 | actors <- unique(statelist[statelist %in% actorset]) 202 | actors <- actors[!is.na(actors)] 203 | actors <- unique(as.vector(outer(actors, secondary_actors, paste, sep = ''))) 204 | actors <- as.factor(sort(actors)) 205 | n <- length(actors) 206 | } 207 | ###### 208 | # 209 | # Set up some initial values: Event codes 210 | # 211 | ###### 212 | ## Factor variables describing CAMEO categories 213 | if(level == 'rootcode'){ 214 | codes <- factor(1:20) 215 | levels(codes) <- as.character(1:20) 216 | } else if(level == 'eventcode'){ 217 | codes <- factor(1:298) 218 | levels(codes) <- as.character( 219 | c(10:21, 211:214, 22:23, 231:234, 24, 241:244, 25, 251:256, 26:28, 30:31 220 | , 311:314, 32:33, 331:334, 34, 341:344, 35, 351:356, 36:46, 50:57 221 | , 60:64, 70:75, 80:81, 811:814, 82:83, 831:834, 84, 841:842, 85:86 222 | , 861:863, 87, 871:874, 90:94, 100:101, 1011:1014, 102:103, 1031:1034 223 | , 104, 1041:1044, 105, 1051:1056, 106:108, 110:112, 1121:1125, 113:116 224 | , 120:121, 1211:1214, 122, 1221:1224, 123, 1231:1234, 124, 1241:1246 225 | , 125:129, 130:131, 1311:1313, 132, 1321:1324, 133:138, 1381:1385 226 | , 139:141, 1411:1414, 142, 1421:1424, 143, 1431:1434, 144, 1441:1444 227 | , 145, 
1451:1454, 150:155, 160:162, 1621:1623, 163:166, 1661:1663 228 | , 170:171, 1711:1712, 172, 1721:1724, 173:176, 180:182, 1821:1823, 183 229 | , 1831:1834, 184:186, 190:195, 1951:1952, 196, 200:204, 2041:2042) 230 | ) 231 | } else if(level == 'pentaclass'){ 232 | codes <- factor(0:4) 233 | levels(codes) <- as.character(0:4) 234 | } 235 | ## Subset of event codes 236 | if(!any('all' %in% codeset)){ 237 | if(sum(!codeset %in% codes) > 0){ 238 | message('Warning: some event codes do not match specified event class. Proceeding with valid event codes.') 239 | } 240 | codes <- codes[codes %in% codeset] 241 | if(length(codes) == 0){ 242 | stop('Please enter a valid set of event codes or pentaclass values.') 243 | } 244 | } 245 | ###### 246 | # 247 | # Set up some empty storage objects 248 | # 249 | ###### 250 | # Storage for daily network objects 251 | master_networks <- vector('list', length(codes)) 252 | names(master_networks) <- paste0('code', codes) 253 | # Storage for comparison of Phoenix and ICEWS reporting overlap 254 | filler <- rep(NA, length(dates)) 255 | sources_overlap <- data.table(date = dates 256 | , phoenix_only = filler 257 | , icews_only = filler 258 | , both_sources = filler) 259 | ###### 260 | # 261 | # Download raw files from Phoenix data repo and ICEWS dataverse. 262 | # 263 | ###### 264 | ## Download new Phoenix data tables. This will download the entire 265 | ## archive the first time this function is run and fully populate 266 | ## the destination folder. 267 | ## NOTE: This currently requires a clumsy step where it reinstalls phoxy 268 | ## every time the code is run. This should be cleaned up, but I'm not 269 | ## 100% sure how to do so in a way that's both accurate and polite. 270 | message('Checking Phoenix data...') 271 | # library(phoxy) 272 | phoxy::update_phoenix(destpath = phoenix_loc, phoenix_version = 'current') 273 | ## Check to see if ICEWS folder exists and that it has at least one 'valid' 274 | ## ICEWS data table stored. 
275 | message('Checking ICEWS data...') 276 | icews_checkfile <- 'events.2000.20150313082808.tab' 277 | icews_files <- list.files(icews_loc) 278 | if(!icews_checkfile %in% icews_files){ 279 | stop('Please enter a valid path that contains the ICEWS yearly files.') 280 | } else { 281 | message('ICEWS file location is valid.') 282 | } 283 | ###### 284 | # 285 | # Read and parse ICEWS data for merging. 286 | # 287 | ###### 288 | ## Read and parse ICEWS data 289 | message('Ingesting ICEWS data...') 290 | icews_data <- phoxy::ingest_icews(icews_loc, start_date, end_date) 291 | ## Clean ICEWS data and format to Phoenix-style CAMEO codes 292 | ## for actors and states 293 | message('Munging ICEWS data...') 294 | icews_data <- icews_cameo(icews_data) 295 | ## Subset ICEWS data to only keep key columns 296 | icews_data <- icews_data[, list(date, sourceactorentity 297 | , targetactorentity, rootcode 298 | , eventcode, goldstein)] 299 | icews_data[, source := 'icews'] 300 | ###### 301 | # 302 | # Read and parse Phoenix data for merging. 
303 | # 304 | ###### 305 | ## Read and parse Phoenix data 306 | message('Ingesting Phoenix data...') 307 | phoenix_data <- phoxy::ingest_phoenix(phoenix_loc = phoenix_loc 308 | , start_date = start_date 309 | , end_date = end_date) 310 | ## Subset Phoenix data to only keep key columns 311 | phoenix_data <- phoenix_data[, list(date, paste3(sourceactorentity 312 | , sourceactorrole, sep = '') 313 | , paste3(targetactorentity 314 | , targetactorrole, sep = '') 315 | , rootcode, eventcode, goldstein)] 316 | setnames(phoenix_data, c('V2', 'V3') 317 | , c('sourceactorentity', 'targetactorentity')) 318 | phoenix_data[, source := 'phoenix'] 319 | ## Drop any missing data 320 | phoenix_data <- phoenix_data[!is.na(rootcode)] 321 | phoenix_data <- phoenix_data[!is.na(eventcode)] 322 | phoenix_data <- phoenix_data[!is.na(goldstein)] 323 | ###### 324 | # 325 | # Combine ICEWS and Phoenix data 326 | # 327 | ###### 328 | try({ 329 | master_data <- rbind(icews_data, phoenix_data) 330 | }, silent = T) 331 | if(class(master_data)[1] == 'try-error'){ 332 | message('Specified range does not include Phoenix data.') 333 | master_data <- icews_data 334 | } 335 | setnames(master_data, c('sourceactorentity', 'targetactorentity') 336 | , c('actora', 'actorb')) 337 | ## Subset events: if a subset of EVENTCODES are specified, keep only that 338 | ## set of events and aggregate up from there. 
339 | if(!any('all' %in% code_subset)){ 340 | master_data <- master_data[eventcode %in% code_subset] 341 | } 342 | ## Create new variable: Pentaclass (0-4) 343 | master_data[rootcode %in% c(1, 2), pentaclass := 0L] 344 | master_data[rootcode %in% c(3, 4, 5), pentaclass := 1L] 345 | master_data[rootcode %in% c(6, 7, 8), pentaclass := 2L] 346 | master_data[rootcode %in% c(9, 10, 11, 12, 13, 16), pentaclass := 3L] 347 | master_data[rootcode %in% c(14, 15, 17, 18, 19, 20), pentaclass := 4L] 348 | ###################################### 349 | ## IMPORTANT ASSUMPTION HERE: 350 | ## I am *ASSUMING* that NULL/NA entries after a state code 351 | ## implies that the actor is the GOVERNMENT. As such I am replacing 352 | ## all such missing entries with 'GOV'. 353 | ###################################### 354 | master_data[actora %in% countrycode::countrycode_data$iso3c 355 | , actora := paste0(actora, 'GOV')] 356 | master_data[actorb %in% countrycode::countrycode_data$iso3c 357 | , actorb := paste0(actorb, 'GOV')] 358 | ###### 359 | # 360 | # Pre-format data by de-duplicating, cleaning dates and actors, 361 | # and dropping unused columns 362 | # 363 | ###### 364 | ## Subset events and columns: only events that: 365 | ## 1. involve specified actor set on both side (as ENTITIES) 366 | ## 2. involve TWO DIFFERENT actors (i.e. 
no self-interactions 367 | ## as specified by user) 368 | if(('states' %in% actorset)){ 369 | master_data <- master_data[(actora %in% paste0(actors, 'GOV') 370 | & actorb %in% paste0(actors, 'GOV')) 371 | | (actora %in% paste0(actors, 'MIL') 372 | & actorb %in% paste0(actors, 'MIL'))] 373 | master_data <- master_data[substr(actora, 1, 3) != substr(actorb, 1, 3)] 374 | ## Set actor codes to state-level factors 375 | master_data[, actora := factor(substr(actora, 1, 3), levels = levels(actors))] 376 | master_data[, actorb := factor(substr(actorb, 1, 3), levels = levels(actors))] 377 | } else{ 378 | master_data <- master_data[(actora %in% actors 379 | & actorb %in% actors)] 380 | master_data <- master_data[actora != actorb] 381 | master_data[, actora := factor(actora, levels = levels(actors))] 382 | master_data[, actorb := factor(actorb, levels = levels(actors))] 383 | } 384 | ## Subset columns: drop unused event column 385 | if(level == 'rootcode'){ 386 | master_data[, eventcode := NULL] 387 | master_data[, goldstein := NULL] 388 | master_data[, pentaclass := NULL] 389 | } else if(level == 'eventcode') { 390 | master_data[, rootcode := NULL] 391 | master_data[, goldstein := NULL] 392 | master_data[, pentaclass := NULL] 393 | } else if(level == 'goldstein') { 394 | master_data[, eventcode := NULL] 395 | master_data[, rootcode := NULL] 396 | master_data[, pentaclass := NULL] 397 | } else if(level == 'pentaclass') { 398 | master_data[, eventcode := NULL] 399 | master_data[, rootcode := NULL] 400 | master_data[, goldstein := NULL] 401 | setcolorder(master_data, c(1,2,3,5,4)) 402 | } 403 | ## Set names to generic 404 | setnames(master_data, c('date', 'actora', 'actorb', 'code', 'source')) 405 | ## Set CAMEO coded event/root codes to factors 406 | master_data[, code := factor(code, levels = codes)] 407 | ## Set keys 408 | setkeyv(master_data, c('date', 'actora', 'actorb', 'code', 'source')) 409 | ###### 410 | # 411 | # Export : how much overlap between Phoenix and ICEWS 
reporting? 412 | # 413 | ###### 414 | ## Create some temporary flag variables 415 | master_data[, dup_fromtop := duplicated( 416 | master_data[, list(date, actora, actorb, code)])] 417 | master_data[, dup_frombot := duplicated( 418 | master_data[, list(date, actora, actorb, code)], fromLast = T)] 419 | ## Export data on reporting overlap 420 | # Phoenix reporting only 421 | dates_tab <- data.table(date = dates) 422 | phoenix_only <- master_data[, sum(dup_fromtop == F 423 | & source == 'phoenix'), by = date] 424 | phoenix_only <- merge(dates_tab, phoenix_only, by = 'date', all.x = T) 425 | phoenix_only[is.na(V1), V1 := 0] 426 | sources_overlap$phoenix_only <- phoenix_only$V1 427 | # ICEWS reporting only 428 | icews_only <- master_data[, sum(dup_frombot == F 429 | & source == 'icews'), by = date] 430 | icews_only <- merge(dates_tab, icews_only, by = 'date', all.x = T) 431 | icews_only[is.na(V1), V1 := 0] 432 | sources_overlap$icews_only <- icews_only$V1 433 | # Both sources report 434 | both_sources <- master_data[, sum(dup_fromtop == T), by = date] 435 | both_sources <- merge(dates_tab, both_sources, by = 'date', all.x = T) 436 | both_sources[is.na(V1), V1 := 0] 437 | sources_overlap$both_sources <- both_sources$V1 438 | ## Drop flags and source variable 439 | master_data[, dup_fromtop := NULL] 440 | master_data[, dup_frombot := NULL] 441 | master_data[, source := NULL] 442 | master_data 443 | sources_overlap 444 | master_data <- unique(master_data) 445 | master_data[, date := lubridate::floor_date(date, time_window)] 446 | master_data <- master_data[date %in% dates] 447 | master_data 448 | master_data[, .N, by = list(date, actora, actorb, code)] 449 | tie_type = 'count' 450 | ## Subset events 451 | if(tie_type == 'binary'){ 452 | ## Subset events: drop duplicated events/days/actors 453 | master_data <- master_data[!duplicated(master_data)] 454 | } else if(tie_type == 'count'){ 455 | ## Subset events: drop duplicated events/days/actors 456 | master_data <- 
master_data[, .N, by = list(date, actora, actorb, code)] 457 | } 458 | master_data 459 | ## Format for networkDynamic creation 460 | master_data[, date := as.integer(format(date, '%Y%m%d'))] 461 | master_data[, end_date := date] 462 | setcolorder(master_data, c('date', 'end_date', 'actora', 'actorb', 'code')) 463 | ###### 464 | # 465 | # For each time period in the specified range, subset the master data set, 466 | # convert interactions to network ties, and turn the resulting edgelist 467 | # into a network object. Save networks to a master list object. 468 | # 469 | ###### 470 | dates <- c(dates, (max(dates) + 1)) 471 | dates <- as.integer(format(dates, '%Y%m%d')) 472 | master_data[, date := as.integer(format(date, '%Y%m%d'))] 473 | master_data[, end_date := date] 474 | master_data 475 | this_code <- 1 476 | event_data <- master_data[code %in% this_code, list(date, end_date, actora, actorb)] 477 | this_code <- '4' 478 | if(tie_type == 'binary'){ 479 | event_data <- master_data[code %in% this_code, list(date, end_date, actora, actorb)] 480 | } else if(tie_type == 'count'){ 481 | event_data <- master_data[code %in% this_code, list(date, end_date, actora, actorb, N)] 482 | } 483 | event_data[, actora := as.integer(actora)] 484 | event_data[, actorb := as.integer(actorb)] 485 | event_net <- network::network.initialize(n = n, directed = T, loops = F) 486 | network.vertex.names(event_net) <- levels(actors) 487 | event_net 488 | event_data 489 | ?networkDynamic 490 | temporal_codenet <- networkDynamic(base.net = event_net 491 | , edge.spells = event_data 492 | , net.obs.period = net_period 493 | , edge.TEA.names = 'N' 494 | , verbose = F) 495 | temporal_codenet 496 | get.edge.attribute(temporal_codenet, 'N') 497 | get.edge.attribute(temporal_codenet, 'active') 498 | ?activate.edge.attribute 499 | temporal_codenet <- networkDynamic(base.net = event_net 500 | , edge.spells = event_data 501 | , net.obs.period = net_period 502 | , create.TEAs = T 503 | , edge.TEA.names = 
'N' 504 | , verbose = F) 505 | temporal_codenet 506 | get.edge.attribute(temporal_codenet, 'N.active') 507 | get.edge.attribute(temporal_codenet, 'active') 508 | foo <- network.collapse(temporal_codenet, at = 20150101) 509 | foo 510 | get.edge.attribute(foo, 'N') 511 | plot(foo) 512 | library(phoenixNet) 513 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrhammond/EventNetworks/77d7b76763ef8214ab837e6d4bfd86b94bd22b21/.DS_Store -------------------------------------------------------------------------------- /.Rapp.history: -------------------------------------------------------------------------------- 1 | setwd("/Users/bkinne/Dropbox/Minerva/phoenixNet") 2 | library(devtools) 3 | remove.packages("phoenixNet") 4 | getwd() 5 | build() 6 | install() 7 | library(phoenixNet) 8 | net.dat <- phoenix_net(20010101, 20021231, level="pentaclass", phoenix_loc="~/Dropbox/Minerva/phoenix", icews_loc="~/Dropbox/Minerva/icews", time_window="week") 9 | ?phoenix_net 10 | net.dat <- phoenix_net(20010101, 20021231, level="pentaclass", phoenix_loc="~/Dropbox/Minerva/phoenix", icews_loc="~/Dropbox/Minerva/icews", time_window="week", update=F) 11 | net.dat <- phoenix_net(20010101, 20021231, level="pentaclass", icews_loc="~/Dropbox/Minerva/icews", time_window="week", update=F) 12 | ?phoenix_net 13 | net.dat <- phoenix_net("20010101", "20021231", level="pentaclass", icews_loc="~/Dropbox/Minerva/icews", time_window="week", update=F) 14 | remove.packages("phoxy") 15 | devtools::install_github("ahalterman/phoxy") 16 | net.dat <- phoenix_net("20010101", "20021231", level="pentaclass", icews_loc="~/Dropbox/Minerva/icews", time_window="week", update=F) 17 | net.dat <- phoenix_net(20010101, 20021231, level="pentaclass", phoenix_loc="~/Dropbox/Minerva/phoenix", icews_loc="~/Dropbox/Minerva/icews", time_window="week", update=F) 18 | 
traceback() 19 | net.dat <- phoenix_net(20100101, 20121231, level="pentaclass", phoenix_loc="~/Dropbox/Minerva/phoenix", icews_loc="~/Dropbox/Minerva/icews", time_window="week", update=F) 20 | net.dat <- phoenix_net(20150101, 20151031, level="pentaclass", phoenix_loc="~/Dropbox/Minerva/phoenix", icews_loc="~/Dropbox/Minerva/icews", time_window="week", update=F) 21 | remove.packages("phoenixNet") 22 | build() 23 | install() 24 | net.dat <- phoenix_net(20010101, 20021231, level="pentaclass", phoenix_loc="~/Dropbox/Minerva/phoenix", icews_loc="~/Dropbox/Minerva/icews", time_window="week", update=F) 25 | remove.packages("phoenixNet") 26 | devtools::install_github("jrhammond/phoenixNet") 27 | library(phoenixNet) 28 | net.dat <- phoenix_net(20010101, 20021231, level="pentaclass", phoenix_loc="~/Dropbox/Minerva/phoenix", icews_loc="~/Dropbox/Minerva/icews", time_window="week", update=F) 29 | remove.packages("phoenixNet") 30 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | phoenix_processing.R 5 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: EventNetworks 2 | Title: Gather, munge, and convert event data into daily event-networks. 3 | Version: 0.0.0.9000 4 | Author: person("Jesse", "Hammond", email = "jesse.hammond1@gmail.com", role = 5 | c("aut", "cre")) 6 | Description: This package downloads daily event data (time range set 7 | by user) into a set of temp files. 
Event data are merged and de-duplicated, 8 | then turned into daily network objects by event-code or event-root-code. The 9 | resulting set of networks is saved as a (very) large list object, organized by 10 | event-code and then by date. 11 | Depends: 12 | R (>= 3.1.2), 13 | data.table, 14 | countrycode, 15 | reshape2, 16 | statnet, 17 | plyr, 18 | knitr 19 | VignetteBuilder: knitr 20 | Suggests: 21 | knitr 22 | License: GPL-3 23 | LazyData: true 24 | RoxygenNote: 6.0.1 25 | -------------------------------------------------------------------------------- /EventNetworks.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | exportPattern("^[^\\.]") 2 | -------------------------------------------------------------------------------- /R/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrhammond/EventNetworks/77d7b76763ef8214ab837e6d4bfd86b94bd22b21/R/.DS_Store -------------------------------------------------------------------------------- /R/.Rapp.history: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrhammond/EventNetworks/77d7b76763ef8214ab837e6d4bfd86b94bd22b21/R/.Rapp.history 
-------------------------------------------------------------------------------- /R/EventNetworks.R: -------------------------------------------------------------------------------- 1 | #' Convert Phoenix event data to daily event-networks. 2 | #' 3 | #' Take event-level data and convert it into 4 | #' networks of interaction by time period. Output is in 5 | #' the form of a nested list object where each element is 6 | #' an R network object. These networks can then be processed 7 | #' and analyzed. 8 | #' 9 | #' @param start_date start date of time period as Ymd-format integer (ex: 10 | #' June 1, 2014 as 20140601). 11 | #' @param end_date end date of time period as Ymd-format integer (ex: 12 | #' June 1, 2014 as 20140601). 13 | #' @param dv_server Dataverse server address from which to download 14 | #' up-to-date ICEWS data. Defaults to Harvard Dataverse at 15 | #' harvard.dataverse.edu. 16 | #' @param dv_key Unique user key to access SWORD API and automatically find and 17 | #' download up-to-date ICEWS data. 18 | #' @param level level of event granularity ('eventcode', 'rootcode', 19 | #' 'pentaclass', or 'goldstein'). 'Eventcode' creates a network for 20 | #' each of the 226 sub-codes in CAMEO. 'Rootcode' creates a network 21 | #' for each of the 20 event root codes in CAMEO. 'Pentaclass' creates 22 | #' a network for each of the 0-4 pentaclass codes in CAMEO. 23 | #' 'Goldstein' creates one or two networks denoting mean Goldstein 24 | #' scores, either aggregated (positive - negative) or separated into 25 | #' two separate networks for positive and negative Goldstein scores. 26 | #' @param phoenix_loc folder containing Phoenix data sets as daily .csv 27 | #' data tables. Automatically checks for new data sets each time 28 | #' the function is run, and downloads new daily data as it becomes 29 | #' available. Currently in 'one-and-done' format 30 | #' where it downloads the first time, and checks thereafter. 
31 | #' @param icews_loc folder containing ICEWS data sets as daily .tab data 32 | #' tables. Because I don't know how to work a SWORD API, these will 33 | #' need to be manually downloaded and updated. 34 | #' @param histphoenix_loc folder containing historic Phoenix data from 35 | #' UIUC's Cline Center for Democracy. Leave empty if you don't 36 | #' want to use these data. 37 | #' @param dv_server location of the ICEWS Dataverse server. Defaults to 38 | #' "harvard.dataverse.edu" and probably won't change anytime soon. 39 | #' @param update should phoenixNet attempt to download new data? This will attempt 40 | #' to download any Phoenix data files that 'should' be present in the 41 | #' Phoenix data directory (one data file per day, from 2014-06-20 through 42 | #' the present day) and denote whether or not any of these files 43 | #' come up missing in the process. 44 | #' @param actorset set of actors for which to create event-networks. Defaults 45 | #' to the 255 ISO-coded states in the international system. Specifying 46 | #' a specific state or set of states (as 3-character ISO codes) will 47 | #' extract all the 'major' domestic entities within that state/states. 48 | #' @param codeset subset of event codes as specified by 'level'. This is useful 49 | #' if you desire to extract only a portion of interactions recorded 50 | #' by CAMEO, but has to align with the code aggregation specified 51 | #' in the 'level' argument. For example, if you specify 'rootcode', 52 | #' the 'codeset' you specify has to be one or more root codes between 53 | #' 1 and 20. Entering a subset of root code values would return a 54 | #' smaller number of network layers. Defaults to 'all'. 55 | #' @param code_subset subset of EVENTCODES that can be aggregated up to higher 56 | #' order interactions. For example, you might want to only look at 57 | #' event codes below 100, but then aggregate those event codes to 58 | #' rootcode or pentaclass. 
59 | #' @param time_window temporal window to build event-networks. Valid 60 | #' entries are 'day', 'week', 'month', 'quarter', or 'year'. 61 | #' @param tie_type type of ties to return. Default is binarized ties where 62 | #' a tie represents the presence of one OR MORE interactions in the 63 | #' time period specified. Valid entries are 'binary', 'count' 64 | #' (count of events), 'meangoldstein' (mean Goldstein score), 65 | #' 'sepgoldstein' (mean positive/negative Goldstein scores separated). 66 | #' NOTE: choosing a Goldstein score as tie type negates the "level" 67 | #' argument. 68 | #' @param sources use only Phoenix or ICEWS data in creating event networks. 69 | #' Valid entries are 'phoenix', 'icews', 'histphoenix' or 'all' (default). 70 | #' 71 | #' @return master_networks a LIST object containing temporally referenced event-networks. 72 | #' 73 | #' @rdname eventNetworks 74 | #' 75 | #' @author Jesse Hammond 76 | #' 77 | #' @note This function is still in early development and may contain significant errors. 78 | #' Don't trust it. 
79 | #' 80 | 81 | #' @export 82 | #' 83 | #' @import data.table 84 | #' @import countrycode 85 | #' @import reshape2 86 | #' @import statnet 87 | #' @import tsna 88 | #' @import plyr 89 | #' @import lubridate 90 | #' @import dataverse 91 | #' @import bit64 92 | eventNetworks <- function(start_date 93 | , end_date 94 | , level 95 | , dv_key 96 | , phoenix_loc = NULL 97 | , icews_loc = NULL 98 | , histphoenix_loc = NULL 99 | , dv_server = 'harvard.dataverse.edu' 100 | , update = TRUE 101 | , actorset = 'states' 102 | , codeset = 'all' 103 | , time_window = 'day' 104 | , code_subset = 'all' 105 | , tie_type = 'binary' 106 | , sources = 'all' 107 | ){ 108 | 109 | library(lubridate) 110 | ###### 111 | # 112 | # Set up some initial values: Time windows 113 | # 114 | ###### 115 | 116 | ## Date objects 117 | if (class(start_date) %in% c('numeric', 'integer') 118 | | class(end_date) %in% c('numeric', 'integer')){ 119 | start_date <- as.Date(lubridate::ymd(start_date)) 120 | end_date <- as.Date(lubridate::ymd(end_date)) 121 | } 122 | dates <- seq.Date(start_date, end_date, by = 'day') 123 | dates <- unique(lubridate::floor_date(dates, time_window)) 124 | 125 | ###### 126 | # 127 | # Set up some initial values: Actors 128 | # 129 | ###### 130 | 131 | ## Paste-function that can handle NA entries 132 | ## (http://stackoverflow.com/questions/13673894/suppress-nas-in-paste) 133 | paste3 <- function(...,sep=", ") { 134 | L <- list(...) 
135 | L <- lapply(L,function(x) {x[is.na(x)] <- ""; x}) 136 | ret <-gsub(paste0("(^",sep,"|",sep,"$)"),"", 137 | gsub(paste0(sep,sep),sep, 138 | do.call(paste,c(L,list(sep=sep))))) 139 | is.na(ret) <- ret=="" 140 | ret 141 | } 142 | 143 | ## Default actors: 255 ISO-coded countries 144 | if('states' %in% actorset){ 145 | # Set up set of primary actor codes 146 | statelist <- unique(countrycode::countrycode_data$iso3c) 147 | statelist <- statelist[!is.na(statelist)] 148 | statelist <- c(statelist, 'KSV', 'IGO') 149 | statelist <- sort(statelist) 150 | actors <- as.factor(statelist) 151 | n <- length(actors) 152 | 153 | } else { 154 | ## Set up set of secondary actor codes 155 | secondary_actors <- c('GOV', 'MIL', 'REB', 'OPP', 'PTY', 'COP', 'JUD' 156 | , 'SPY', 'MED', 'EDU', 'BUS', 'CRM', 'CVL') 157 | statelist <- countrycode::countrycode_data$iso3c 158 | statelist <- statelist[!is.na(statelist)] 159 | actors <- unique(statelist[statelist %in% actorset]) 160 | actors <- actors[!is.na(actors)] 161 | actors <- unique(as.vector(outer(actors, secondary_actors, paste, sep = ''))) 162 | actors <- as.factor(sort(actors)) 163 | n <- length(actors) 164 | } 165 | 166 | ###### 167 | # 168 | # Set up some initial values: Event codes 169 | # 170 | ###### 171 | 172 | ## Factor variables describing CAMEO categories 173 | if(level == 'rootcode'){ 174 | codes <- factor(1:20) 175 | levels(codes) <- as.character(1:20) 176 | } else if(level == 'eventcode'){ 177 | codes <- factor(1:298) 178 | levels(codes) <- as.character( 179 | c(10:21, 211:214, 22:23, 231:234, 24, 241:244, 25, 251:256, 26:28, 30:31 180 | , 311:314, 32:33, 331:334, 34, 341:344, 35, 351:356, 36:46, 50:57 181 | , 60:64, 70:75, 80:81, 811:814, 82:83, 831:834, 84, 841:842, 85:86 182 | , 861:863, 87, 871:874, 90:94, 100:101, 1011:1014, 102:103, 1031:1034 183 | , 104, 1041:1044, 105, 1051:1056, 106:108, 110:112, 1121:1125, 113:116 184 | , 120:121, 1211:1214, 122, 1221:1224, 123, 1231:1234, 124, 1241:1246 185 | , 125:129, 
130:131, 1311:1313, 132, 1321:1324, 133:138, 1381:1385 186 | , 139:141, 1411:1414, 142, 1421:1424, 143, 1431:1434, 144, 1441:1444 187 | , 145, 1451:1454, 150:155, 160:162, 1621:1623, 163:166, 1661:1663 188 | , 170:171, 1711:1712, 172, 1721:1724, 173:176, 180:182, 1821:1823, 183 189 | , 1831:1834, 184:186, 190:195, 1951:1952, 196, 200:204, 2041:2042) 190 | ) 191 | } else if(level == 'pentaclass'){ 192 | codes <- factor(0:4) 193 | levels(codes) <- as.character(0:4) 194 | } else if(level == 'goldstein'){ 195 | if(tie_type == 'netgoldstein'){ 196 | codes <- 1 197 | } else if(tie_type == 'sepgoldstein'){ 198 | codes = c('mean_neg_goldstein', 'mean_pos_goldstein') 199 | } 200 | } 201 | 202 | ## Subset of event codes 203 | if(!any('all' %in% codeset)){ 204 | if(sum(!codeset %in% codes) > 0){ 205 | message('Warning: some event codes do not match specified event class. 206 | Proceeding with valid event codes.') 207 | } 208 | codes <- codes[codes %in% codeset] 209 | if(length(codes) == 0){ 210 | stop('Please enter a valid set of event codes or pentaclass values.') 211 | } 212 | } 213 | 214 | 215 | ###### 216 | # 217 | # Download raw files from Phoenix data repo and ICEWS dataverse. 218 | # 219 | ###### 220 | 221 | ## Download new Phoenix data tables. This will download the entire 222 | ## archive the first time this function is run and fully populate 223 | ## the destination folder. 224 | 225 | if(update == T){ 226 | message('Checking Phoenix data...') 227 | EventNetworks::update_phoenix(destpath = phoenix_loc) 228 | message('Checking ICEWS data...') 229 | EventNetworks::update_icews(destpath = icews_loc) 230 | } 231 | 232 | 233 | ###### 234 | # 235 | # Read and parse ICEWS data for merging. 
236 | # 237 | ###### 238 | 239 | icews_data <- data.table(date = as.Date(character()) 240 | , actora = character() 241 | , actorb = character() 242 | , rootcode = numeric() 243 | , eventcode = integer() 244 | , goldstein = numeric()) 245 | 246 | if (sources %in% c('ICEWS', 'all')){ 247 | 248 | if(end_date < as.Date('1995-01-01')){ 249 | ## Only parse ICEWS data if it exists in that date range 250 | icews_data <- data.table(date = as.Date(character()) 251 | , actora = character() 252 | , actorb = character() 253 | , rootcode = numeric() 254 | , eventcode = integer() 255 | , goldstein = numeric()) 256 | message('Specified timespan ends before ICEWS data coverage begins.') 257 | 258 | } else { 259 | ## Check to see if ICEWS folder exists and that it has at least one 'valid' 260 | ## ICEWS data table stored. 261 | years <- format(unique(lubridate::floor_date(dates, 'year')), '%Y') 262 | message('Checking ICEWS data...') 263 | icews_files <- list.files(icews_loc) 264 | icews_years <- ldply(strsplit(icews_files, '\\.'))$V2 265 | access_years <- which(icews_years %in% years) 266 | 267 | if(length(access_years) == 0){ 268 | message('No ICEWS files found in the specified timespan.') 269 | } else { 270 | 271 | ## Read and parse ICEWS data 272 | message('Ingesting ICEWS data...') 273 | icews_data <- ingest_icews(icews_loc, start_date, end_date) 274 | 275 | ## Clean ICEWS data and format to Phoenix-style CAMEO codes 276 | ## for actors and states 277 | message('Munging ICEWS data...') 278 | icews_data <- icews_cameo(icews_data) 279 | 280 | ## Subset ICEWS data to only keep key columns 281 | icews_data <- icews_data[, list(date, sourceactorentity 282 | , targetactorentity, rootcode 283 | , eventcode, goldstein)] 284 | setnames(icews_data, c('sourceactorentity', 'targetactorentity') 285 | , c('actora', 'actorb')) 286 | } 287 | } 288 | } else { 289 | 290 | } 291 | 292 | 293 | ###### 294 | # 295 | # Read and parse live Phoenix data for merging. 
296 | # 297 | ###### 298 | 299 | phoenix_data <- data.table(date = as.Date(character()) 300 | , actora = character() 301 | , actorb = character() 302 | , rootcode = numeric() 303 | , eventcode = integer() 304 | , goldstein = numeric()) 305 | 306 | if (sources %in% c('phoenix', 'all')){ 307 | 308 | if(end_date < as.Date('2014-06-20')){ 309 | ## Only parse Phoenix data if it exists in that date range 310 | message('Specified timespan ends before live Phoenix data coverage begins.') 311 | 312 | } else { 313 | 314 | ## Read and parse Phoenix data 315 | message('Ingesting Phoenix data...') 316 | phoenix_data <- ingest_phoenix(phoenix_loc = phoenix_loc 317 | , start_date = start_date 318 | , end_date = end_date) 319 | 320 | ## Subset Phoenix data to only keep key columns 321 | phoenix_data <- phoenix_data[, list(date, paste3(sourceactorentity 322 | , sourceactorrole, sep = '') 323 | , paste3(targetactorentity 324 | , targetactorrole, sep = '') 325 | , rootcode, eventcode, goldstein)] 326 | setnames(phoenix_data, c('V2', 'V3') 327 | , c('actora', 'actorb')) 328 | 329 | ## Drop any missing data 330 | phoenix_data <- phoenix_data[!is.na(rootcode)] 331 | phoenix_data <- phoenix_data[!is.na(eventcode)] 332 | phoenix_data <- phoenix_data[!is.na(goldstein)] 333 | } 334 | } 335 | 336 | ###### 337 | # 338 | # Read and parse historic Phoenix data for merging. 
339 | # 340 | ###### 341 | 342 | histphoenix_data <- data.table(date = as.Date(character()) 343 | , actora = character() 344 | , actorb = character() 345 | , rootcode = numeric() 346 | , eventcode = integer() 347 | , goldstein = numeric()) 348 | 349 | if (sources %in% c('histphoenix', 'all')){ 350 | 351 | if(end_date < as.Date('1945-01-01')){ 352 | ## Only parse Phoenix data if it exists in that date range 353 | message('Specified timespan ends before historic Phoenix data coverage begins.') 354 | 355 | } else { 356 | 357 | ## Read and parse Phoenix data 358 | message('Ingesting historic Phoenix data...') 359 | 360 | ## Read in and pre-parse historic Phoenix data 361 | histphoenix_data <- ingest_histphoenix(histphoenix_loc, start_date, end_date, actors) 362 | 363 | ## Subset historic Phoenix data to only keep key columns 364 | histphoenix_data <- histphoenix_data[, list(date, source, target 365 | , root_code, code, goldstein)] 366 | setnames( 367 | histphoenix_data 368 | , c('date', 'actora', 'actorb', 'rootcode', 'eventcode', 'goldstein') 369 | ) 370 | } 371 | } 372 | 373 | 374 | ###### 375 | # 376 | # Combine data sets 377 | # 378 | ###### 379 | 380 | master_data <- rbind(icews_data, phoenix_data, histphoenix_data) 381 | if(nrow(master_data) == 0){ 382 | stop('No Phoenix or ICEWS data available for the specified timespan.') 383 | } 384 | 385 | ## Subset events: if a subset of EVENTCODES are specified, keep only that 386 | ## set of events and aggregate up from there. 
387 | if(!any('all' %in% code_subset)){ 388 | master_data <- master_data[eventcode %in% code_subset] 389 | } 390 | 391 | ## Create new variable: Pentaclass (0-4) 392 | master_data[rootcode %in% c(1, 2), pentaclass := 0L] 393 | master_data[rootcode %in% c(3, 4, 5), pentaclass := 1L] 394 | master_data[rootcode %in% c(6, 7, 8), pentaclass := 2L] 395 | master_data[rootcode %in% c(9, 10, 11, 12, 13, 16), pentaclass := 3L] 396 | master_data[rootcode %in% c(14, 15, 17, 18, 19, 20), pentaclass := 4L] 397 | 398 | 399 | ###################################### 400 | ## IMPORTANT ASSUMPTION HERE: 401 | ## I am *ASSUMING* that NULL/NA entries after a state code 402 | ## implies that the actor is the GOVERNMENT. As such I am replacing 403 | ## all such missing entries with 'GOV'. 404 | ###################################### 405 | master_data[actora %in% countrycode::countrycode_data$iso3c 406 | , actora := paste0(actora, 'GOV')] 407 | master_data[actorb %in% countrycode::countrycode_data$iso3c 408 | , actorb := paste0(actorb, 'GOV')] 409 | 410 | 411 | ###### 412 | ## Subset events and columns: only events that: 413 | ## 1. involve specified actor set on both side (as ENTITIES) 414 | ## 2. involve TWO DIFFERENT actors (i.e. 
no self-interactions 415 | ## as specified by user) 416 | ###### 417 | if(('states' %in% actorset)){ 418 | master_data <- master_data[ 419 | substr(actora, 1, 3) %in% actors & substr(actora, 4, 6) %in% c('GOV', 'MIL', '') 420 | & substr(actorb, 1, 3) %in% actors & substr(actorb, 4, 6) %in% c('GOV', 'MIL', '') 421 | & actora != actorb 422 | ] 423 | master_data[, actora := substr(actora, 1, 3)] 424 | master_data[, actorb := substr(actorb, 1, 3)] 425 | master_data[, actora := factor(actora, levels = levels(actors))] 426 | master_data[, actorb := factor(actorb, levels = levels(actors))] 427 | 428 | } else{ 429 | master_data[, actora := substr(actora, 1, 6)] 430 | master_data[, actorb := substr(actorb, 1, 6)] 431 | master_data <- master_data[(actora %in% actors 432 | & actorb %in% actors 433 | & actora != actorb)] 434 | master_data[, actora := factor(actora, levels = actors)] 435 | master_data[, actorb := factor(actorb, levels = actors)] 436 | } 437 | 438 | 439 | ###### 440 | # 441 | # Format data by de-duplicating, separating by date, 442 | # and dropping unused columns 443 | # 444 | ###### 445 | 446 | ## Drop duplicated variables 447 | master_data <- unique(master_data) 448 | 449 | ## Drop self-events 450 | master_data <- master_data[actora != actorb] 451 | 452 | ## Subset columns: drop unused event column 453 | if(level == 'rootcode'){ 454 | master_data[, eventcode := NULL] 455 | master_data[, goldstein := NULL] 456 | master_data[, pentaclass := NULL] 457 | } else if(level == 'eventcode') { 458 | master_data[, rootcode := NULL] 459 | master_data[, goldstein := NULL] 460 | master_data[, pentaclass := NULL] 461 | } else if(level == 'goldstein') { 462 | master_data[, eventcode := NULL] 463 | master_data[, rootcode := NULL] 464 | master_data[, pentaclass := NULL] 465 | } else if(level == 'pentaclass') { 466 | master_data[, eventcode := NULL] 467 | master_data[, rootcode := NULL] 468 | master_data[, goldstein := NULL] 469 | } 470 | 471 | 472 | ## Set names to generic 473 
| setnames(master_data, c('date', 'actora', 'actorb', 'code')) 474 | 475 | ## Set CAMEO coded event/root/pentaclass codes to factors 476 | # if(!level == 'goldstein'){ 477 | # master_data[, code := factor(code, levels = codes)] 478 | # } 479 | 480 | ## Set keys 481 | setkeyv(master_data, c('date', 'actora', 'actorb', 'code')) 482 | 483 | 484 | ## Aggregate dates to specified time window 485 | master_data[, date := lubridate::floor_date(date, time_window)] 486 | 487 | ## Subset events: keep only events within date range 488 | master_data <- master_data[date %in% dates] 489 | 490 | ## Subset events 491 | if(tie_type == 'binary'){ 492 | ## Subset events: drop duplicated events/days/actors 493 | master_data <- unique(master_data) 494 | } else if(tie_type == 'count'){ 495 | ## Subset events: drop duplicated events/days/actors 496 | master_data <- master_data[, .N, by = list(date, actora, actorb, code)] 497 | } else if(tie_type == 'meangoldstein'){ 498 | master_data <- master_data[, mean_goldstein := mean(code), by = list(date, actora, actorb)] 499 | } else if(tie_type == 'sepgoldstein'){ 500 | master_data[, pos_goldstein := NA_real_] 501 | master_data[code > 0, pos_goldstein := code] 502 | master_data[, neg_goldstein := NA_real_] 503 | master_data[code < 0, neg_goldstein := code] 504 | master_data[, mean_pos_goldstein := mean(pos_goldstein, na.rm = T), by = list(date, actora, actorb)] 505 | master_data[is.na(mean_pos_goldstein), mean_pos_goldstein := 0] 506 | master_data[, mean_neg_goldstein := mean(neg_goldstein, na.rm = T), by = list(date, actora, actorb)] 507 | master_data[is.na(mean_neg_goldstein), mean_neg_goldstein := 0] 508 | master_data <- master_data[mean_pos_goldstein != 0 | mean_neg_goldstein != 0, ] 509 | master_data <- unique(master_data, by = c('date', 'actora', 'actorb')) 510 | master_data[, code := as.integer(mean_pos_goldstein > 0) + 1] 511 | } 512 | 513 | ## Format for networkDynamic creation 514 | master_data[, date := as.integer(format(date, 
'%Y%m%d'))] 515 | master_data[, end_date := date] 516 | # setcolorder(master_data, c('date', 'end_date', 'actora', 'actorb', 'code')) 517 | 518 | ###### 519 | # 520 | # For each time period in the specified range, subset the master data set, 521 | # convert interactions to network ties, and turn the resulting edgelist 522 | # into a network object. Save networks to a master list object. 523 | # 524 | ###### 525 | 526 | 527 | if(time_window == 'day'){ 528 | dates <- c(dates, dates[length(dates)]) 529 | dates[length(dates)] <- lubridate::ymd(dates[length(dates)]) + lubridate::days(1) 530 | } else if(time_window == 'week'){ 531 | dates <- c(dates, dates[length(dates)]) 532 | dates[length(dates)] <- lubridate::ymd(dates[length(dates)]) + lubridate::weeks(1) 533 | } else if(time_window == 'month'){ 534 | dates <- c(dates, dates[length(dates)]) 535 | dates[length(dates)] <- lubridate::ymd(dates[length(dates)]) %m+% months(1) 536 | } else if(time_window == 'quarter'){ 537 | dates <- c(dates, dates[length(dates)]) 538 | dates[length(dates)] <- lubridate::ymd(dates[length(dates)]) %m+% months(1) 539 | } else if(time_window == 'year'){ 540 | dates <- c(dates, dates[length(dates)]) 541 | dates[length(dates)] <- lubridate::ymd(dates[length(dates)]) + lubridate::years(1) 542 | } 543 | 544 | final_dates <- as.integer(format(dates, '%Y%m%d')) 545 | 546 | 547 | ###### 548 | ## Break out tie construction and decide on output format based on tie type. 
549 | ##### 550 | 551 | dated_arrays <- list() 552 | 553 | ###### Binary or count-weighted ties 554 | if(tie_type %in% c('binary', 'count')){ 555 | 556 | if(tie_type == 'binary'){ 557 | event_data <- master_data[, list(date, end_date, actora, actorb, code)] 558 | event_data[, N := 1] 559 | } else { 560 | event_data <- master_data[, list(date, end_date, actora, actorb, code, N)] 561 | } 562 | 563 | n_codes <- length(codes) 564 | 565 | for(i in 1:length(final_dates)){ 566 | 567 | date_array <- array( 568 | 0 569 | , dim = c(n, n, n_codes) 570 | , dimnames = list(actors, actors, unique(codes)) 571 | ) 572 | this_date <- final_dates[i] 573 | 574 | for(j in 1:n_codes){ 575 | this_code <- codes[j] 576 | this_events <- event_data[date %in% this_date & code %in% this_code] 577 | 578 | if(nrow(this_events) > 0){ 579 | this_events <- this_events[order(actora, actorb)] 580 | this_dyad_idx <- as.matrix( 581 | this_events[, list(as.integer(actora), as.integer(actorb))] 582 | ) 583 | date_array[cbind(this_dyad_idx, this_code)] <- this_events[, N] 584 | } 585 | } 586 | dated_arrays[[i]] <- date_array 587 | } 588 | } 589 | 590 | ###### Mean Goldstein score ties 591 | if(tie_type %in% 'meangoldstein'){ 592 | 593 | event_data <- master_data[, list(date, end_date, actora, actorb, mean_goldstein)] 594 | 595 | for(i in 1:length(final_dates)){ 596 | 597 | date_array <- matrix( 598 | 0 599 | , nrow = n 600 | , ncol = n 601 | , dimnames = list(actors, actors) 602 | ) 603 | this_date <- final_dates[i] 604 | this_events <- event_data[date %in% this_date] 605 | 606 | if(nrow(this_events) > 0){ 607 | this_events <- unique(this_events) 608 | this_events <- this_events[order(actora, actorb)] 609 | this_dyad_idx <- as.matrix( 610 | this_events[, list(as.integer(actora), as.integer(actorb))] 611 | ) 612 | date_array[this_dyad_idx] <- this_events[, mean_goldstein] 613 | } 614 | 615 | dated_arrays[[i]] <- date_array 616 | } 617 | 618 | } 619 | 620 | ###### Separated pos/neg Goldstein score ties 621 
| if(tie_type %in% 'sepgoldstein'){ 622 | 623 | 624 | event_data <- master_data[ 625 | , list(date, end_date, actora, actorb, mean_pos_goldstein, mean_neg_goldstein) 626 | ] 627 | 628 | for(i in 1:length(final_dates)){ 629 | 630 | date_array <- array( 631 | 0 632 | , dim = c(n, n, 2) 633 | , dimnames = list( 634 | actors 635 | , actors 636 | , c('mean_pos_goldstein', 'mean_neg_goldstein') 637 | ) 638 | ) 639 | 640 | this_date <- final_dates[i] 641 | this_events <- event_data[date %in% this_date] 642 | 643 | if(nrow(this_events) > 0){ 644 | this_events <- unique(this_events) 645 | this_events <- this_events[order(actora, actorb)] 646 | this_dyad_pos_idx <- as.matrix( 647 | this_events[mean_pos_goldstein > 0, list(as.integer(actora), as.integer(actorb))] 648 | ) 649 | date_array[cbind(this_dyad_pos_idx, 1)] <- this_events[mean_pos_goldstein > 0, mean_pos_goldstein] 650 | 651 | this_dyad_neg_idx <- as.matrix( 652 | this_events[mean_neg_goldstein < 0, list(as.integer(actora), as.integer(actorb))] 653 | ) 654 | date_array[cbind(this_dyad_neg_idx, 2)] <- this_events[mean_neg_goldstein < 0, mean_neg_goldstein] 655 | } 656 | 657 | dated_arrays[[i]] <- date_array 658 | } 659 | 660 | 661 | } 662 | 663 | 664 | return(dated_arrays) 665 | } 666 | 667 | -------------------------------------------------------------------------------- /R/agents_doc.R: -------------------------------------------------------------------------------- 1 | #' 2 | #' ICEWS CAMEO actor codes 3 | #' 4 | #' Merge table to convert actors to CAMEO format using conversion tables created 5 | #' by Phil Schrodt (https://github.com/philip-schrodt/text_to_CAMEO) 6 | #' 7 | #' @docType data 8 | #' 9 | #' @usage data(agents) 10 | #' 11 | #' @keywords datasets 12 | #' 13 | #' 14 | "agents" 15 | -------------------------------------------------------------------------------- /R/convert_cameo.R: -------------------------------------------------------------------------------- 1 | #' Convert CAMEO Codes 2 | #' 3 | #' 
#' Converts CAMEO codes. CAMEO is an event ontology used in event data projects, including Phoenix.
#'
#' Looks each code up in the packaged \code{convert_cameo_data} table and
#' returns the matching human-readable description.
#'
#' @param cameo Vector of CAMEO event codes.
#' @return A vector the same length as \code{cameo} holding the
#'   \code{EventDescription} for each code; codes with no match in the
#'   table yield \code{NA}.
#' @keywords event data
#' @export
#' @examples
#' \dontrun{
#' # Vector of values to be converted
#' events$Description <- convert_cameo(events$EventCode)
#' }
#'
convert_cameo <- function(cameo){
  # Load the lookup table into this call's environment only
  data(convert_cameo_data, envir = environment())

  # Drop incomplete rows so a code never maps to a missing description
  dict <- na.omit(convert_cameo_data[, c("CAMEOcode", "EventDescription")])

  # Position of each input code in the table (NA when absent)
  matches <- match(cameo, dict[, "CAMEOcode"])
  destination_vector <- dict[matches, "EventDescription"]
  return(destination_vector)
}

# --------------------------------------------------------------------------------
# /R/convert_cameo_data.R:
# --------------------------------------------------------------------------------
#' CAMEO code translation data frame
#'
#' A data frame with 310 rows and 2 columns.
#' Used internally by the \code{convert_cameo()} function.
#'
#' \itemize{
#'   \item CAMEOcode: the 310 different low-level CAMEO codes.
#'   \item EventDescription: Human-readable descriptions of the codes.
#' }
#'
#' @note The current CAMEO codebook is located here: \url{http://eventdata.parusanalytics.com/data.dir/cameo.html}.
#'
#' @docType data
#' @keywords datasets
#' @name convert_cameo_data
#' @usage convert_cameo_data
#' @format A data frame with 310 rows and 2 columns
NULL

# --------------------------------------------------------------------------------
# /R/download_icews.R:
# --------------------------------------------------------------------------------
#' Download the ICEWS Dataset
#'
#' Download and unzip all of the data files for the ICEWS dataset from the
#' Harvard Dataverse into a given directory.
#'
#' @param destpath The path to the directory where ICEWS should go.
#'
#' @return NULL
#' @author Original code and concept: Tony Boyles
#' @note This function is still in development and may contain errors and change quickly.
#' @examples
#'
#' download_icews("~/ICEWS/")
#'
#' @rdname download_icews

#' @export
#' @import RCurl
#' @importFrom plyr l_ply progress_text
#' @import dataverse
#'

## Get ICEWS links
## Queries the ICEWS dataset on Dataverse and returns a data.table with one row
##  per '.tab' file: 'label' (file name), 'id' (Dataverse file id) and 'url'
##  (direct download link).
## NOTE(review): 'dv_server' and 'dv_key' are accepted but not used below; the
##  dataverse client presumably picks up DATAVERSE_SERVER / DATAVERSE_KEY from
##  the environment -- confirm against the caller.
get_icewslinks <- function(dv_server, dv_key){

  ## Set dataverse metadata: API key linked to phoenixNet account.
  ## Never commit a real key; supply your own via environment variables.
  #Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
  #Sys.setenv("DATAVERSE_KEY" = "<your-dataverse-api-key>")

  ## Get ICEWS event data information
  icews_data <- dataverse::get_dataset('doi:10.7910/DVN/28075')
  icews_repos <- data.table(
    label = icews_data$files$filename
    , id = icews_data$files$id
  )

  ## Keep only tabular event files. Match '.tab' literally: as a regex the dot
  ##  would match any character.
  icews_repos <- icews_repos[grepl('.tab', icews_repos$label, fixed = TRUE), ]

  baseURL <- "https://dataverse.harvard.edu/api/access/datafile/"
  icews_repos[, url := paste0(baseURL, icews_repos$id)]

  return(icews_repos)
}


# Given one Dataverse file id, download that file into 'destpath' and try to
#  unzip it in place. 'metadata' is the table returned by get_icewslinks().
# NOTE(review): the default 'link_data' is not defined in this file; it must
#  exist in the calling environment if 'metadata' is not supplied.
dw_icewsfile <- function(link, destpath, metadata = link_data){

  filename <- file.path(destpath, metadata[id %in% link, label])
  fullURL <- metadata[id %in% link, url]

  # download method
  if (.Platform$OS.type == 'windows') {
    download_method <- 'auto'
  } else {
    download_method <- 'curl'
  }

  download.file(fullURL, filename, method = download_method, quiet = TRUE)

  # Best effort: files that are not zip archives are simply left as downloaded
  try({unzip(filename, exdir = destpath, unzip = "internal", setTimes = FALSE)}
      , silent = TRUE)

}

# --------------------------------------------------------------------------------
# /R/download_phoenix.R:
# --------------------------------------------------------------------------------
#' Download the Phoenix Dataset
#'
#' Download and unzip all of the data files for the Phoenix dataset from the
#' Phoenix data website into a given directory.
#'
#' @param destpath The path to the directory where Phoenix should go.
#' @param phoenix_version. Download a specific version of Phoenix ("v0.1.0" or the current version by default).
#'
#' @return NULL
#' @author Original code credit: Andy Halterman
#' @note This function, like Phoenix, is still in development and may contain errors and change quickly.
#' @examples
#'
#' download_phoenix("~/OEDA/phoxy_test/", phoenix_version = "current")
#'
#' @rdname download_phoenix


## Function 1:
##  Process the start/end dates desired, and generate a list of
##  data links to try and download. Returns one URL per calendar day
##  between 'start_date' and 'end_date' (inclusive).
get_phoenixlinks <- function(
  start_date = as.Date('2014-06-20')
  , end_date = Sys.Date()
  ) {

  # Create a range of dates for which to download Phoenix data.
  dates <- seq.Date(
    start_date
    , end_date
    , by = 'day'
  )
  dates <- as.integer(format(dates, '%Y%m%d'))

  # Access the Phoenix raw data from Amazon repository.
  links <- paste0(
    'https://s3.amazonaws.com/oeda/data/current/events.full.'
    , dates
    , '.txt.zip'
  )

  return(links)
}

### Function 2:
##  Given a single link, try to download that specific Phoenix data file.
##  If that day's data is not available, notify the user with a message.
dw_phoenixfile <- function(link, destpath) {
  # extract filename from link
  m <- regexpr('[^/]*(?=\\.zip$)', link, perl = TRUE)
  filename <- regmatches(link, m)

  # download method
  if (.Platform$OS.type == 'windows') {
    download_method <- 'auto'
  } else {
    download_method <- 'curl'
  }

  # Attempt to download and unzip to destpath. The temp file is removed even
  #  if the download itself throws.
  temp <- tempfile()
  on.exit(unlink(temp), add = TRUE)
  download.file(link, temp, method = download_method, quiet = TRUE)

  # Promote unzip warnings to errors so a bad archive reaches the handler,
  #  then restore whatever 'warn' level the caller had.
  old_opts <- options(warn = 2)
  on.exit(options(old_opts), add = TRUE)

  tryCatch(
    unzip(temp, exdir = destpath)
    , error = function(e){
      message(
        paste(
          'Unable to download file '
          , filename
          , '. It appears that Phoenix data for this date is missing.'
          , sep = ''
        )
      )
    }
  )

  unlink(temp)
}

# --------------------------------------------------------------------------------
# /R/extract_dyadstats.R:
# --------------------------------------------------------------------------------
#'
#' Extract dyad-level statistics from a given event-network.
#'
#' INTERNAL FUNCTION: Intakes a given network object and returns a set
#' of dyad-level statistics for output.
#'
#' @param input_date A date in integer %Y%m%d format.
#' @param event_dnet network object object containing a set of interactions.
#'
#' @return net_stats Table of dyad-level statistics.
#'
#' @keywords phoenix, event data
#'
#' @import data.table
#' @import countrycode
#' @import reshape2
#' @import statnet
#' @import tsna
#' @import plyr
#' @import lubridate
#' @import igraph
#' @import intergraph
#'
#' @export


extract_dyadstats <- function(input_date = this_date, event_dnet = tsna_obj){
  # Returns a data.table keyed on (nodea, nodeb) with columns 'date', 'nodea',
  #  'nodeb': one row per ordered pair of distinct nodes that infomap places in
  #  the same community of size > 1 at 'input_date'.
  # NOTE(review): the defaults 'this_date' and 'tsna_obj' are not defined in
  #  this file; they must exist in the calling environment if not supplied.

  ######
  #
  # Extract daily network and convert to igraph
  #
  ######

  ## Collapse to daily network
  net_obj <- network.collapse(event_dnet, at = input_date)

  ## Convert input date to an actual date object
  input_date <- as.Date(as.character(input_date), format = '%Y%m%d')

  ## Convert to igraph object via 'intergraph' for additional metrics
  daily_graph <- intergraph::asIgraph(net_obj)

  ######
  #
  # Extract a set of DYAD-LEVEL statistics
  #
  ######

  ## Community detection
  ic <- igraph::infomap.community(daily_graph)

  ## Get community membership (one entry per node)
  ic_membership <- igraph::membership(ic)

  ## Size the membership matrix from the network itself rather than assuming
  ##  255 nodes, so larger actor sets do not index out of bounds.
  n_nodes <- length(ic_membership)

  ## Sizes of N>1 communities detected
  comm_sizes <- igraph::sizes(ic)
  size_ic <- sort(comm_sizes[comm_sizes > 1], decreasing = TRUE)

  ## Convert to edgelist: (community index, node index)
  comm_ids <- (ic_membership[ic_membership %in% names(size_ic)])
  comm_members <- which(ic_membership %in% comm_ids)
  comm_ids <- as.integer(as.factor(comm_ids))
  comm_edgelist <- cbind(comm_ids, comm_members)

  ## Convert to bimodal adjacency matrix (communities x nodes)
  comm_membership <- matrix(0, length(unique(comm_ids)), n_nodes)
  rownames(comm_membership) <- sort(unique(comm_ids))
  colnames(comm_membership) <- seq_len(n_nodes)
  comm_membership[comm_edgelist] <- 1

  ## Matrix multiply to get shared membership matrix (nodes x nodes)
  comm_adj <- t(comm_membership) %*% comm_membership

  ## Convert to daily edgelist; fall back to an all-NA row if construction
  ##  fails (that row is removed by the nodea != nodeb filter below)
  comm_try <- try({
    comm_ties <- data.table(input_date, which(comm_adj == 1, arr.ind = TRUE))
  }, silent = TRUE)
  if (inherits(comm_try, 'try-error')) {
    comm_ties <- data.table('input_date' = NA
                            , 'nodea' = NA
                            , 'nodeb' = NA)
  }
  setnames(comm_ties, c('date', 'nodea', 'nodeb'))
  comm_ties <- comm_ties[nodea != nodeb]
  setkeyv(comm_ties, c('nodea', 'nodeb'))

  return(comm_ties)
}

# --------------------------------------------------------------------------------
# /R/extract_netstats.R:
# --------------------------------------------------------------------------------
#'
#' Extract network-level statistics from a given event-network.
#'
#' INTERNAL FUNCTION: Intakes a given network object and returns a set
#' of network-level statistics for output.
#'
#' @param input_date A date in integer %Y%m%d format.
#' @param event_dnet network object object containing a set of interactions.
#'
#' @return net_stats Table of network-level statistics.
#'
#' @details \code{net_jaccard} and \code{net_hamming} compare the current
#'   adjacency matrix with that of the preceding entry in \code{datelist};
#'   both are NA when there is no preceding entry or the comparison fails.
#'   \code{net_hamming} is computed as the share of cells on which the two
#'   matrices agree. When the collapsed network has no edges every statistic
#'   is returned as 0. \code{event_dnet} must always be supplied, because
#'   its default value refers to itself.
#'
#' @keywords phoenix, event data
#'
#' @import data.table
#' @import countrycode
#' @import reshape2
#' @import statnet
#' @import tsna
#' @import plyr
#' @import lubridate
#' @import igraph
#' @import intergraph
#'
#' @export


extract_netstats <- function(input_date = this_date, event_dnet = event_dnet, datelist = dates){

  ######
  #
  # Extract daily network and convert to igraph
  #
  ######

  ## Collapse to daily network
  net_obj <- network.collapse(event_dnet, at = input_date)

  ## Look up the preceding period while input_date is still in the integer
  ## format used by datelist, then convert it to an actual date object.
  ## prev_date is empty for the first period or an unknown date.
  prev_date <- datelist[which(datelist %in% input_date) - 1]
  input_date <- as.Date(as.character(input_date), format = '%Y%m%d')

  ## An empty network has no meaningful statistics: return all zeros
  if(network::network.edgecount(net_obj) == 0){
    return(data.table(date = input_date
                      , net_jaccard = 0, net_hamming = 0
                      , net_degree = 0, net_density = 0
                      , net_trans = 0, net_modularity = 0
                      , num_communities = 0, comm_meansize = 0
                      , xcomm_ties = 0
                      , dyads_mut = 0, dyads_asym = 0
                      , dyads_null = 0
                      , triads_003 = 0, triads_012 = 0
                      , triads_102 = 0, triads_021D = 0
                      , triads_021U = 0, triads_021C = 0
                      , triads_111D = 0, triads_111U = 0
                      , triads_030T = 0, triads_030C = 0
                      , triads_201 = 0, triads_120D = 0
                      , triads_120U = 0, triads_120C = 0
                      , triads_210 = 0, triads_300 = 0))
  }

  ## Convert to igraph object via 'intergraph' for additional metrics
  daily_graph <- intergraph::asIgraph(net_obj)

  ## Adjacency matrix, built once and shared by the statistics below
  net_mat <- as.matrix.network(net_obj)

  ######
  #
  # Extract a set of NETWORK-LEVEL statistics
  #
  ######

  #### Changes from previous time period
  net_jaccard <- NA
  net_hamming <- NA
  if(length(prev_date) > 0){
    prev_stats <- tryCatch({
      ## Adjacency matrix of the previous time period
      net_mat_t1 <- as.matrix.network(
        network.collapse(event_dnet, at = prev_date))

      ## Cell-wise sum: 2 = tie in both periods, 1 = tie in exactly one,
      ## 0 = tie in neither
      net_overlap <- net_mat_t1 + net_mat
      net_intersect <- sum(net_overlap == 2)
      net_union <- sum(net_overlap >= 1)
      net_difference <- sum(net_overlap == 0)

      ## NOTE(review): 'hamming' here is the share of agreeing cells (a
      ## similarity), not a count of differing cells -- confirm intent.
      list(jaccard = net_intersect / net_union
           , hamming = (net_intersect + net_difference) / length(net_mat))
    }, error = function(e) NULL)
    if(!is.null(prev_stats)){
      net_jaccard <- prev_stats$jaccard
      net_hamming <- prev_stats$hamming
    }
  }

  ## Mean degree
  # Since it's a mean, in- vs out-degree doesn't matter
  net_degree <- mean(sna::degree(net_mat, gmode = 'digraph'))

  ## Density
  net_density <- network.density(net_obj)

  ## Transitivity
  net_trans <- gtrans(net_obj, diag = FALSE, mode = 'digraph')

  ## Dyad census; read by position below as mutual, asymmetric, null
  net_dyads <- sna::dyad.census(net_mat)

  ## Triad census; read by position below in the order 003 ... 300
  net_triads <- sna::triad.census(net_mat, mode = 'digraph')

  ## Community detection
  ic <- igraph::infomap.community(daily_graph)

  ## Network community modularity
  ic_mod <- igraph::modularity(ic)

  ## Number and mean size of N>1 communities detected
  ic_sizes <- igraph::sizes(ic)
  size_ic <- ic_sizes[ic_sizes > 1]
  num_ic <- length(size_ic)
  meansize_ic <- mean(size_ic)

  ## Share of total ties that connect different communities
  share_crossings <- mean(igraph::crossing(ic, daily_graph))

  ## Output network stats
  return(data.table(date = input_date
                    , net_jaccard = net_jaccard, net_hamming = net_hamming
                    , net_degree = net_degree, net_density = net_density
                    , net_trans = net_trans, net_modularity = ic_mod
                    , num_communities = num_ic, comm_meansize = meansize_ic
                    , xcomm_ties = share_crossings
                    , dyads_mut = net_dyads[1], dyads_asym = net_dyads[2]
                    , dyads_null = net_dyads[3]
                    , triads_003 = net_triads[1], triads_012 = net_triads[2]
                    , triads_102 = net_triads[3], triads_021D = net_triads[4]
                    , triads_021U = net_triads[5], triads_021C = net_triads[6]
                    , triads_111D = net_triads[7], triads_111U = net_triads[8]
                    , triads_030T = net_triads[9], triads_030C = net_triads[10]
                    , triads_201 = net_triads[11], triads_120D = net_triads[12]
                    , triads_120U = net_triads[13], triads_120C = net_triads[14]
                    , triads_210 = net_triads[15], triads_300 = net_triads[16]
  ))

}

--------------------------------------------------------------------------------
/R/extract_nodestats.R:
--------------------------------------------------------------------------------
#'
#' Extract node-level statistics from a given event-network.
#'
#' INTERNAL FUNCTION: Intakes a given network object and returns a set
#' of node-level statistics for output.
#'
#' @param input_date A date in integer %Y%m%d format.
#' @param event_dnet network object containing a set of interactions.
#'
#' @return net_stats Table of node-level statistics.
#'
#' @details The result has one row per statistic ('trans', 'indegree',
#'   'outdegree', 'between', 'recip', 'combined1', 'combined2') and one
#'   column per node. 'combined1' is the column sum of the row-standardised
#'   absolute statistics and 'combined2' the column sum of their
#'   (value + 1)^2.
#'
#' @keywords phoenix, event data
#'
#' @import data.table
#' @import countrycode
#' @import reshape2
#' @import statnet
#' @import tsna
#' @import plyr
#' @import lubridate
#' @import igraph
#' @import intergraph
#'
#' @export

extract_nodestats <- function(input_date = this_date, event_dnet = tsna_obj){

  ######
  #
  # Extract daily network and convert to igraph
  #
  ######

  ## Collapse to daily network
  net_obj <- network.collapse(event_dnet, at = input_date)

  ## Convert input date to an actual date object
  input_date <- as.Date(as.character(input_date), format = '%Y%m%d')

  ## Write a weird little workaround for the final day of an empty tsna
  ## object: by default it is a zero-node network, which is odd.
  # if(network.size(net_obj) == 0){
  #   filler <- matrix(rep(0, 255), nrow = 1)
  #   dimnames(filler)[[2]] <- paste0('node', 1:255)
  #   return(rbind(as.data.table(cbind(date = input_date
  #                       , node_stat = 'trans', filler))
  #                , as.data.table(cbind(date = input_date
  #                       , node_stat = 'indegree', filler))
  #                , as.data.table(cbind(date = input_date
  #                       , node_stat = 'outdegree', filler))
  #                , as.data.table(cbind(date = input_date
  #                       , node_stat = 'between', filler))))
  # }

  nodes <- network.vertex.names(net_obj)

  ## Convert to igraph object via 'intergraph' for additional metrics
  daily_graph <- intergraph::asIgraph(net_obj)

  ## Adjacency matrix, built once and shared by the sna-based statistics
  net_mat <- as.matrix.network(net_obj)

  ## Rescaled sna measures are NaN when the network total is zero; treat
  ## those as zero.
  zero_nan <- function(x){
    x[is.nan(x)] <- 0
    x
  }

  ######
  #
  # Extract a set of NODE-LEVEL statistics
  #
  ######

  ## Transitivity (local; isolates scored as zero)
  trans <- igraph::transitivity(daily_graph, type = 'local'
                                , isolates = 'zero')

  ## Degree
  indegree <- zero_nan(sna::degree(net_mat, cmode = 'indegree'
                                   , rescale = TRUE))
  outdegree <- zero_nan(sna::degree(net_mat, cmode = 'outdegree'
                                    , rescale = TRUE))

  ## Betweenness
  between <- zero_nan(sna::betweenness(net_mat, gmode = 'digraph'
                                       , rescale = TRUE))

  ## Reciprocity: for each node, the share of cells in its row that equal
  ## the matching cell in its column
  recip <- rowSums(net_mat == t(net_mat)) / nrow(net_mat)

  ## Stack the raw statistics as a numeric (statistic x node) matrix. This
  ## keeps the values numeric throughout, instead of passing them through a
  ## character matrix and coercing back (which lost precision and raised a
  ## coercion warning on every call).
  raw_stats <- rbind(trans, indegree, outdegree, between, recip)

  ## Combined metrics: standardise each statistic across nodes, then sum
  ## per node.
  ## NOTE(review): a statistic that is constant across nodes has zero
  ## variance, so its standardised row is NaN and both combined rows become
  ## NaN for that period -- confirm this is acceptable.
  z_stats <- t(scale(t(abs(raw_stats))))
  all_stats <- rbind(raw_stats
                     , colSums(z_stats)
                     , colSums((z_stats + 1)^2))
  dimnames(all_stats) <- list(NULL, nodes)

  ## NOTE(review): the date column is a plain number (days since
  ## 1970-01-01), which is what the earlier cbind()-based construction
  ## produced; the other extractors return a Date -- confirm before changing.
  out_data <- data.table(date = as.numeric(input_date)
                         , node_stat = c('trans', 'indegree', 'outdegree'
                                         , 'between', 'recip'
                                         , 'combined1', 'combined2')
                         , as.data.table(all_stats))
  return(out_data)
}

--------------------------------------------------------------------------------
/R/icews_cameo.R:
--------------------------------------------------------------------------------
#'
#' Convert ICEWS state/actor codes into CAMEO format,
#' and extract root codes from specific CAMEO event codes.
#'
#' Intake a set of ICEWS data (read in after some pre-processing)
#' and convert entries to CAMEO format using conversion tables created
#' by Phil Schrodt (https://github.com/philip-schrodt/text_to_CAMEO)
#'
#' @param icews ICEWS data as one large data.table
#'
#' @return icews ICEWS data with several new CAMEO code columns:
#'   source_codes, target_codes, the state code columns merged in from the
#'   'states' table, sourceactorentity, targetactorentity and rootcode.
#'
#' @keywords phoenix, event data
#'
#' @import data.table
#' @import plyr
#'
#' @export

icews_cameo <- function(icews){

  ######
  #
  # Read in data for conversions
  # (created by Phil Schrodt)
  #
  ######

  data(agents, envir = environment())
  data(states, envir = environment())

  ######
  #
  # Functions
  #
  ######

  ## Paste-function that can handle NA entries
  ## (http://stackoverflow.com/questions/13673894/suppress-nas-in-paste)
  paste3 <- function(..., sep = ", "){
    L <- list(...)
    L <- lapply(L, function(x){ x[is.na(x)] <- ""; x })
    ret <- gsub(paste0("(^", sep, "|", sep, "$)"), ""
                , gsub(paste0(sep, sep), sep
                       , do.call(paste, c(L, list(sep = sep)))))
    is.na(ret) <- ret == ""
    ret
  }

  ## Conversion function: intake a vector of comma-separated sector lists
  ## and return, for each, the first three characters of the code for the
  ## first-listed sector. match() returns exactly one result per input in
  ## input order; unknown sectors give NA.
  cameo_convert <- function(in_data){
    first_sector <- vapply(strsplit(in_data, ','), '[', character(1), 1)
    return(substr(agents$code1[match(first_sector, agents$actor)], 1, 3))
  }

  ######
  #
  # Convert ICEWS codes to CAMEO codes
  #
  ######

  ## Convert each unique source/target sector string once, then merge the
  ## codes back onto the events
  source_table <- data.table(Source.Sectors = unique(icews$Source.Sectors))
  source_table[, source_codes := cameo_convert(Source.Sectors)]
  target_table <- data.table(Target.Sectors = unique(icews$Target.Sectors))
  target_table[, target_codes := cameo_convert(Target.Sectors)]
  icews <- merge(icews, source_table, by = 'Source.Sectors'
                 , all.x = TRUE, sort = FALSE)
  icews <- merge(icews, target_table, by = 'Target.Sectors'
                 , all.x = TRUE, sort = FALSE)

  ## Convert unique state codes: the three columns of 'states' are renamed
  ## for the source side, merged, then renamed again for the target side.
  setnames(states, c('Source.Country', 'source_isoc', 'source_cown'))
  icews <- merge(icews, states, by = 'Source.Country'
                 , all.x = TRUE, sort = FALSE)
  setnames(states, c('Target.Country', 'target_isoc', 'target_cown'))
  icews <- merge(icews, states, by = 'Target.Country'
                 , all.x = TRUE, sort = FALSE)

  ######
  #
  # Generate source/actor entity codes a la CAMEO
  #
  ######

  ## State code followed by agent code; NA only when both parts are missing
  icews[, sourceactorentity := paste3(icews$source_isoc, icews$source_codes, sep = '')]
  icews[, targetactorentity := paste3(icews$target_isoc, icews$target_codes, sep = '')]

  ######
  #
  # Extract root codes from CAMEO codes
  #
  ######

  ## For each root i/10, tag codes i..i+9 and i*10+11..i*10+90. The second
  ## range for a low root overlaps the first range of a higher root
  ## (e.g. 111..190 for root 1), so the loop must run in ascending order to
  ## let the later assignment win.
  for(i in seq(10, 200, by = 10)){
    icews[eventcode %in% c(i:(i+9), c((i*10+11):(i*10+90))), rootcode := i/10]
  }

  ######
  #
  # Write out cleaned ICEWS data
  #
  ######

  return(icews)
}

--------------------------------------------------------------------------------
/R/ingest_histphoenix.R:
--------------------------------------------------------------------------------
#' Ingest the historic Phoenix Dataset
#'
#' Given a directory with the historic Phoenix dataset files, quickly read
#' them all in, name them correctly, and combine them into one dataframe.
#'
#' @param histphoenix_loc The path to the Phoenix folder.
#' @param .start_date Start of the requested date range. Not referenced in
#'   the function body at present.
#' @param .end_date End of the requested date range. Not referenced in
#'   the function body at present.
#' @param .statelist Vector of state codes. Rows whose source/target root is
#'   in this vector and whose agent is blank or 'GOV' are given the actor
#'   code formed by the root followed by 'GOV'.
#'
#' @return A single dataframe with all the historic Phoenix events in the folder.
#' @note This function, like Phoenix, is still in development and may contain errors and change quickly.
#' @examples
#'
#' events <- ingest_histphoenix("~/histphoenix")
#'
#' @import data.table
#' @import countrycode
#' @import bit64
#' @rdname ingest_histphoenix
#' @export

ingest_histphoenix <- function(
  histphoenix_loc
  , .start_date = start_date
  , .end_date = end_date
  , .statelist = statelist
  ){

  ## Identify appropriate files in the historic Phoenix folder - don't want
  ## to try and read in PDF docs or .csv metadata tables
  all_files <- list.files(histphoenix_loc)
  histphoenix_files <- all_files[
    grepl('Phoenix', all_files) & grepl('csv', all_files)
  ]
  if(length(histphoenix_files) == 0){
    stop('No historic Phoenix .csv files found in ', histphoenix_loc)
  }

  ## Read in Phoenix files from historic sources and stack them in one
  ## step, matching columns by name as rbind() does for data.tables
  histphoenix_data <- rbindlist(
    lapply(histphoenix_files, function(filename){
      this_phoenix <- fread(file.path(histphoenix_loc, filename))
      ## Force 'aid' to a common type so the files stack cleanly
      this_phoenix$aid <- bit64::as.integer64(this_phoenix$aid)
      this_phoenix
    })
    , use.names = TRUE
  )

  ######
  #
  # Parse the historic Phoenix data
  #
  ######

  ######################################
  ## IMPORTANT ASSUMPTION HERE:
  ## I am *ASSUMING* that NULL/NA entries after a state code
  ## implies that the actor is the GOVERNMENT. As such I am replacing
  ## all such missing entries with 'GOV'.
  ######################################
  histphoenix_data[source_root %in% .statelist
                   & (source_agent == ''
                      | source_agent == 'GOV')
                   , source := paste0(source_root, 'GOV')]
  ## The target recode tests the TARGET agent (it previously tested
  ## source_agent, so target codes depended on the wrong column)
  histphoenix_data[target_root %in% .statelist
                   & (target_agent == ''
                      | target_agent == 'GOV')
                   , target := paste0(target_root, 'GOV')]

  ## Drop any rows missing an event code, root code, Goldstein score or
  ## quad class
  histphoenix_data <- histphoenix_data[!is.na(code)
                                       & !is.na(root_code)
                                       & !is.na(goldstein)
                                       & !is.na(quad_class)]

  ## Parse dates (story_date is in month/day/year format)
  histphoenix_data$date <- as.Date(histphoenix_data$story_date, format = '%m/%d/%Y')

  return(histphoenix_data)
}



--------------------------------------------------------------------------------
/R/ingest_icews.R:
--------------------------------------------------------------------------------
#' Ingest the ICEWS Event Dataset
#'
#' Given a directory with individual ICEWS dataset files, quickly read
#' them all in, name them correctly, and combine them into one dataframe.
#'
#' @param dir The path to the ICEWS folder.
#' @param start_date Start of date range as YYYYMMDD integer format.
#' @param end_date End of date range as YYYYMMDD integer format.
#'
#' @return A single dataframe with all the ICEWS events in the folder.
#' @author Andy Halterman, forked by Jesse Hammond
#' @note This function is still in development and may contain errors and change quickly.
#' @examples
#'
#' events <- ingest_icews("~/ICEWS/study_28075/Data/", 20101201, 20140101)
#'
#' @details Files are selected by the year embedded in the file name (the
#'   second dot-separated token); only the first four characters of
#'   \code{start_date} and \code{end_date} are used. When no rows are read,
#'   a one-row table of NA values is returned.
#'
#' @rdname ingest_icews
#' @export

ingest_icews <- function(dir, start_date, end_date){
  # Handle messy file paths: make sure the folder ends in a slash
  if (!grepl("/$", dir)){
    dir <- paste0(dir, "/")
  }

  ## List files, keeping only .tab tables in case of zips still there
  files <- list.files(dir)
  files <- files[grepl("\\.tab$", files)]

  ## Pull files that fall in the date range provided. The year is the second
  ## dot-separated token of the file name; names without a numeric year are
  ## ignored instead of breaking the comparison.
  startyear <- as.integer(substr(start_date, 1, 4))
  endyear <- as.integer(substr(end_date, 1, 4))
  filesyears <- suppressWarnings(as.integer(
    vapply(strsplit(files, '.', fixed = TRUE), '[', character(1), 2)))
  has_year <- !is.na(filesyears)
  if(any(has_year) && endyear > max(filesyears[has_year])){
    message('Note: specified range exceeds the most recent ICEWS entries.')
  }
  files <- files[has_year & filesyears >= startyear & filesyears <= endyear]
  ## Only build paths when something was selected: paste0() would otherwise
  ## turn an empty selection into the bare folder path.
  if(length(files) > 0){
    files <- paste0(dir, files)
  }

  ## Set column dtypes
  coltypes <- c('integer', rep('character', 5), 'integer', 'numeric'
                , rep('character', 3), 'integer', 'integer'
                , rep('character', 5), 'numeric', 'numeric')

  ## Quick and dirty: fread all files. A file that cannot be read is
  ## reported and skipped (NULL entries are dropped when binding).
  read_one <- function(file){
    tryCatch(
      data.table::fread(file, stringsAsFactors = FALSE, sep = '\t'
                        , colClasses = coltypes, na.strings = '')
      , error = function(e){
        message(paste0('error reading ', file))
        message('object is not a data.frame')
        NULL
      }
    )
  }
  message("Reading in files...")
  event_list <- plyr::llply(files, read_one, .progress = plyr::progress_text(char = '='))

  # Bind everything together
  events <- data.table::rbindlist(event_list)

  if(nrow(events) > 0){
    # Set names
    data.table::setnames(events
      , c("event_id", "date", "Source.Name", "Source.Sectors",
          "Source.Country", "Event.Text", "eventcode", "goldstein", "Target.Name",
          "Target.Sectors", "Target.Country", "Story.ID", "Sentence.Number",
          "Publisher", "City", "District", "Province", "Country", "Latitude",
          "Longitude"))
    # Use lubridate, then de-POSIX the date.
    events$date <- as.Date(lubridate::ymd(events$date))
    message("Process complete")
    return(events)

  } else{
    ## Nothing was read: return a one-row placeholder of NA values
    events <- data.table::data.table(date = structure(NA_real_, class="Date")
                         , sourceactorentity = NA_character_
                         , targetactorentity = NA_character_
                         , rootcode = NA_integer_
                         , eventcode = NA_integer_
                         , goldstein = NA_real_)
    message("Process complete")
    return(events)
  }
}


--------------------------------------------------------------------------------
/R/ingest_phoenix.R:
--------------------------------------------------------------------------------
#' Ingest the Phoenix Dataset
#'
#' Given a directory with individual Phoenix dataset files, quickly read
#' them all in, name them correctly, and combine them into one dataframe.
#'
#' @param phoenix_loc The path to the Phoenix folder.
#' @param start_date Start of date range as YYYYMMDD integer format.
#' @param end_date End of date range as YYYYMMDD integer format.
#'
#' @return A single dataframe with all the Phoenix events in the folder.
#' @author Andy Halterman, forked by Jesse Hammond
#' @note This function, like Phoenix, is still in development and may contain errors and change quickly.
#' @examples
#'
#' events <- ingest_phoenix("~/OEDA/phoxy_test/", 20140620, 20150101)
#'
#' @details \code{start_date} and \code{end_date} may be given as YYYYMMDD
#'   integers or strings, or as Date objects. Files are selected by the date
#'   embedded in the file name (the third dot-separated token). When no rows
#'   are read, a one-row table of NA values is returned.
#'
#' @import data.table
#' @rdname ingest_phoenix
#' @export

ingest_phoenix <- function(phoenix_loc, start_date, end_date){
  # Handle messy file paths: make sure the folder ends in a slash
  if (!grepl("/$", phoenix_loc)){
    phoenix_loc <- paste0(phoenix_loc, "/")
  }

  ## Coerce a range bound to Date. File dates are Date objects, so a raw
  ## YYYYMMDD integer compared against them never matched any file.
  to_date <- function(x){
    if(inherits(x, c('Date', 'POSIXt'))){
      return(as.Date(x))
    }
    if(is.numeric(x)){
      ## Small numbers cannot be YYYYMMDD; treat them as day counts, which
      ## is how a bare number compared against a Date before.
      if(all(x < 1e7, na.rm = TRUE)){
        return(as.Date(x, origin = '1970-01-01'))
      }
      x <- sprintf('%.0f', x)
    }
    if(all(grepl('^[0-9]{8}$', x))){
      return(as.Date(x, format = '%Y%m%d'))
    }
    as.Date(x)
  }
  start_date <- to_date(start_date)
  end_date <- to_date(end_date)
  if(is.na(start_date) || is.na(end_date)){
    stop('start_date and end_date must be dates or YYYYMMDD values.')
  }

  ## List files
  files <- list.files(phoenix_loc)

  ## Pull files that fall in the date range provided. The date is the third
  ## dot-separated token of the file name; files without a parseable date
  ## are ignored.
  date_tokens <- vapply(strsplit(files, '.', fixed = TRUE)
                        , '[', character(1), 3)
  filesdates <- as.Date(date_tokens, format = '%Y%m%d')
  has_date <- !is.na(filesdates)
  if(any(has_date)){
    if(start_date < min(filesdates[has_date])){
      message('Note: specified range precedes the earliest Phoenix data.')
    }
    if(end_date > max(filesdates[has_date])){
      message('Note: specified range exceeds the latest Phoenix data. IT\'S NOT A CRYSTAL BALL PEOPLE')
    }
  }
  files <- files[has_date & filesdates >= start_date & filesdates <= end_date]
  ## Only build paths when something was selected: paste0() would otherwise
  ## turn an empty selection into the bare folder path.
  if(length(files) > 0){
    files <- paste0(phoenix_loc, files)
  }

  ## Set column dtypes
  coltypes <- c('character', rep('integer', 4), rep('character', 10)
                , 'integer', 'numeric', 'character', 'numeric'
                , 'numeric', rep('character', 6))
  ## Set column name
  phoenix_names <- c('eventid', 'date', 'year', 'month', 'day'
                     , 'sourceactorfull', 'sourceactorentity', 'sourceactorrole'
                     , 'sourceactorattribute', 'targetactorfull', 'targetactorentity'
                     , 'targetactorrole', 'targetactorattribute', 'eventcode'
                     , 'rootcode', 'pentaclass', 'goldstein', 'issues'
                     , 'lat', 'long', 'locationname', 'statename', 'countrycode'
                     , 'sentenceid', 'urls', 'newssources')

  ## Quick and dirty: fread all files. A file that cannot be read is
  ## reported and skipped (NULL entries are dropped when binding).
  read_one <- function(file){
    tryCatch(
      data.table::fread(file, stringsAsFactors = FALSE, sep = '\t'
                        , colClasses = coltypes, na.strings = '')
      , error = function(e){
        message(paste0('error reading ', file))
        message('object is not a data.frame')
        NULL
      }
    )
  }

  message("Reading in files...")
  event_list <- plyr::llply(files, read_one, .progress = plyr::progress_text(char = '='))

  ## Bind everything together
  events <- data.table::rbindlist(event_list)

  if(nrow(events) > 0){
    ## Name the columns only once there is something to name; doing it on
    ## an empty result fails and made the fallback below unreachable.
    data.table::setnames(events, phoenix_names)

    ## Convert codes to INTEGER type (non-numeric codes become NA)
    suppressWarnings(events$eventcode <- as.integer(events$eventcode))
    suppressWarnings(events$rootcode <- as.integer(events$rootcode))

    ## Convert dates to DATE object
    events$date <- as.Date(lubridate::ymd(events$date)) # use lubridate, then de-POSIX the date.
    message("Process complete")
    return(events)

  } else{
    ## Nothing was read: return a one-row placeholder of NA values
    events <- data.table::data.table(date = structure(NA_real_, class="Date")
                         , sourceactorentity = NA_character_
                         , targetactorentity = NA_character_
                         , rootcode = NA_integer_
                         , eventcode = NA_integer_
                         , goldstein = NA_real_)
    message("Process complete")
    return(events)
  }
}


--------------------------------------------------------------------------------
/R/phoenix_stats.R:
--------------------------------------------------------------------------------
#'
#' Extract statistics from daily Phoenix event-networks.
#'
#' Take a set of daily networks generated by the phoenix_net function,
#' and extract a variety of daily statistics at the network and nodal
#' levels.
#'
#'
#' @param dailynets networkDynamic object containing daily event-nets
#'   produced via phoenix_net function.
#' @param time_window time interval of aggregate event-network objects. Valid
#'   entries are 'day', 'week', 'month', 'year'.
#' @param codes string of event codes, root codes, or pentaclass codes.
#'   Note: these codes have to be in the same format as the original
#'   network layers created via 'phoenix_net'. If you specify rootcodes
#'   in the creation step, entering pentaclass codes in this step
#'   will produce an error.
#' @param do_parallel Logical TRUE-FALSE. Whether to use parallel backend
#'   'doMC' when extracting network statistics by code. Considerably
#'   faster than single-core, but less reliable.
#' @param n_cores Number of cores registered with 'doMC' when
#'   \code{do_parallel} is TRUE.
#'
#'
#' @return phoenix_out a LIST object of tables containing descriptive
#'   statistics for daily event-networks. One element per code, each holding
#'   the tables 'netstats', 'dyadstats' and 'nodestats'.
#'
#' @keywords phoenix, event data
#'
#' @import data.table
#' @import countrycode
#' @import reshape2
#' @import statnet
#' @import tsna
#' @import plyr
#' @import lubridate
#' @import igraph
#' @import intergraph
#' @import doMC
#'
#' @export

phoenix_stats <- function(dailynets, time_window = 'day'
                          , codes = 'all', do_parallel = F, n_cores = 4){

  ######
  #
  # Set up some initial values
  #
  ######

  ## Initialize parallel cores
  if(isTRUE(do_parallel == TRUE)){
    doMC::registerDoMC(cores = n_cores)
  }

  ## Subset codes. The 'all' test is guarded by length so that a vector of
  ## several codes does not break the if() condition.
  if(length(codes) == 1 && codes == 'all'){
    codes <- names(dailynets)
  } else{
    codes <- paste0('code', codes)
  }

  ## Fail early, and by name, on layers that do not exist
  unknown_codes <- setdiff(codes, names(dailynets))
  if(length(unknown_codes) > 0){
    stop('Requested codes not found in dailynets: '
         , paste(unknown_codes, collapse = ', '))
  }

  ## Set up dates from the observation period stored on the first layer
  start_end <- get.network.attribute(
    dailynets[[1]],'net.obs.period')$observations[[1]]
  start_date <- as.Date(as.character(start_end[1]), format = '%Y%m%d')
  end_date <- as.Date(as.character(start_end[2]), format = '%Y%m%d')
  dates <- as.integer(format(seq.Date(start_date, end_date, by = time_window)
                             , '%Y%m%d'))
  ## The final element of the sequence is not analysed as its own period
  dates <- dates[-length(dates)]

  ######
  #
  # Set up some empty storage objects
  #
  ######

  # Storage for daily network outputs
  master_data <- vector('list', length(codes))
  names(master_data) <- as.character(codes)

  for(code in codes){

    ## Extract one set of daily event-networks
    event_dnet <- dailynets[[code]]
    code_label <- substr(code, 5, nchar(code))

    ## Extract network-level statistics
    message(paste0('Extracting network statistics for code '
                   , code_label, ' ...'))
    netstats <- data.table(
      plyr::ldply(dates, extract_netstats, event_dnet = event_dnet
                  , datelist = dates
                  # , .progress = 'text'
                  , .parallel = do_parallel))
    ## The first period has no predecessor, so its change measures are
    ## undefined
    if(nrow(netstats) > 0){
      netstats$net_jaccard[1] <- NA
      netstats$net_hamming[1] <- NA
    }

    ## Extract dyad-level statistics
    message(paste0('Extracting dyadic shared-community statistics for code '
                   , code_label, ' ...'))
    dyadstats <- data.table(
      plyr::ldply(dates, extract_dyadstats, event_dnet = event_dnet
                  # , .progress = 'text'
                  , .parallel = do_parallel
      ))

    ## Extract node-level statistics
    message(paste0('Extracting nodal centrality and transitivity statistics for code '
                   , code_label, ' ...'))
    nodestats <- data.table(
      plyr::ldply(dates, extract_nodestats, event_dnet = event_dnet
                  # , .progress = 'text'
                  , .parallel = do_parallel))

    master_data[[code]] <- list(netstats = netstats
                                , dyadstats = dyadstats
                                , nodestats = nodestats)
  }

  return(master_data)
}

--------------------------------------------------------------------------------
/R/phoenix_tables.R:
--------------------------------------------------------------------------------
#' Scrape, merge, and process Phoenix and ICEWS data into
#' a large data table for aggregation and subsetting.
#'
#'
#' @param phoenix_loc folder containing Phoenix data sets as daily .csv
#'   data tables.
Automatically checks for new data sets each time 7 | #' the function is run, and downloads new daily data as it becomes 8 | #' available. Currently in 'one-and'done' format 9 | #' where it downloads the first time, and checks thereafter. 10 | #' @param icews_loc folder containing ICEWS data sets as daily .tab data 11 | #' tables. Because I don't know how to work a SWORD API, these will 12 | #' need to be manually downloaded and updated. 13 | #' 14 | #' @return master_table a data.table object containing ALL merged/processed 15 | #' Phoenix and ICEWS data. One row per event-dyad-day. 16 | #' 17 | #' @rdname phoenix_tables 18 | #' 19 | #' @author Jesse Hammond 20 | #' 21 | #' @note This function is still in early development and may contain significant errors. 22 | #' Don't trust it. 23 | #' 24 | 25 | #' @export 26 | #' 27 | #' @import data.table 28 | #' @import countrycode 29 | #' @import lubridate 30 | #' @import dummies
phoenix_tables <- function(phoenix_loc, icews_loc, update = TRUE){
  ## Ingest ICEWS and Phoenix event data, merge them into one table of
  ## directed actor-to-actor events, record how much the two sources
  ## overlap per day, and dummy-code the CAMEO classifications.
  ##
  ## Arguments:
  ##   phoenix_loc  folder handed to update_phoenix() and ingest_phoenix().
  ##   icews_loc    folder handed to ingest_icews(); it must contain the
  ##                yearly ICEWS file named in 'icews_checkfile' below.
  ##   update       if TRUE, call update_phoenix() before ingesting.
  ##
  ## Returns a list with three elements:
  ##   diagnostics  data.table with one row per day and the columns
  ##                phoenix_only / icews_only / both_sources.
  ##   netdata      data.table of de-duplicated events with pentaclass,
  ##                rootcode and eventcode expanded to dummy columns.
  ##   actorlist    factor of actor codes that are not bare ISO3C state codes.
  ##
  ## Stops with an error if 'icews_loc' does not contain the check file.

  ######
  #
  # Set up some initial values: Time windows
  #
  ######

  ## Date objects
  start_date <- as.Date('1995-01-01')
  end_date <- Sys.Date()
  dates <- seq.Date(start_date, end_date, by = 'day')

  ######
  #
  # Set up some initial values: Actors
  #
  ######

  ## Paste-function that can handle NA entries
  ## (http://stackoverflow.com/questions/13673894/suppress-nas-in-paste)
  paste3 <- function(..., sep = ", ") {
    L <- list(...)
    L <- lapply(L, function(x) {x[is.na(x)] <- ""; x})
    ret <- gsub(paste0("(^", sep, "|", sep, "$)"), "",
                gsub(paste0(sep, sep), sep,
                     do.call(paste, c(L, list(sep = sep)))))
    is.na(ret) <- ret == ""
    ret
  }

  ## Set up set of secondary actor codes
  secondary_actors <- c('GOV', 'MIL', 'REB', 'OPP', 'PTY', 'COP', 'JUD'
                        , 'SPY', 'MED', 'EDU', 'BUS', 'CRM', 'CVL')
  statelist <- countrycode::countrycode_data$iso3c
  ## NOTE(review): 'states' is the package data set; '$isoc' is kept exactly
  ## as written. Confirm the column is really named 'isoc' and this is not
  ## relying on partial matching of a longer name.
  actors <- unique(statelist[statelist %in% states$isoc])
  actors <- actors[!is.na(actors)]
  ## Every state code on its own, plus every state x secondary-role pair
  actors <- c(actors
              , unique(as.vector(outer(actors, secondary_actors, paste0))))
  actors <- as.factor(sort(actors))

  ######
  #
  # Set up some initial values: Event codes
  #
  ######

  ## Level sets describing CAMEO categories
  rootcode_levels <- as.character(1:20)

  eventcode_levels <- as.character(
    c(10:21, 211:214, 22:23, 231:234, 24, 241:244, 25, 251:256, 26:28, 30:31
      , 311:314, 32:33, 331:334, 34, 341:344, 35, 351:356, 36:46, 50:57
      , 60:64, 70:75, 80:81, 811:814, 82:83, 831:834, 84, 841:842, 85:86
      , 861:863, 87, 871:874, 90:94, 100:101, 1011:1014, 102:103, 1031:1034
      , 104, 1041:1044, 105, 1051:1056, 106:108, 110:112, 1121:1125, 113:116
      , 120:121, 1211:1214, 122, 1221:1224, 123, 1231:1234, 124, 1241:1246
      , 125:129, 130:131, 1311:1313, 132, 1321:1324, 133:138, 1381:1385
      , 139:141, 1411:1414, 142, 1421:1424, 143, 1431:1434, 144, 1441:1444
      , 145, 1451:1454, 150:155, 160:162, 1621:1623, 163:166, 1661:1663
      , 170:171, 1711:1712, 172, 1721:1724, 173:176, 180:182, 1821:1823, 183
      , 1831:1834, 184:186, 190:195, 1951:1952, 196, 200:204, 2041:2042)
  )

  pentaclass_levels <- as.character(0:4)

  ######
  #
  # Download raw files from Phoenix data repo and ICEWS dataverse.
  #
  ######

  ## Download new Phoenix data tables. This will download the entire
  ## archive the first time this function is run and fully populate
  ## the destination folder.
  ## update_phoenix() takes 'destpath' only, so no other argument is passed.
  if(isTRUE(as.logical(update))){
    message('Checking Phoenix data...')
    update_phoenix(destpath = phoenix_loc)
  }

  ## Check to see if ICEWS folder exists and that it has at least one 'valid'
  ## ICEWS data table stored.
  message('Checking ICEWS data...')
  icews_checkfile <- 'events.2000.20150313082808.tab'
  icews_files <- list.files(icews_loc)
  if(!icews_checkfile %in% icews_files){
    stop('Please enter a valid path that contains the ICEWS yearly files.')
  } else {
    message('ICEWS file location is valid.')
  }

  ######
  #
  # Read and parse ICEWS data for merging.
  #
  ######

  ## Read and parse ICEWS data
  message('Ingesting ICEWS data...')
  icews_data <- ingest_icews(icews_loc, start_date, end_date)

  ## Clean ICEWS data and format to Phoenix-style CAMEO codes
  ## for actors and states
  message('Munging ICEWS data...')
  icews_data <- icews_cameo(icews_data)

  ## Subset ICEWS data to only keep key columns
  icews_data <- icews_data[, list(date, sourceactorentity
                                  , targetactorentity, rootcode
                                  , eventcode, goldstein)]
  icews_data[, source := 'icews']

  ## Modify more complex ICEWS actor codes: a 9-character code keeps its
  ## first three and last three characters. Inside j the column is already
  ## restricted to the rows selected in i, so no re-subsetting is needed.
  icews_data[nchar(sourceactorentity) == 9
             , sourceactorentity := paste0(substr(sourceactorentity, 1, 3)
                                           , substr(sourceactorentity, 7, 9))]
  icews_data[nchar(targetactorentity) == 9
             , targetactorentity := paste0(substr(targetactorentity, 1, 3)
                                           , substr(targetactorentity, 7, 9))]

  ######
  #
  # Read and parse Phoenix data for merging.
  #
  ######

  ## Read and parse Phoenix data
  message('Ingesting Phoenix data...')
  phoenix_data <- ingest_phoenix(phoenix_loc = phoenix_loc
                                 , start_date = start_date
                                 , end_date = end_date)

  ## Subset Phoenix data to only keep key columns; entity and role are
  ## concatenated (NA-safe) into a single actor code per side.
  phoenix_data <- phoenix_data[, list(
    date
    , sourceactorentity = paste3(sourceactorentity, sourceactorrole, sep = '')
    , targetactorentity = paste3(targetactorentity, targetactorrole, sep = '')
    , rootcode, eventcode, goldstein)]
  phoenix_data[, source := 'phoenix']

  ######
  #
  # Combine ICEWS and Phoenix data
  #
  ######

  ## If the two tables cannot be stacked, fall back to ICEWS alone. The
  ## fallback value is returned from the handler so 'master_data' is always
  ## defined afterwards.
  master_data <- tryCatch(
    rbind(icews_data, phoenix_data)
    , error = function(e){
      message('Specified range does not include Phoenix data.')
      icews_data
    })
  setnames(master_data, c('sourceactorentity', 'targetactorentity')
           , c('actora', 'actorb'))

  ## Drop any missing data
  master_data <- master_data[complete.cases(master_data), ]

  ## Create new variable: Pentaclass (0-4)
  master_data[rootcode %in% c(1, 2), pentaclass := 0L]
  master_data[rootcode %in% c(3, 4, 5), pentaclass := 1L]
  master_data[rootcode %in% c(6, 7, 8), pentaclass := 2L]
  master_data[rootcode %in% c(9, 10, 11, 12, 13, 16), pentaclass := 3L]
  master_data[rootcode %in% c(14, 15, 17, 18, 19, 20), pentaclass := 4L]

  ######################################
  ## IMPORTANT ASSUMPTION HERE:
  ## I am *ASSUMING* that NULL/NA entries after a state code
  ## implies that the actor is the GOVERNMENT. As such I am replacing
  ## all such missing entries with 'GOV'.
  ######################################
  master_data[actora %in% countrycode::countrycode_data$iso3c
              , actora := paste0(actora, 'GOV')]
  master_data[actorb %in% countrycode::countrycode_data$iso3c
              , actorb := paste0(actorb, 'GOV')]

  ######
  #
  # Pre-format data by de-duplicating, cleaning dates and actors,
  # and dropping unused columns
  #
  ######

  ## De-duplicate
  master_data <- unique(master_data)

  ## Subset events and columns: only events that:
  ##  1. involve specified actor set on both side (as ENTITIES)
  ##  2. involve TWO DIFFERENT actors (i.e. no self-interactions
  ##      as specified by user)
  master_data <- master_data[(actora %in% actors
                              & actorb %in% actors)]
  master_data <- master_data[actora != actorb]
  master_data[, actora := factor(actora, levels = levels(actors))]
  master_data[, actorb := factor(actorb, levels = levels(actors))]

  ## Set CAMEO coded event/root codes to factors
  master_data[, rootcode := factor(rootcode, levels = rootcode_levels)]
  ## Strip '!' markers by reference instead of copying the table via `$<-`
  master_data[, eventcode := gsub('!', '', eventcode)]
  master_data[, eventcode := factor(as.integer(eventcode)
                                    , levels = eventcode_levels)]
  master_data[, pentaclass := factor(pentaclass, levels = pentaclass_levels)]

  ## Set keys
  setkeyv(master_data, c('date', 'actora', 'actorb', 'eventcode', 'source'))

  ######
  #
  # Export : how much overlap between Phoenix and ICEWS reporting?
  #
  ######

  ## Create some temporary flag variables. Both flags are computed on the
  ## same five columns, so that projection is built once.
  event_id <- master_data[, list(date, actora, actorb, rootcode, eventcode)]
  master_data[, dup_fromtop := duplicated(event_id)]
  master_data[, dup_frombot := duplicated(event_id, fromLast = TRUE)]

  ## Expand per-day counts to the full date range, with 0 on days that
  ## have no events.
  dates_tab <- data.table(date = dates)
  fill_daily <- function(daily_counts){
    filled <- merge(dates_tab, daily_counts, by = 'date', all.x = TRUE)
    filled[is.na(V1), V1 := 0]
    filled$V1
  }

  ## Export data on reporting overlap
  sources_overlap <- data.table(
    date = dates
    # Phoenix reporting only
    , phoenix_only = fill_daily(
      master_data[, sum(!dup_fromtop & source == 'phoenix'), by = date])
    # ICEWS reporting only
    , icews_only = fill_daily(
      master_data[, sum(!dup_frombot & source == 'icews'), by = date])
    # Both sources report
    , both_sources = fill_daily(
      master_data[, sum(dup_fromtop), by = date])
  )

  ## Drop flags and source variable
  master_data[, c('dup_fromtop', 'dup_frombot', 'source') := NULL]

  ## Drop duplicated variables
  master_data <- unique(master_data)

  ## Subset events: keep only events within date range
  master_data <- master_data[date %in% dates]

  ## Create list of all actors in data set for output
  main_actors <- actors[!actors %in% statelist]

  ## BIG DUMMY SECTION: dummy out all categorical event/root/pentaclass codes
  master_data <- data.table(
    dummy.data.frame(master_data
                     , names = c('pentaclass', 'rootcode', 'eventcode')))

  list(diagnostics = sources_overlap
       , netdata = master_data
       , actorlist = main_actors)
}
314 | -------------------------------------------------------------------------------- /R/states_doc.R: -------------------------------------------------------------------------------- 1 | #' 2 | #' ICEWS CAMEO actor codes 3 | #' 4 | #' Merge table to convert states to CAMEO format using conversion tables created 5 | #' by Phil Schrodt (https://github.com/philip-schrodt/text_to_CAMEO) 6 | #' 7 | #' @docType data 8 | #' 9 | #' @usage data(states) 10 | #' 11 | #' @keywords datasets 12 | #' 13 | #' 14 | "states" 15 | -------------------------------------------------------------------------------- /R/update_icews.R: -------------------------------------------------------------------------------- 1 | #' Update a local directory of ICEWS dataset files with new files from the server 2 | #' 3 | #' Checks the contents of a directory containing ICEWS event data files, checks whether the 4 | #' server has new events, and downloads them to that directory. (It'll have some version handling ability, 5 | #' too, either from the file names or by reading in the events.) 6 | #' 7 | #' @param destpath The path to download ICEWS into. 8 | #' 9 | #' @return NULL 10 | #' @author Original concept and code for Phoenix: Andy Halterman 11 | #' @note This function, like Phoenix, is still in development and may contain errors and change quickly.
12 | #' @examples 13 | #' 14 | 15 | #' @import RCurl 16 | #' @import dataverse 17 | #' @export 18 | #'
update_icews <- function(destpath){
  ## Bring the folder 'destpath' in line with the ICEWS dataverse listing:
  ## download every listed file that is missing locally and, when anything
  ## has to be downloaded, first remove local files that are no longer
  ## listed. Called for its side effects; returns NULL invisibly.

  # pulls all the links from the ICEWS dataverse
  link_data <- EventNetworks::get_icewslinks()

  ## Local file names are compared against the first 30 characters of each
  ## dataverse label. The full label is kept as the element name because
  ## it is what maps a missing file back to its dataverse id below.
  link_labels <- as.character(link_data[, label])
  link_filelist <- substr(link_labels, 1, 30)
  names(link_filelist) <- link_labels

  ## Identify whether local ICEWS data exists
  icews_files <- list.files(destpath)

  ## Determine what needs to be updated/downloaded
  icews_delete <- icews_files[!icews_files %in% link_filelist]
  icews_download <- link_filelist[!link_filelist %in% icews_files]

  if(length(icews_download) == 0){
    message('ICEWS data is current through the most recent month.')
    return(invisible(NULL))
  }

  message('Updating ICEWS with most recent data release...')

  ## Delete out-of-date ICEWS files
  ## NOTE(review): this removes *every* file in 'destpath' that is not in
  ## the dataverse listing, not only ICEWS event files -- confirm that
  ## 'destpath' is dedicated to ICEWS data.
  if(length(icews_delete) > 0){
    file.remove(file.path(destpath, icews_delete))
  }
  ids <- link_data[label %in% names(icews_download), id]

  message("Downloading and unzipping files.")
  plyr::l_ply(
    ids
    , EventNetworks:::dw_icewsfile
    , destpath = destpath
    , metadata = link_data
    , .progress = plyr::progress_text(char = '=')
  )
  invisible(NULL)
}
57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /R/update_phoenix.R: -------------------------------------------------------------------------------- 1 | #' Update a local directory of Phoenix dataset files with new files from the server 2 | #' 3 | #' Checks the contents of a directory containing Phoenix event data files, checks whether the 4 | #' server has new events, and downloads them to that directory. (It'll have some version handling ability, 5 | #' too, either from the file names or by reading in the events.)
6 | #' 7 | #' @param destpath The path to download Phoenix into. 8 | #' 9 | #' @return NULL 10 | #' @note This function, like Phoenix, is still in development and may contain errors and change quickly. 11 | #' @examples 12 | #' 13 | #' @import RCurl 14 | #' @export 15 | #'
update_phoenix <- function(destpath){
  ## Download every daily Phoenix file that is listed on the server but not
  ## yet present in 'destpath'. Files are matched on the date stamp in
  ## 'events.full.<digits>.txt'. Called for its side effects; returns NULL
  ## invisibly.

  ## Dots are escaped so they match literal dots only; the capture group
  ## (column V2 below) is the date stamp used to compare the two listings.
  file_pattern <- "events\\.full\\.(\\d+)\\.txt"

  # pulls all the links from the OEDA Phoenix page
  links <- EventNetworks::get_phoenixlinks()
  links_shortened <- as.data.frame(stringr::str_match(links, file_pattern)
                                   , stringsAsFactors = FALSE)
  ## Links that are not daily event files come back as NA rows; without
  ## this filter they could end up as a '.../NA.zip' download request.
  links_shortened <- links_shortened[!is.na(links_shortened$V2), , drop = FALSE]
  links_shortened <- links_shortened[!duplicated(links_shortened$V2), , drop = FALSE]

  filelist <- list.files(destpath)
  filelist_shortened <- as.data.frame(stringr::str_match(filelist, file_pattern)
                                      , stringsAsFactors = FALSE)

  # All rows in links_shortened that do not have a match in filelist_shortened.
  new_files <- dplyr::anti_join(links_shortened, filelist_shortened, by = "V2")

  if(nrow(new_files) == 0){
    message('Phoenix data is current through today.')
    return(invisible(NULL))
  }

  message("There are ", nrow(new_files), " undownloaded daily files. Downloading now...")
  ll <- paste0("https://s3.amazonaws.com/oeda/data/current/", new_files$V1, ".zip")
  message("Downloading and unzipping files.")
  plyr::l_ply(ll, EventNetworks:::dw_phoenixfile, destpath = destpath
              , .progress = plyr::progress_text(char = '='))
  invisible(NULL)
}
34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | `EventNetworks` 2 | ===== 3 | 4 | Download, process, and transform historic and live-updated event data from the Phoenix and ICEWS repositories to temporal event-network structures for analysis. 5 | 6 | Package description 7 | ------------ 8 | `EventNetworks` includes a set of functions designed to gather event data, format and process these data, convert them into daily networks of interactions between states in the international system.
9 | 10 | Data sources 11 | ------------ 12 | The package can intake any or all of three different data sources: 13 | 14 | 1. For events occurring between 1995 and one year behind the present day, 15 | it can use the public release of the [ICEWS data](https://dataverse.harvard.edu/dataverse/harvard?q=icews). 16 | 2. For events occurring from June 2014 through the present, it can use 17 | the [daily live-updated Phoenix data](http://phoenixdata.org/data/current) created and 18 | released through the [Open Event Data Project](http://openeventdata.org). 19 | 3. For events occurring from January 1945 to December 2015, it can use the [historic Phoenix data](http://www.clinecenter.illinois.edu/data/speed/phoenix/) created and maintained by the Cline Center for Democracy at the University of Illinois Urbana-Champaign. The historic Phoenix is based on three data sets (the New York Times, the BBC Summary of World Broadcasts, and the CIA's Foreign Broadcast Information Service) with differing timespans, which means that the base content will vary depending on the time specified. 20 | 21 | Note that this means there are some time periods where events will be constructed 22 | using more than one data source. For example, events in 2015 are drawn from ICEWS, the live-updated Phoenix, and the historic Phoenix BBC SWB records. In cases where sources overlap, event records are de-duplicated based on the event-dyad-day tuple. I advise careful comparison of time periods with partial multiple-source overlap, as even with de-duplication this can introduce some significant changes in the number and coverage of reported events. 23 | 24 | The main function 'eventNetworks' intakes raw data files based on the provided folder locations for live Phoenix, historic Phoenix, and ICEWS data sets.
If files are not found for live Phoenix and ICEWS (both of which are regularly updated), it will automatically attempt to download the full data sets. If the "Update" argument is set as TRUE, it will also compare the existing files to the online repositories for the dates requested. 25 | 26 | More information on data sets 27 | ------------ 28 | The current development of the Phoenix Data Project is a collaborative effort between Caerus Associates (Erin Simpson, Andrew Halterman, and John Beieler), Parus Analytics (Phil Schrodt), The University of Texas at Dallas (Patrick Brandt), The Cline Center for Democracy at the University of Illinois at Urbana-Champaign, and The University of Oklahoma. Visit the Phoenix website here: [http://phoenixdata.org/](http://phoenixdata.org/) and the website of the Open Event Data Alliance here: [http://openeventdata.org/](http://openeventdata.org). 29 | 30 | The historic Phoenix data files are maintained by the Cline Center for Democracy at the University of Illinois Urbana-Champaign. Visit the Cline Center's website here: [http://www.clinecenter.illinois.edu/data/speed/phoenix/](http://www.clinecenter.illinois.edu/data/speed/phoenix/). 31 | 32 | The Integrated Crisis Early Warning System (ICEWS) public-release data is an ongoing data-coding and analysis initiative hosted at Lockheed Martin, originally funded through DARPA, and more recently funded through the Office of Naval Research. ICEWS data is being released monthly through the Harvard Dataverse, with a 12-month (give or take a few months) lag: as of 12/10/2017, ICEWS data is available through the end of November 2016. For more information, visit [The W-ICEWS site](http://www.lockheedmartin.com/us/products/W-ICEWS/W-ICEWS_Team/Publications.html) at Lockheed Martin. 33 | 34 | Current status and updates 35 | ------------ 36 | `EventNetworks` is still in a very early stage of development, and is likely to change significantly over time. 37 | 38 | __Recent changes:__ 39 | 1. 
I have integrated functionality for processing and converting the historic Phoenix data released by the Cline Center for Democracy at UIUC. These data can be used on their own or in conjunction with the live Phoenix and ICEWS data. This means it's now possible to generate event-networks for the international system going all the way back to 1945 (although at that point you're relying solely on the NYT digitized record, with all the accompanying geographic and substantive bias one would anticipate). 40 | 2. I have finally gotten around to fixing the major bottleneck in ICEWS data processing from complex actor descriptors to the standardized CAMEO actor set. This change significantly speeds up processing time. 41 | 3. EventNetworks now outputs data as a __list of array objects__. Each list entry corresponds to one time unit, and each array has dimensions (_k_ x _k_ x _m_) where _k_ = number of actors and _m_ = number of network layers, or number of unique tie types returned. 42 | 43 | 44 | Long-term to-Do List 45 | ------------ 46 | - [x] Add support for specifying a subset of actors to examine, or a 'container' (e.g., a state) within which to examine all actors. 47 | - [x] Add support for specifying the level of temporal aggregation for event-networks (e.g., day/week/month/year). 48 | - [x] Add support for specifying a particular class of interactions to extract and examine (e.g., rootcodes 2,4,6). 49 | - [x] Increase efficiency of network-stats extraction module by enabling parallelization of plyr functions using doMC backend. 50 | - [x] Add support for using the Cline Center's recent historic Phoenix data releases (http://www.clinecenter.illinois.edu/data/speed/phoenix/) 51 | - [x] Speed up the internal functions, particularly the ICEWS-to-CAMEO-code conversion. This is a major bottleneck. 52 | - [ ] Circle back around to update and improve the network-stats functionality to extract and present more useful information from the generated networks. 
53 | - [ ] Clean up the documentation and clarify arguments to make it easier for others to use. 54 | - [ ] Set up more informative error messages :) 55 | 56 | 57 | Installation 58 | ------------ 59 | `devtools::install_github("jrhammond/EventNetworks")` 60 | ``` 61 | > pacman::p_load(EventNetworks) 62 | > 63 | > sample_data <- EventNetworks::eventNetworks( 64 | + start_date = 20140101 65 | + , end_date = 20150101 66 | + , level = 'pentaclass' 67 | + , dv_key = [personal Harvard dataverse redacted] 68 | + , phoenix_loc = '/Users/localadmin/Box Sync/DataSets/phoenix' 69 | + , icews_loc = '/Users/localadmin/Box Sync/DataSets/icews' 70 | + , histphoenix_loc = '/Users/localadmin/Box Sync/DataSets/CCHPED_v2017_06_30' 71 | + , dv_server = 'harvard.dataverse.edu' 72 | + , update = F 73 | + , actorset = 'states' 74 | + , codeset = 'all' 75 | + , time_window = 'month' 76 | + , code_subset = 'all' 77 | + , tie_type = 'count' 78 | + , sources = 'all' 79 | + ) 80 | Checking ICEWS data... 81 | Ingesting ICEWS data... 82 | Reading in files... 83 | |===============================================================| 100% 84 | Process complete 85 | Munging ICEWS data... 86 | Ingesting Phoenix data... 87 | Note: specified range precedes the earliest Phoenix data. 88 | Reading in files... 89 | |===============================================================| 100% 90 | Process complete 91 | Ingesting historic Phoenix data... 
92 | Read 817955 rows and 25 (of 25) columns from 0.096 GB file in 00:00:03 93 | Read 1092211 rows and 25 (of 25) columns from 0.133 GB file in 00:00:03 94 | Read 2906715 rows and 25 (of 25) columns from 0.373 GB file in 00:00:08 95 | ``` 96 | -------------------------------------------------------------------------------- /data/agentnames.txt~: -------------------------------------------------------------------------------- 1 | Government GOV GOV 2 | Executive GOV GOV1 3 | Executive Office GOV GOV2 4 | Cabinet GOV GOV3 5 | Agriculture / Fishing / Forestry Ministry GOVAGR GOVAGR 6 | Finance / Economy / Commerce / Trade Ministry GOVBUS GOVBUS 7 | Defense / Security Ministry GOVMIL GOVMIL 8 | Education Ministry GOVEDU GOVEDU 9 | Energy Ministry GOVENV GOV4 10 | Environment Ministry GOVENV GOVENV 11 | Transportation Ministry GOVENV GOVENV5 12 | Food Ministry GOVENV GOVENV6 13 | Disaster Ministry GOVENV GOVENV7 14 | Foreign Ministry GOV GOVENV8 15 | Health Ministry GOVHLH GOVHLH 16 | Interior / Home Ministry GOV GOV9 17 | Industrial / Textiles / Mining Ministry GOVBUS GOVHLH10 18 | Science / Tech / Knowledge / Innovation Ministry GOVHLH GOVHLH11 19 | NGO Ministry GOV GOVHLH12 20 | Labor Ministry GOVLAB GOVLAB 21 | Post / Tecoms Ministry GOVBUS GOVLAB13 22 | Science / Tech Ministry GOVBUS GOVLAB14 23 | Water Ministry GOVLAB GOVLAB15 24 | Women / Children / Social / Welfare / Development / Religion Ministry GOVDEV GOVDEV 25 | Justice / Law Ministry GOVJUD GOVJUD 26 | Tourism Ministry GOVJUD GOVJUD16 27 | Drugs Ministry GOVJUD GOVJUD17 28 | Human Rights Ministry GOVHRI GOVHRI 29 | Elections Ministry GOVHRI GOVHRI18 30 | Housing / Construction Ministry GOVHRI GOVHRI19 31 | Intelligence Ministry SPY SPY 32 | Information / Communication / Transparency Ministry GOVMED GOVMED 33 | State Media GOVMED GOVMED1 34 | Management / Budget / Planning / Organization Ministry GOV GOV2 35 | State Owned Enterprises GOVBUS GOVBUS20 36 | State-Owned Agricultural GOVBUS GOVBUS21 37 | 
State-Owned Transportation GOVBUS GOVBUS22 38 | State-Owned Utilities GOVBUS GOVBUS23 39 | State-Owned Heavy Industrial / Chemical GOVBUS GOVBUS24 40 | State-Owned Defense / Security GOVBUS GOVBUS25 41 | State-Owned Durable Goods GOVBUS GOVBUS26 42 | State-Owned Consumer Goods GOVBUS GOVBUS27 43 | State-Owned Consumer Services GOVBUS GOVBUS28 44 | State-Owned Consulting / Financial Services GOVBUS GOVBUS29 45 | State-Owned Science / Tech / Knowledge / Innovation GOVBUS GOVBUS30 46 | State-Owned Medical / Health / Pharmeceutical GOVBUS GOVBUS31 47 | Police COP COP 48 | National / Border Divisions COP COP32 49 | Provincial Divisions COP COP33 50 | Municipal Divisions COP COP34 51 | Military MIL MIL 52 | Military Intelligence MILSPY MILSPY 53 | Military Intelligence Headquarters MILSPY MILSPY35 54 | Military Intelligence Special Forces MILSPY MILSPY36 55 | Military Intelligence Infantry / Regular MILSPY MILSPY37 56 | Military Intelligence Mechanized (Ships, Tanks, Planes) MILSPY MILSPY38 57 | Military Intelligence Education / Training MILSPY MILSPY39 58 | Military Intelligence Support MILSPY MILSPY40 59 | Military Intelligence Medical MILSPY MILSPY41 60 | Research And Design Wings MIL MIL42 61 | Research And Design Wings Headquarters MIL MIL43 62 | Research And Design Wings Education / Training MIL MIL44 63 | Research And Design Wings Support MIL MIL45 64 | Research And Design Wings Medical MIL MIL46 65 | Army MIL MIL47 66 | Army Headquarters MIL MIL48 67 | Army Special Forces MIL MIL49 68 | Army Infantry / Regular MIL MIL50 69 | Army Mechanized (Ships, Tanks, Planes) MIL MIL51 70 | Army Education / Training MIL MIL52 71 | Army Support MIL MIL53 72 | Army Medical MIL MIL54 73 | Navy MIL MIL55 74 | Navy Headquarters MIL MIL56 75 | Navy Special Forces MIL MIL57 76 | Navy Infantry / Regular MIL MIL58 77 | Navy Mechanized (Ships, Tanks, Planes) MIL MIL59 78 | Navy Education / Training MIL MIL60 79 | Navy Support MIL MIL61 80 | Navy Medical MIL MIL62 81 | Air Force MIL 
MIL63 82 | Air Force Headquarters MIL MIL64 83 | Air Force Special Forces MIL MIL65 84 | Air Force Infantry / Regular MIL MIL66 85 | Air Force Mechanized (Ships, Tanks, Planes) MIL MIL67 86 | Air Force Education / Training MIL MIL68 87 | Air Force Support MIL MIL69 88 | Air Force Medical MIL MIL70 89 | Marines MIL MIL71 90 | Marines Headquarters MIL MIL72 91 | Marines Special Forces MIL MIL73 92 | Marines Infantry / Regular MIL MIL74 93 | Marines Mechanized (Ships, Tanks, Planes) MIL MIL75 94 | Marines Education / Training MIL MIL76 95 | Marines Support MIL MIL77 96 | Marines Medical MIL MIL78 97 | Coast Guard MIL MIL79 98 | Coast Guard Headquarters MIL MIL80 99 | Coast Guard Special Forces MIL MIL81 100 | Coast Guard Infantry / Regular MIL MIL82 101 | Coast Guard Mechanized (Ships, Tanks, Planes) MIL MIL83 102 | Coast Guard Education / Training MIL MIL84 103 | Coast Guard Support MIL MIL85 104 | Coast Guard Medical MIL MIL86 105 | Legislative / Parliamentary LEG LEG 106 | Upper House LEG LEG87 107 | Lower House LEG LEG88 108 | Unicameral LEG LEG89 109 | Judicial JUD JUD 110 | National / Supreme Court JUD JUD90 111 | Provincial Court JUD JUD91 112 | Municipal / District Court JUD JUD92 113 | Civil Court JUD JUD93 114 | Religious Court JUD JUD94 115 | Military / Tribunal JUD JUD95 116 | Local JUD JUD96 117 | Provincial JUD JUD97 118 | Municipal JUD JUD98 119 | Government Religious GOVREL GOVREL 120 | Parties PTY PTY 121 | (National) Major Party PTY PTY99 122 | Opposition Major Party (Out Of Government) OPPPTY OPPPTY 123 | Government Major Party (In Government) GOVPTY GOVPTY 124 | (National) Minor Party PTY PTY100 125 | Opposition Minor Party (Out Of Government) OPPPTY OPPPTY101 126 | Government Minor Party (In Government) GOVPTY GOVPTY102 127 | Provincial Party PTY PTY103 128 | Opposition Provincial Party (Out Of Government) OPPPTY OPPPTY104 129 | Government Provincial Party (In Government) GOVPTY GOVPTY105 130 | Municipal Party PTY PTY106 131 | Opposition Municipal 
Party (Out Of Government) OPPPTY OPPPTY107 132 | Government Municipal Party (In Government) GOVPTY GOVPTY108 133 | Dissident OPP OPP109 134 | Criminals / Gangs CRM CRM 135 | Protestors / Popular Opposition / Mobs OPP OPP 136 | Exiles OPP OPP110 137 | Banned Parties OPPPTY OPP111 138 | Radicals / Extremists / Fundamentalists REB RAD 139 | Organized Violent REB RAD112 140 | Rebel REB REB 141 | Insurgents REB INS 142 | Separatists REB SEP 143 | Social CVL CIV113 144 | Agricultural BUS BUSAGR 145 | Business BUS BUS 146 | Agricultural Business BUS BUS122 147 | Transportation Business BUS BUS123 148 | Utilities Business BUS BUS124 149 | Heavy Industrial / Chemical Business BUS BUS125 150 | Defense / Security Business BUS BUS126 151 | Durable Goods Business BUS BUS127 152 | Consumer Goods Business BUS BUS128 153 | Consumer Services Business BUS BUS129 154 | Consulting / Financial Services Business BUS BUS130 155 | Science / Tech / Knowledge / Innovation Business BUS BUS131 156 | Medical / Health / Pharmeceutical Business BUS BUS132 157 | Education EDU EDU 158 | Student EDU EDU115 159 | National Ethnic CVL CVL116 160 | National Ethnic Majority CVL CVL117 161 | National Ethnic Minority CVL CVL118 162 | General Population / Civilian / Social CVL CVL 163 | Labor LAB LAB 164 | Legal JUD JUD114 165 | Media MED MED 166 | News MED MED133 167 | Print News MED MED134 168 | Radio News MED MED135 169 | Television News MED MED136 170 | Online News MED MED137 171 | Entertainment MED MED138 172 | Print Entertainment MED MED139 173 | Radio Entertainment MED MED140 174 | Television Entertainment MED MED141 175 | Online Entertainment MED MED142 176 | Medical / Health MED MED99e 177 | Refugees / Displaced REF REF 178 | National Religious REF REF119 179 | Religious Majority REF REF120 180 | Religious Minority REF REF121 181 | Nongovernmental Organizations / Activists IGO IGO143 182 | Agricultural NGOs IGOAGR IGO148 183 | Business NGOs IGOBUS IGO144 184 | Education NGOs IGOEDU IGO146 185 | 
Energy NGOs IGO IGO147 186 | Environment NGOs IGOENV IGOENV 187 | Ethnic NGOs IGOEDU IGO 188 | Development NGOs IGODEV IGODEV 189 | Human Rights NGOs IGOHRI IGOHRI 190 | Information / Communication / Transparency NGOs IGO IGO150 191 | Labor NGOs IGOLAB IGO154 192 | Legal NGOs IGOHRI IGOHRI185 193 | Media NGOs IGO IGO438 194 | Medical / Health NGOs IGOHLH IGOHLH 195 | Refugees / Displaced NGOs IGOREF IGO145 196 | Charity NGOs IGOHLH IGOHLH149 197 | Elite CVL ELI 198 | Unidentified Forces UAF UAF 199 | Unaffiliated Sectors iSec iSec 200 | International Religious REL REL 201 | Atheist ATH ATH 202 | Animist PAG PAG 203 | Shamanist SHA SHA 204 | Muslim MOS MOS 205 | Alewi MOSALE MOSALE 206 | Sunni MOSSUN MOSSUN 207 | Shia MOSSHI MOSSHI 208 | Sufi MOSSFI MOSSFI 209 | Druze MOSDRZ MOSDRZ 210 | Hindu HIN HIN 211 | Christian CHR CHR 212 | Catholic CHRCTH CHRCTH 213 | Protestant CHRPRO CHRPRO 214 | Coptic CHRCPT CHRCPT 215 | Jehovah's Witness CHRJHW CHRJHW 216 | Maronite CHRMRN CHRMRN 217 | Orthodox CHRDOX CHRDOX 218 | Buddhist BUD BUD 219 | Jewish JEW JEW 220 | Daoist TAO REL152 221 | Shinto REL REL153 222 | Sikh SIK SIK 223 | Ahmadiyya SIK SIK154 224 | Jain JAN JAN 225 | Mormon LDS LDS 226 | Baha'I BAH BAH 227 | Zoroastrian / Mazdi ZRO ZRO 228 | Confucian CON CON 229 | International Ethnic REL REL155 230 | Chechen CNY CNY 231 | Kashmiri KAS KAS 232 | Acehnese KAS KAS156 233 | Han KAS KAS157 234 | Tamil TAM TAM 235 | Sinhalese SNL SNL 236 | Bodo SNL SNL158 237 | Shan SNL SNL159 238 | Mizo SNL SNL160 239 | Chakma CKM CKM 240 | Kuki CKM CKM161 241 | Tripuri CKM CKM162 242 | Moro CKM CKM163 243 | Uyghur UIG UIG 244 | Hmong UIG UIG164 245 | Karen UIG UIG165 246 | Assamese UIG UIG167 247 | Tibetan TIB TIB 248 | Albanian (Ethnic Group) ABN ABN 249 | Arab ARB ARB 250 | Arab, Arabian ARB ARB16691 251 | Arab, Hassaniya ARB ARB16692 252 | Arab, Levant ARB ARB16693 253 | Arab, Libyan ARB ARB16694 254 | Arab, Maghreb ARB ARB16695 255 | Arab, Shuwa ARB ARB16696 256 | Arab, Sudan ARB 
ARB16697 257 | Arab, Yemeni ARB ARB16698 258 | Bedouin BED BED 259 | Bedouin, Arabian BED BED16716 260 | Bedouin, Saharan BED BED16717 261 | Croat (Ethnic Group) CRO CRO 262 | Gypsy GYP GYP 263 | Hausa HAU HAU 264 | Hutu HUT HUT 265 | Ibo IBO IBO 266 | Ljaw IJW IJW 267 | Krahn KRH KRH 268 | Kurd (Ethnic Group) KUR KUR 269 | Mandingoe MAN MAN 270 | Ogoni OGO OGO 271 | Palestinian PAL PAL 272 | Serb (Ethnic Group) SER SER 273 | Slav SLA SLA 274 | Slav, Eastern SLA SLA16862 275 | Slav, Southern SLA SLA16863 276 | Slav, Western SLA SLA16864 277 | Tuareg TRG TRG 278 | Turk (Ethnic Group) TRK TRK 279 | Tutsi TUT TUT 280 | Yoruba YRB YRB 281 | Josua Project Ethnic Groups ETN ETN 282 | Aborigine ETN ETN16680 283 | Aceh of Sumatra ETN ETN16681 284 | Adamawa-Ubangi ETN ETN16682 285 | Adi ETN ETN16683 286 | Afar ETN ETN16684 287 | Aimaq ETN ETN16685 288 | Albanian ETN ETN16686 289 | Altaic ETN ETN16687 290 | Amazon ETN ETN16688 291 | Anglo-American ETN ETN16689 292 | Anglo-Celt ETN ETN16690 293 | Armenian ETN ETN16699 294 | Assyrian / Aramaic ETN ETN16700 295 | Atlantic ETN ETN16701 296 | Atlantic-Jola ETN ETN16702 297 | Atlantic-Wolof ETN ETN16703 298 | Aymara ETN ETN16704 299 | Azerbaijani ETN ETN16705 300 | Aztec ETN ETN16706 301 | Bali-Sasak ETN ETN16707 302 | Baloch ETN ETN16708 303 | Baltic ETN ETN16709 304 | Banjar of Kalimantan ETN ETN16710 305 | Bantu ETN ETN16711 306 | Bantu, Makua-Yao ETN ETN16712 307 | Bantu, Cameroon-Bamileke ETN ETN16908 308 | Bantu, Central-Congo ETN ETN16909 309 | Bantu, Central-East ETN ETN16910 310 | Bantu, Central-Lakes ETN ETN16911 311 | Bantu, Central-Luba ETN ETN16912 312 | Bantu, Central-South ETN ETN16913 313 | Bantu, Central-Southeast ETN ETN16914 314 | Bantu, Central-Southwest ETN ETN16915 315 | Bantu, Central-Tanzania ETN ETN16916 316 | Bantu, Chewa-Sena ETN ETN16917 317 | Bantu, East-Coastal ETN ETN16918 318 | Bantu, Gikuyu-Kamba ETN ETN16919 319 | Bantu, Kongo ETN ETN16920 320 | Bantu, Nguni ETN ETN16921 321 | Bantu, Northwest ETN 
ETN16922 322 | Bantu, Shona ETN ETN16923 323 | Bantu, Sotho-Tswana ETN ETN16924 324 | Bantu, Southeastern ETN ETN16925 325 | Bantu, Swahili ETN ETN16926 326 | Makua-Yao ETN ETN16712 327 | Cameroon-Bamileke ETN ETN16908 328 | Central-Congo ETN ETN16909 329 | Central-East ETN ETN16910 330 | Central-Lakes ETN ETN16911 331 | Central-Luba ETN ETN16912 332 | Central-South ETN ETN16913 333 | Central-Southeast ETN ETN16914 334 | Central-Southwest ETN ETN16915 335 | Central-Tanzania ETN ETN16916 336 | Chewa-Sena ETN ETN16917 337 | East-Coastal ETN ETN16918 338 | Gikuyu-Kamba ETN ETN16919 339 | Kongo ETN ETN16920 340 | Nguni ETN ETN16921 341 | Northwest ETN ETN16922 342 | Shona ETN ETN16923 343 | Sotho-Tswana ETN ETN16924 344 | Southeastern ETN ETN16925 345 | Swahili ETN ETN16926 346 | Barito of Kalimantan ETN ETN16713 347 | Basque ETN ETN16714 348 | Batak-Nias of Sumatra ETN ETN16715 349 | Beja ETN ETN16718 350 | Bengali ETN ETN16719 351 | Benue ETN ETN16720 352 | Berber-Saharan ETN ETN16721 353 | Berber-Kabyle ETN ETN16722 354 | Berber-Riff ETN ETN16723 355 | Berber-Shawiya ETN ETN16724 356 | Berber-Shilha ETN ETN16725 357 | Bhil ETN ETN16726 358 | Bhojpur-Maithili ETN ETN16727 359 | Bhutanese ETN ETN16728 360 | Bihari ETN ETN16729 361 | Bouyei ETN ETN16730 362 | Brahui ETN ETN16731 363 | Bugi-Makassar of Sulawesi ETN ETN16732 364 | Bungku-Bajau ETN ETN16733 365 | Burmese ETN ETN16734 366 | Borneo-Kalimantan ETN ETN16735 367 | Afro-Caribbean ETN ETN16736 368 | Afro-Caribbean, Anglophone ETN ETN16737 369 | Afro-Caribbean, Dutch ETN ETN16906 370 | Afro-Caribbean, Francophone ETN ETN16907 371 | Anglophone ETN ETN16737 372 | Dutch ETN ETN16906 373 | Francophone ETN ETN16907 374 | Caucasus ETN ETN16738 375 | Chadic ETN ETN16739 376 | Kanuri-Saharan ETN ETN16740 377 | Cham ETN ETN16741 378 | Chinese ETN ETN16742 379 | Chinese-Hui ETN ETN16743 380 | Egyptian ETN ETN16744 381 | Ethiopian ETN ETN16745 382 | Fiji ETN ETN16746 383 | Filipino ETN ETN16747 384 | Filipino, Central ETN 
ETN16748 385 | Filipino, Muslim ETN ETN16749 386 | Filipino, Tribal ETN ETN16750 387 | Finno-Ugric ETN ETN16751 388 | Finno-Ugric, Saami ETN ETN16752 389 | Flores-Sumba-Alor ETN ETN16753 390 | French ETN ETN16754 391 | Fulani / Fulbe ETN ETN16755 392 | Garo-Tripuri ETN ETN16756 393 | Germanic ETN ETN16757 394 | Gond ETN ETN16758 395 | Gorontalo of Sulawesi ETN ETN16759 396 | Greek ETN ETN16760 397 | Guarani ETN ETN16761 398 | Guera-Naba of Chad ETN ETN16762 399 | Guinean ETN ETN16763 400 | Gujarati ETN ETN16764 401 | Gur ETN ETN16765 402 | Hani ETN ETN16766 403 | Hindi ETN ETN16767 404 | Hispanic ETN ETN16768 405 | Hungarian ETN ETN16769 406 | Igbo ETN ETN16770 407 | Ijaw ETN ETN16771 408 | Inuit ETN ETN16772 409 | Italian ETN ETN16773 410 | Japanese ETN ETN16774 411 | Jat ETN ETN16775 412 | Jawa ETN ETN16776 413 | Jews ETN ETN16777 414 | Kaili-Tomini of Sulawesi ETN ETN16778 415 | Kannada ETN ETN16779 416 | Kazakh ETN ETN16780 417 | Khoisan ETN ETN16781 418 | Kyrgyz ETN ETN16782 419 | Korean ETN ETN16783 420 | Kru ETN ETN16784 421 | Kuki-Chin-Naga ETN ETN16785 422 | Lampung of Sumatra ETN ETN16787 423 | Lao ETN ETN16788 424 | Li ETN ETN16789 425 | Lisu ETN ETN16790 426 | Madura of Java ETN ETN16791 427 | Malagasy ETN ETN16792 428 | Malay ETN ETN16793 429 | Malayali ETN ETN16794 430 | Maldivian ETN ETN16795 431 | Malinke ETN ETN16796 432 | Malinke-Bambara ETN ETN16797 433 | Malinke-Jula ETN ETN16798 434 | Maltese ETN ETN16799 435 | Maluku ETN ETN16800 436 | Maluku, Central ETN ETN16801 437 | Maluku, Northern ETN ETN16802 438 | Maluku, Southern ETN ETN16803 439 | Manchu ETN ETN16804 440 | Mande ETN ETN16805 441 | Marathi-Konkani ETN ETN16806 442 | Maya ETN ETN16807 443 | Melayu of Sumatra ETN ETN16808 444 | Miao / Hmong ETN ETN16809 445 | Micronesian ETN ETN16810 446 | Minahasa-Sangir of Sulawesi ETN ETN16811 447 | Minangkabau-Rejang of Sumatra ETN ETN16812 448 | Miri-Kachin ETN ETN16813 449 | Mixe ETN ETN16814 450 | Mixteco ETN ETN16815 451 | Mizo-Lushai ETN 
ETN16816 452 | Mongolian ETN ETN16817 453 | Mon-Khmer ETN ETN16818 454 | Munda-Santal ETN ETN16819 455 | Musi of Sumatra ETN ETN16820 456 | Nepali-Pahari ETN ETN16821 457 | New Caledonia ETN ETN16822 458 | New Guinea ETN ETN16823 459 | Nilotic ETN ETN16824 460 | North American Indigenous ETN ETN16825 461 | Nosu ETN ETN16826 462 | Nuba Mountains ETN ETN16827 463 | Nubian ETN ETN16828 464 | Nupe ETN ETN16829 465 | Nuristan ETN ETN16830 466 | Ogan of Sumatra ETN ETN16831 467 | Omotic ETN ETN16832 468 | Oraon ETN ETN16833 469 | Oriya ETN ETN16834 470 | Oromo ETN ETN16835 471 | Other Central American Indigenous ETN ETN16836 472 | Other Hispanic American ETN ETN16837 473 | Other Pacific Islanders ETN ETN16838 474 | Other Southeast Asian ETN ETN16839 475 | Other South Asian ETN ETN16840 476 | Other Sub-Saharan African ETN ETN16841 477 | Otomi ETN ETN16842 478 | Ouaddai-Fur ETN ETN16843 479 | Parsee ETN ETN16844 480 | Pasemah of Sumatra ETN ETN16845 481 | Pashtun ETN ETN16846 482 | Persian ETN ETN16847 483 | Polynesian ETN ETN16848 484 | Portuguese ETN ETN16849 485 | Portuguese, Brazilian ETN ETN16850 486 | Portuguese, European ETN ETN16851 487 | Punjabi ETN ETN16852 488 | Pygmy ETN ETN16853 489 | Quechua ETN ETN16854 490 | Rajasthan ETN ETN16855 491 | Romanian ETN ETN16856 492 | South Himalaya ETN ETN16857 493 | Sara-Bagirmi ETN ETN16858 494 | Scandinavian ETN ETN16859 495 | Sindhi ETN ETN16860 496 | Sinhala ETN ETN16861 497 | Solomons ETN ETN16865 498 | Somali ETN ETN16866 499 | Songhai ETN ETN16867 500 | Soninke ETN ETN16868 501 | South American Indigenous ETN ETN16869 502 | Spanish ETN ETN16870 503 | Sudanic ETN ETN16871 504 | Sunda-Betawi of Java ETN ETN16872 505 | Susu ETN ETN16873 506 | Tai ETN ETN16874 507 | Tai Dam ETN ETN16875 508 | Tai-Kadai ETN ETN16876 509 | Taiwan Indigenous ETN ETN16877 510 | Tajik ETN ETN16878 511 | Talysh ETN ETN16879 512 | Telugu ETN ETN16880 513 | Thai ETN ETN16881 514 | Timor ETN ETN16882 515 | Toraja of Sulawesi ETN ETN16883 516 | 
Tukangbesi of Sulawesi ETN ETN16884 517 | Turkish ETN ETN16885 518 | Turkmen ETN ETN16886 519 | Ural-Siberian ETN ETN16887 520 | Urdu Muslim ETN ETN16888 521 | Uzbek ETN ETN16889 522 | Vanuatu ETN ETN16890 523 | Vietnamese ETN ETN16891 524 | West China / Lolo ETN ETN16892 525 | West Malaysia Indigenous ETN ETN16893 526 | Yao-Mien ETN ETN16894 527 | Zapoteco ETN ETN16895 528 | Zhuang ETN ETN16896 529 | Undefined ETN ETN16897 530 | Arab World ETN ETN16898 531 | Caucasian Peoples ETN ETN16899 532 | Sub-Saharan African ETN ETN16900 533 | Luri-Bakhtiari ETN ETN16901 534 | Deaf ETN ETN16902 535 | Afro-American ETN ETN16903 536 | Afro-American, Hispanic ETN ETN16904 537 | Afro-American, Northern ETN ETN16905 538 | Banda ETN ETN533 539 | Ideological --- OPP 540 | Nationalist --- OPP173 541 | Fundamentalist -- OPP174 542 | Secular --- OPP175 543 | Tribalist --- OPP176 544 | Communist --- OPP177 545 | Center Left --- OPP178 546 | Center Right --- OPP179 547 | Far Left --- OPP180 548 | Far Right --- OPP181 549 | Centrist --- OPP182 550 | Libertarian --- OPP183 551 | Anarchist --- OPP184 552 | Nongovernmental Organization (International) IGO IGO 553 | Medical / Health IGOs IGOHLH IGOHLH 554 | Business IGOs IGOBUS IGOBUS 555 | Refugees IGOs IGOREF IGOREF 556 | Education IGOs IGOEDU IGOEDU 557 | Development IGOs IGODEV IGODEV 558 | Energy IGOs IGOBUS IGOBUS 559 | Agricultural IGOs IGOAGR IGOAGR 560 | Human Rights IGOs IGOHRI IGOHRI 561 | Charity IGOs IGO IGO 562 | Information / Communication / Transparency IGOs IGO IGO 563 | Environment IGOs IGO IGO196 564 | Legal IGOs IGO IGO9f7 565 | International Government Organization IGO IGO 566 | Global IGO IGO 567 | Global Information / Communication / Transparency IGOs IGOMED IGOMED 568 | Global Energy IGOs IGOBUS IGOBUS 569 | Global Diplomatic IGOs IGO IGO 570 | Global Defense / Security IGOs IGOMIL IGOMIL 571 | Global Law / Justice / Judicial IGOs IGOJUD IGOJUD 572 | Global Environment IGOs IGOENV IGOENV 573 | Global 
Economic/Financial/Trade IGOs IGOBUS IGOBUS 574 | Global Development IGOs IGODEV IGODEV 575 | Global Health IGOs IGOHLH IGOHLH 576 | Global Human Rights IGOs IGOHRI IGOHRI 577 | Global Agricultural IGOs IGOAGR IGOAGR 578 | Global Refugees IGOs IGOREF IGOREF 579 | Regional IGO IGO 580 | Regional Information / Communication / Transparency IGOs IGOMED IGOMED 581 | Regional Energy IGOs IGOBUS IGOBUS 582 | Regional Diplomatic IGOs IGO IGO 583 | Regional Defense / Security IGOs IGOMIL IGOMIL 584 | Regional Law / Justice / Judicial IGOs IGOJUD IGOJUD 585 | Regional Environment IGOs IGOENV IGOENV 586 | Regional Economic/Financial/Trade IGOs IGOBUS IGOBUS 587 | Regional Development IGOs IGODEV IGODEV 588 | Regional Health IGOs IGOHLH IGOHLH 589 | Regional Human Rights IGOs IGOHRI IGOHRI 590 | Regional Agricultural IGOs IGOAGR IGOAGR 591 | Regional Refugees IGOs IGOREF IGOREF 592 | International Dissident INT INT 593 | International Criminals / Gangs INT INT205 594 | International Protestors / Popular Opposition / Mobs INT INT206 595 | International Banned Parties INT INT207 596 | International Exiles INT INT208 597 | International Radicals / Extremists / Fundamentalists IMG IMG 598 | International Terrorists IMG IMG210 599 | International Rebels IMG IMG211 600 | International Insurgents IMG IMG212 601 | International Separatists IMG IMG213 602 | Multinational Corporation MNC MNC 603 | Agricultural MNCs MNC MNC215 604 | Transportation MNCs MNC MNC216 605 | Utilities MNCs MNC MNC217 606 | Heavy Industrial / Chemical MNCs MNC MNC218 607 | Defense / Security MNCs MNC MNC219 608 | Durable Goods MNCs MNC MNC220 609 | Consumer Goods MNCs MNC MNC221 610 | Consumer Services MNCs MNC MNC222 611 | Consulting / Financial Services MNCs MNC MNC223 612 | Science / Tech / Knowledge / Innovation MNCs MNC MNC224 613 | Medical / Health / Pharmeceutical MNCs MNC MNC225 614 | NULL ——— -------------------------------------------------------------------------------- /data/agents.RData: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrhammond/EventNetworks/77d7b76763ef8214ab837e6d4bfd86b94bd22b21/data/agents.RData -------------------------------------------------------------------------------- /data/convert_cameo_data.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrhammond/EventNetworks/77d7b76763ef8214ab837e6d4bfd86b94bd22b21/data/convert_cameo_data.RData -------------------------------------------------------------------------------- /data/states.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrhammond/EventNetworks/77d7b76763ef8214ab837e6d4bfd86b94bd22b21/data/states.RData -------------------------------------------------------------------------------- /man/agents.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/agents_doc.R 3 | \docType{data} 4 | \name{agents} 5 | \alias{agents} 6 | \title{ICEWS CAMEO actor codes} 7 | \format{An object of class \code{data.table} (inherits from \code{data.frame}) with 614 rows and 3 columns.} 8 | \usage{ 9 | data(agents) 10 | } 11 | \description{ 12 | Merge table to convert actors to CAMEO format using conversion tables created 13 | by Phil Schrodt (https://github.com/philip-schrodt/text_to_CAMEO) 14 | } 15 | \keyword{datasets} 16 | -------------------------------------------------------------------------------- /man/convert_cameo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/convert_cameo.R 3 | \name{convert_cameo} 4 | \alias{convert_cameo} 5 | \alias{countrycode} 6 | \title{Convert CAMEO Codes} 7 | \usage{ 8 | convert_cameo(cameo) 9 | } 10 | 
\arguments{ 11 | \item{cameo}{Vector of CAMEO event codes.} 12 | } 13 | \description{ 14 | Converts CAMEO codes. CAMEO is an event ontology used in event data projects, including Phoenix. 15 | } 16 | \examples{ 17 | events$Description <- phoxy::convert_cameo(events$EventCode) # Vector of values to be converted 18 | 19 | } 20 | \keyword{data} 21 | \keyword{event} 22 | -------------------------------------------------------------------------------- /man/convert_cameo_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/convert_cameo_data.R 3 | \docType{data} 4 | \name{convert_cameo_data} 5 | \alias{convert_cameo_data} 6 | \title{CAMEO code translation data frame} 7 | \format{A data frame with 310 rows and 2 columns} 8 | \usage{ 9 | convert_cameo_data 10 | } 11 | \description{ 12 | A data frame with 310 rows and 2 columns. 13 | Used internally by the \code{convert_cameo()} function. 14 | } 15 | \details{ 16 | \itemize{ 17 | \item CAMEOcode: the 310 different low-level CAMEO codes. 18 | \item EventDescription: Human-readable descriptions of the codes. 19 | } 20 | } 21 | \note{ 22 | The current CAMEO codebook is located here: \url{http://eventdata.parusanalytics.com/data.dir/cameo.html}. 
23 | } 24 | \keyword{datasets} 25 | -------------------------------------------------------------------------------- /man/download_icews.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/download_icews.R 3 | \name{get_icewslinks} 4 | \alias{get_icewslinks} 5 | \title{Download the ICEWS Dataset} 6 | \usage{ 7 | get_icewslinks(dv_server, dv_key) 8 | } 9 | \arguments{ 10 | \item{destpath}{The path to the directory where ICEWS should go.} 11 | } 12 | \description{ 13 | Download and unzip all of the data files for the ICEWS dataset from the 14 | Harvard Dataverse into a given directory. 15 | } 16 | \note{ 17 | This function is still in development and may contain errors and change quickly. 18 | } 19 | \examples{ 20 | 21 | download_icews("~/ICEWS/") 22 | 23 | } 24 | \author{ 25 | Original code and concept: Tony Boyles 26 | } 27 | -------------------------------------------------------------------------------- /man/download_phoenix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/download_phoenix.R 3 | \name{get_phoenixlinks} 4 | \alias{get_phoenixlinks} 5 | \title{Download the Phoenix Dataset} 6 | \usage{ 7 | get_phoenixlinks(start_date = as.Date("2014-06-20"), end_date = Sys.Date()) 8 | } 9 | \arguments{ 10 | \item{destpath}{The path to the directory where Phoenix should go.} 11 | 12 | \item{phoenix_version.}{Download a specific version of Phoenix ("v0.1.0" or the current version by default).} 13 | } 14 | \description{ 15 | Download and unzip all of the data files for the Phoenix dataset from the 16 | Phoenix data website into a given directory. 17 | } 18 | \note{ 19 | This function, like Phoenix, is still in development and may contain errors and change quickly. 
20 | } 21 | \examples{ 22 | 23 | download_phoenix("~/OEDA/phoxy_test/", phoenix_version = "current") 24 | 25 | } 26 | \author{ 27 | Original code credit: Andy Halterman 28 | } 29 | -------------------------------------------------------------------------------- /man/eventNetworks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/eventNetworks.R 3 | \name{eventNetworks} 4 | \alias{eventNetworks} 5 | \title{Convert Phoenix event data to daily event-networks.} 6 | \usage{ 7 | eventNetworks(start_date, end_date, level, dv_key, phoenix_loc = NULL, 8 | icews_loc = NULL, histphoenix_loc = NULL, 9 | dv_server = "harvard.dataverse.edu", update = TRUE, actorset = "states", 10 | codeset = "all", time_window = "day", code_subset = "all", 11 | tie_type = "binary", sources = "all") 12 | } 13 | \arguments{ 14 | \item{start_date}{start date of time period as Ymd-format integer (ex: 15 | June 1, 2014 as 20140601).} 16 | 17 | \item{end_date}{end date of time period as Ymd-format integer (ex: 18 | June 1, 2014 as 20140601).} 19 | 20 | \item{level}{level of event granularity ('eventcode', 'rootcode', 21 | 'pentaclass', or 'goldstein'). 'Eventcode' creates a network for 22 | each of the 226 sub-codes in CAMEO. 'Rootcode' creates a network 23 | for each of the 20 event root codes in CAMEO. 'Pentaclass' creates 24 | a network for each of the 0-4 pentaclass codes in CAMEO. 25 | 'Goldstein' creates one or two networks denoting mean Goldstein 26 | scores, either aggregated (positive - negative) or separated into 27 | two separate networks for positive and negative Goldstein scores.} 28 | 29 | \item{dv_key}{Unique user key to access SWORD API and automatically find and 30 | download up-to-date ICEWS data.} 31 | 32 | \item{phoenix_loc}{folder containing Phoenix data sets as daily .csv 33 | data tables. 
Automatically checks for new data sets each time 34 | the function is run, and downloads new daily data as it becomes 35 | available. Currently in 'one-and'done' format 36 | where it downloads the first time, and checks thereafter.} 37 | 38 | \item{icews_loc}{folder containing ICEWS data sets as daily .tab data 39 | tables. Because I don't know how to work a SWORD API, these will 40 | need to be manually downloaded and updated.} 41 | 42 | \item{histphoenix_loc}{folder containing historic Phoenix data from 43 | UIUC's Cline Center for Democracy. Leave empty if you don't 44 | want to use these data.} 45 | 46 | \item{dv_server}{Dataverse server address from which to download 47 | up-to-date ICEWS data. Defaults to Harvard Dataverse at 48 | harvard.dataverse.edu.} 49 | 50 | \item{update}{should phoenixNet attempt to download new data? This will attempt 51 | to download any Phoenix data files that 'should' be present in the 52 | Phoenix data directory (one data file per day, from 2014-06-20 through 53 | the present day) and denote whether or not any of these files 54 | come up missing in the process.} 55 | 56 | \item{actorset}{set of actors for which to create event-networks. Defaults 57 | to the 255 ISO-coded states in the international system. Specifying 58 | a specific state or set of states (as 3-character ISO codes) will 59 | extract all the 'major' domestic entites within that state/states.} 60 | 61 | \item{codeset}{subset of event codes as specified by 'level'. This is useful 62 | if you desire to extract only a portion of interactions recorded 63 | by CAMEO, but has to align with the code aggregation specified 64 | in the 'level' argument. For example, if you specify 'rootcode', 65 | the 'codeset' you specify has to be one or more root codes between 66 | 1 and 20. Entering a subset of root code values would return a 67 | smaller number of network layers. Defaults to 'all'.} 68 | 69 | \item{time_window}{temporal window to build event-networks. 
Valid 70 | entries are 'day', 'week', 'month', or 'year'.} 71 | 72 | \item{code_subset}{subset of EVENTCODES that can be aggregated up to higher 73 | order interactions. For example, you might want to only look at 74 | event codes below 100, but then aggregate those event codes to 75 | rootcode or pentaclass.} 76 | 77 | \item{tie_type}{type of ties to return. Default is binarized ties where 78 | a tie represents the presence of one OR MORE interactions in the 79 | time period specified. Valid entries are 'binary', 'count' 80 | (count of events), 'meangoldstein' (mean Goldstein score), 81 | 'sepgoldstein' (mean positive/negative Goldstein scores separated). 82 | NOTE: choosing a Goldstein score as tie type negates the "level" 83 | argument.} 84 | 85 | \item{sources}{use only Phoenix or ICEWS data in creating event networks. 86 | Valid entries are 'phoenix', 'icews', 'histphoenix' or 'all' (default).} 87 | 88 | \item{dv_server}{location of the ICEWS Dataverse server. Defaults to 89 | "harvard.dataverse.edu" and probably won't change anytime soon.} 90 | } 91 | \value{ 92 | master_networks a LIST object containing temporally referenced event-networks. 93 | } 94 | \description{ 95 | Take event-level data and convert it into 96 | networks of interaction by time period. Output is in 97 | the form of a nested list object where each element is 98 | an R network object. These networks can then be processed 99 | and analyzed. 100 | } 101 | \note{ 102 | This function is still in early development and may contain significant errors. 103 | Don't trust it. 
104 | } 105 | \author{ 106 | Jesse Hammond 107 | } 108 | -------------------------------------------------------------------------------- /man/extract_dyadstats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/extract_dyadstats.R 3 | \name{extract_dyadstats} 4 | \alias{extract_dyadstats} 5 | \title{Extract dyad-level statistics from a given event-network.} 6 | \usage{ 7 | extract_dyadstats(input_date = this_date, event_dnet = tsna_obj) 8 | } 9 | \arguments{ 10 | \item{input_date}{A date in integer %Y%m%d format.} 11 | 12 | \item{event_dnet}{network object object containing a set of interactions.} 13 | } 14 | \value{ 15 | net_stats Table of dyad-level statistics. 16 | } 17 | \description{ 18 | INTERNAL FUNCTION: Intakes a given network object and returns a set 19 | of dyad-level statistics for output. 20 | } 21 | \keyword{data} 22 | \keyword{event} 23 | \keyword{phoenix,} 24 | -------------------------------------------------------------------------------- /man/extract_netstats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/extract_netstats.R 3 | \name{extract_netstats} 4 | \alias{extract_netstats} 5 | \title{Extract network-level statistics from a given event-network.} 6 | \usage{ 7 | extract_netstats(input_date = this_date, event_dnet = event_dnet, 8 | datelist = dates) 9 | } 10 | \arguments{ 11 | \item{input_date}{A date in integer %Y%m%d format.} 12 | 13 | \item{event_dnet}{network object object containing a set of interactions.} 14 | } 15 | \value{ 16 | net_stats Table of network-level statistics. 17 | } 18 | \description{ 19 | INTERNAL FUNCTION: Intakes a given network object and returns a set 20 | of network-level statistics for output. 
21 | } 22 | \keyword{data} 23 | \keyword{event} 24 | \keyword{phoenix,} 25 | -------------------------------------------------------------------------------- /man/extract_nodestats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/extract_nodestats.R 3 | \name{extract_nodestats} 4 | \alias{extract_nodestats} 5 | \title{Extract node-level statistics from a given event-network.} 6 | \usage{ 7 | extract_nodestats(input_date = this_date, event_dnet = tsna_obj) 8 | } 9 | \arguments{ 10 | \item{input_date}{A date in integer %Y%m%d format.} 11 | 12 | \item{event_dnet}{network object object containing a set of interactions.} 13 | } 14 | \value{ 15 | net_stats Table of node-level statistics. 16 | } 17 | \description{ 18 | INTERNAL FUNCTION: Intakes a given network object and returns a set 19 | of node-level statistics for output. 20 | } 21 | \keyword{data} 22 | \keyword{event} 23 | \keyword{phoenix,} 24 | -------------------------------------------------------------------------------- /man/icews_cameo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/icews_cameo.R 3 | \name{icews_cameo} 4 | \alias{icews_cameo} 5 | \title{Convert ICEWS state/actor codes into CAMEO format, 6 | and extract root codes from specific CAMEO event codes.} 7 | \usage{ 8 | icews_cameo(icews) 9 | } 10 | \description{ 11 | Intake a set of ICEWS data (read in after some pre-processing) 12 | and convert entries to CAMEO format using conversion tables created 13 | by Phil Schrodt (https://github.com/philip-schrodt/text_to_CAMEO) 14 | } 15 | \details{ 16 | @param icews ICEWS data as one large data.table 17 | 18 | @return icews ICEWS data with several new CAMEO code columns. 
19 | 20 | @keywords phoenix, event data 21 | 22 | @import data.table 23 | @import plyr 24 | 25 | @export 26 | } 27 | -------------------------------------------------------------------------------- /man/ingest_histphoenix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ingest_histphoenix.R 3 | \name{ingest_histphoenix} 4 | \alias{ingest_histphoenix} 5 | \title{Ingest the historic Phoenix Dataset} 6 | \usage{ 7 | ingest_histphoenix(histphoenix_loc, start_date = start_date, 8 | end_date = end_date, statelist = statelist) 9 | } 10 | \arguments{ 11 | \item{histphoenix_loc}{The path to the Phoenix folder.} 12 | 13 | \item{end_date}{} 14 | } 15 | \value{ 16 | A single dataframe with all the historic Phoenix events in the folder. 17 | } 18 | \description{ 19 | Given a directory with the historic Phoenix dataset files, quickly read 20 | them all in, name them correctly, and combine them into one dataframe. 21 | } 22 | \note{ 23 | This function, like Phoenix, is still in development and may contain errors and change quickly. 24 | } 25 | \examples{ 26 | 27 | events <- ingest_histphoenix("~/histphoenix") 28 | 29 | } 30 | -------------------------------------------------------------------------------- /man/ingest_icews.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ingest_icews.R 3 | \name{ingest_icews} 4 | \alias{ingest_icews} 5 | \title{Ingest the ICEWS Event Dataset} 6 | \usage{ 7 | ingest_icews(dir, start_date, end_date) 8 | } 9 | \arguments{ 10 | \item{dir}{The path to the ICEWS folder.} 11 | 12 | \item{start_date}{Start of date range as YYYYMMDD integer format.} 13 | 14 | \item{end_date}{End of date range as YYYYMMDD integer format.} 15 | } 16 | \value{ 17 | A single dataframe with all the ICEWS events in the folder. 
18 | } 19 | \description{ 20 | Given a directory with individual ICEWS dataset files, quickly read 21 | them all in, name them correctly, and combine them into one dataframe. 22 | } 23 | \note{ 24 | This function is still in development and may contain errors and change quickly. 25 | } 26 | \examples{ 27 | 28 | events <- ingest_icews("~/ICEWS/study_28075/Data/", 20101201, 20140101) 29 | 30 | } 31 | \author{ 32 | Andy Halterman, forked by Jesse Hammond 33 | } 34 | -------------------------------------------------------------------------------- /man/ingest_phoenix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ingest_phoenix.R 3 | \name{ingest_phoenix} 4 | \alias{ingest_phoenix} 5 | \title{Ingest the Phoenix Dataset} 6 | \usage{ 7 | ingest_phoenix(phoenix_loc, start_date, end_date) 8 | } 9 | \arguments{ 10 | \item{phoenix_loc}{The path to the Phoenix folder.} 11 | 12 | \item{start_date}{Start of date range as YYYYMMDD integer format.} 13 | 14 | \item{end_date}{End of date range as YYYYMMDD integer format.} 15 | } 16 | \value{ 17 | A single dataframe with all the Phoenix events in the folder. 18 | } 19 | \description{ 20 | Given a directory with individual Phoenix dataset files, quickly read 21 | them all in, name them correctly, and combine them into one dataframe. 22 | } 23 | \note{ 24 | This function, like Phoenix, is still in development and may contain errors and change quickly. 
25 | } 26 | \examples{ 27 | 28 | events <- ingest_phoenix("~/OEDA/phoxy_test/", 20140620, 20150101) 29 | 30 | } 31 | \author{ 32 | Andy Halterman, forked by Jesse Hammond 33 | } 34 | -------------------------------------------------------------------------------- /man/phoenix_stats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/phoenix_stats.R 3 | \name{phoenix_stats} 4 | \alias{phoenix_stats} 5 | \title{Extract statistics from daily Phoenix event-networks.} 6 | \usage{ 7 | phoenix_stats(dailynets, time_window = "day", codes = "all", 8 | do_parallel = F, n_cores = 4) 9 | } 10 | \arguments{ 11 | \item{dailynets}{networkDynamic object containing daily event-nets 12 | produced via phoenix_net function.} 13 | 14 | \item{time_window}{time interval of aggregate event-network objects. Valid 15 | entries are 'day', 'week', 'month', 'year'.} 16 | 17 | \item{codes}{string of event codes, root codes, or pentaclass codes. 18 | Note: these codes have to be in the same format as the original 19 | network layers created via 'phoenix_net'. If you specify rootcodes 20 | in the creation step, entering pentaclass codes in this step 21 | will produce an error.} 22 | 23 | \item{do_parallel}{Logical TRUE-FALSE. Whether to use parallel backend 24 | 'doMC' when extracting network statistics by code. Considerably 25 | faster than single-core, but less reliable.} 26 | } 27 | \value{ 28 | phoenix_out a LIST object of tables containing descriptive 29 | statistics for daily event-networks. 30 | } 31 | \description{ 32 | Take a set of daily networks generated by the phoenix_net function, 33 | and extract a variety of daily statistics at the network and nodal 34 | levels. 
35 | } 36 | \keyword{data} 37 | \keyword{event} 38 | \keyword{phoenix,} 39 | -------------------------------------------------------------------------------- /man/phoenix_tables.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/phoenix_tables.R 3 | \name{phoenix_tables} 4 | \alias{phoenix_tables} 5 | \title{Scrape, merge, and process Phoenix and ICEWS data into 6 | a large data table for aggregation and subsetting.} 7 | \usage{ 8 | phoenix_tables(phoenix_loc, icews_loc, update = T) 9 | } 10 | \arguments{ 11 | \item{phoenix_loc}{folder containing Phoenix data sets as daily .csv 12 | data tables. Automatically checks for new data sets each time 13 | the function is run, and downloads new daily data as it becomes 14 | available. Currently in 'one-and'done' format 15 | where it downloads the first time, and checks thereafter.} 16 | 17 | \item{icews_loc}{folder containing ICEWS data sets as daily .tab data 18 | tables. Because I don't know how to work a SWORD API, these will 19 | need to be manually downloaded and updated.} 20 | } 21 | \value{ 22 | master_table a data.table object containing ALL merged/processed 23 | Phoenix and ICEWS data. One row per event-dyad-day. 24 | } 25 | \description{ 26 | Scrape, merge, and process Phoenix and ICEWS data into 27 | a large data table for aggregation and subsetting. 28 | } 29 | \note{ 30 | This function is still in early development and may contain significant errors. 31 | Don't trust it. 
32 | } 33 | \author{ 34 | Jesse Hammond 35 | } 36 | -------------------------------------------------------------------------------- /man/states.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/states_doc.R 3 | \docType{data} 4 | \name{states} 5 | \alias{states} 6 | \title{ICEWS CAMEO actor codes} 7 | \format{An object of class \code{data.table} (inherits from \code{data.frame}) with 260 rows and 3 columns.} 8 | \usage{ 9 | data(states) 10 | } 11 | \description{ 12 | Merge table to convert states to CAMEO format using conversion tables created 13 | by Phil Schrodt (https://github.com/philip-schrodt/text_to_CAMEO) 14 | } 15 | \keyword{datasets} 16 | -------------------------------------------------------------------------------- /man/update_icews.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/update_icews.R 3 | \name{update_icews} 4 | \alias{update_icews} 5 | \title{Update a local directory of ICEWS dataset files with new files from the server} 6 | \usage{ 7 | update_icews(destpath) 8 | } 9 | \arguments{ 10 | \item{destpath}{The path to download ICEWS into.} 11 | } 12 | \description{ 13 | Checks the contents of a directory containing ICEWS event data files, checks whether the 14 | server has new events, and downloads them to that directory. (It'll have some version handling ability, 15 | too, either from the file names or by reading in the events.) 16 | } 17 | \note{ 18 | This function, like Phoenix, is still in development and may contain errors and change quickly. 
19 | } 20 | \examples{ 21 | 22 | } 23 | \author{ 24 | Original concept and code for Phoenix: Andy Halterman 25 | } 26 | -------------------------------------------------------------------------------- /man/update_phoenix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/update_phoenix.R 3 | \name{update_phoenix} 4 | \alias{update_phoenix} 5 | \title{Update a local directory of Phoenix dataset files with new files from the server} 6 | \usage{ 7 | update_phoenix(destpath) 8 | } 9 | \arguments{ 10 | \item{destpath}{The path to download Phoenix into.} 11 | } 12 | \description{ 13 | Checks the contents of a directory containing Phoenix event data files, checks whether the 14 | server has new events, and downloads them to that directory. (It'll have some version handling ability, 15 | too, either from the file names or by reading in the events.) 16 | } 17 | \note{ 18 | This function, like Phoenix, is still in development and may contain errors and change quickly. 
19 | } 20 | \examples{ 21 | 22 | } 23 | -------------------------------------------------------------------------------- /testing_script.R: -------------------------------------------------------------------------------- 1 | #devtools::install_github('jrhammond/EventNetworks') 2 | pacman::p_load(EventNetworks) 3 | ?eventNetworks 4 | # Manual smoke test: call eventNetworks() for 2014-01-01 through 2015-01-01 with monthly windows and the 'states' actor set, reading from local data folders. 5 | test <- EventNetworks::eventNetworks( 6 | start_date = 20140101 7 | , end_date = 20150101 8 | , level = 'pentaclass' 9 | , dv_key = '002231de-d465-401b-ac91-c2697b948694' # NOTE(review): looks like a hard-coded Dataverse API key committed to the repo -- confirm, and consider revoking it and reading it from an environment variable instead 10 | , phoenix_loc = 'C:\\Users\\Jesse\\Box Sync\\DataSets\\phoenix' 11 | , icews_loc = 'C:\\Users\\Jesse\\Box Sync\\DataSets\\icews' 12 | , histphoenix_loc = 'C:\\Users\\Jesse\\Box Sync\\DataSets\\CCHPED_v2017_06_30' 13 | , dv_server = 'harvard.dataverse.edu' 14 | , update = FALSE 15 | , actorset = 'states' 16 | , codeset = 'all' 17 | , time_window = 'month' 18 | , code_subset = 'all' 19 | , tie_type = 'meangoldstein' 20 | , sources = 'all' 21 | ) 22 | --------------------------------------------------------------------------------