├── CAMEO_codefile.txt ├── LICENSE ├── README.md ├── agentnames.txt ├── countrynames.txt ├── text_to_CAMEO.py ├── text_to_CAMEO_1.1B1_doc.odt └── text_to_CAMEO_1.1B1_doc.pdf /CAMEO_codefile.txt: -------------------------------------------------------------------------------- 1 | //CAMEO Codelist Version: 0.7b5 (Aug 6, 2007) 2 | //Entries with LABELi correspond to the modifications found in the ICEWS files ca. June 2014 3 | // 4 | //01: MAKE PUBLIC STATEMENT 5 | LABEL: 010= Make statement 6 | LABEL: 011= Decline comment 7 | LABEL: 012= Make pessimistic comment 8 | LABEL: 013= Make optimistic comment 9 | LABEL: 014= Consider policy option 10 | LABEL: 015= Acknowledge or claim responsibility 11 | LABEL: 016= Deny responsibility 12 | LABEL: 017= Engage in symbolic act 13 | LABEL: 018= Make empathetic commment 14 | LABEL: 018= Make empathetic comment 15 | LABEL: 019= Express accord 16 | // 17 | //02: APPEAL 18 | LABEL: 020= Make an appeal or request 19 | LABEL: 021= Appeal for material cooperation 20 | LABEL: 0211= Appeal for economic cooperation 21 | LABEL: 0212= Appeal for military cooperation 22 | LABEL: 0213= Appeal for judicial cooperation 23 | LABEL: 0214= Appeal for intelligence cooperation 24 | LABELi: 0214= Appeal for intelligence 25 | LABEL: 022= Appeal for diplomatic cooperation (such as policy support) 26 | LABEL: 023= Appeal for aid 27 | LABEL: 0231= Appeal for economic aid 28 | LABEL: 0232= Appeal for military aid 29 | LABEL: 0233= Appeal for humanitarian aid 30 | LABEL: 0234= Appeal for military protection or peacekeeping 31 | LABEL: 024= Appeal for political reform 32 | LABEL: 0241= Appeal for change in leadership 33 | LABEL: 0242= Appeal for policy change 34 | LABEL: 0243= Appeal for rights 35 | LABEL: 0244= Appeal for change in institutions, regime 36 | LABEL: 025= Appeal for target to yield 37 | LABELi: 025= Appeal to yield 38 | LABEL: 0251= Appeal for easing of administrative sanctions 39 | LABEL: 0252= Appeal for easing of popular dissent 40 | 
LABELi: 0252= Appeal for easing of political dissent 41 | LABEL: 0253= Appeal for release of persons or property 42 | LABEL: 0254= Appeal for easing of economic sanctions, boycott, or embargo 43 | LABEL: 0255= Appeal for target to allow international involvement (non-mediation) 44 | LABEL: 0256= Appeal for de-escalation of military engagement 45 | LABELi: 0251= Demand easing of administrative sanctions 46 | LABELi: 0252= Demand easing of popular dissent 47 | LABELi: 0253= Demand release of persons or property 48 | LABELi: 0254= Demand easing of economic sanctions, boycott, or embargo 49 | LABELi: 0255= Demand target to allow international involvement (non-mediation) 50 | LABELi: 0256= Demand de-escalation of military engagement 51 | LABEL: 026= Appeal to others to meet or negotiate 52 | LABEL: 027= Appeal to others to settle dispute 53 | LABEL: 028= Appeal to engage mediation 54 | LABELi: 028= Appeal to engage in or accept mediation 55 | // 56 | //03= EXPRESS INTENT TO COOPERATE 57 | LABEL: 030= Express intent to cooperate 58 | LABEL: 031= Agree to engage in material cooperation 59 | LABELi: 031= Express intent to engage in material cooperation 60 | LABEL: 0311= Express intent to cooperate economically 61 | LABEL: 0312= Express intent to cooperate militarily 62 | LABEL: 0313= Express intent to cooperate on judicial matters 63 | LABEL: 0314= Express intent to cooperate on intelligence 64 | LABEL: 032= Express intent to engage in diplomatic cooperation (such as policy support) 65 | LABEL: 033= Express intent to provide material aid 66 | LABEL: 0331= Express intent to provide economic aid 67 | LABEL: 0332= Express intent to provide military aid 68 | LABEL: 0333= Express intent to provide humanitarian aid 69 | LABEL: 0334= Express intent to provide military protection or peacekeeping 70 | LABEL: 034= Express intent to bring political reform 71 | LABELi: 034= Express intent to institute political reform 72 | LABEL: 0341= Express intent to change leadership 73 | LABEL: 
0342= Express intent to change policy 74 | LABEL: 0343= Express intent to provide rights 75 | LABEL: 0344= Express intent to change institutions, regime 76 | LABEL: 035= Express intent to yield 77 | LABEL: 0351= Express intent to ease administrative sanctions 78 | LABEL: 0352= Express intent to ease popular dissent 79 | LABEL: 0353= Express intent to release persons or property 80 | LABEL: 0354= Express intent to ease economic sanctions, boycott, embargo 81 | LABEL: 0354= Express intent to ease economic sanctions, boycott, or embargo 82 | LABEL: 0355= Express intent to allow international involvement (non-mediation) 83 | LABEL: 0356= Express intent to de-escalate military engagement 84 | LABEL: 036= Express intent to meet or negotiate 85 | LABEL: 037= Express intent to settle dispute 86 | LABEL: 038= Express intent to accept mediation 87 | LABEL: 039= Express intent to mediate 88 | // 89 | //04= CONSULT 90 | LABEL: 040= Consult 91 | LABEL: 041= Discuss by telephone 92 | LABEL: 042= Make a visit 93 | LABEL: 043= Host a visit 94 | LABEL: 044= Meet at a 'third' location 95 | LABEL: 045= Engage in mediation 96 | LABELi: 045= Mediate 97 | LABEL: 046= Engage in negotiation 98 | // 99 | //05= ENGAGE IN DIPLOMATIC COOPERATION 100 | LABEL: 050= Engage in diplomatic cooperation 101 | LABEL: 051= Praise or endorse 102 | LABEL: 052= Defend verbally 103 | LABEL: 053= Rally support on behalf of 104 | LABEL: 054= Grant diplomatic recognition 105 | LABEL: 055= Apolgize 106 | LABELi: 055= Apologize 107 | LABEL: 056= Forgive 108 | LABEL: 057= Sign formal agreement 109 | // 110 | //06= ENGAGE IN MATERIAL COOPERATION 111 | LABEL: 060= Engage in material cooperation 112 | LABEL: 061= Cooperate economically 113 | LABEL: 062= Cooperate militarily 114 | LABEL: 063= Engage in judicial cooperation 115 | LABEL: 064= Share intelligence or information 116 | // 117 | //07= PROVIDE AID 118 | LABEL: 070= Provide aid 119 | LABEL: 071= Provide economic aid 120 | LABEL: 072= Provide military aid 121 
| LABEL: 073= Provide humanitarian aid 122 | LABEL: 074= Provide military protection or peacekeeping 123 | LABEL: 075= Grant asylum 124 | // 125 | //08= YIELD 126 | LABEL: 080= Yield 127 | LABEL: 081= Ease administrative sanctions 128 | LABEL: 0811= Ease restrictions on freedoms of expression 129 | LABEL: 0811i= Ease restrictions on political freedoms 130 | LABEL: 0812= Ease ban on political parties or politicians 131 | LABEL: 0813= Ease curfew 132 | LABEL: 0814= Ease state of emergency or martial law 133 | LABEL: 082= Ease political dissent 134 | LABEL: 083= Accede to requests or demands for political reform 135 | LABEL: 0831= Accede to demands for change in leadership 136 | LABEL: 0832= Accede to demands for change in policy 137 | LABEL: 0833= Accede to demands for rights 138 | LABEL: 0834= Accede to demands for change in institutions, regime 139 | LABEL: 084= Return, release 140 | LABEL: 0841= Return, release person(s) 141 | LABEL: 0842= Return, release property 142 | LABEL: 085= Ease economic sanctions, boycott, embargo 143 | LABEL: 086= Allow international involvement 144 | LABEL: 0861= Receive deployment of peacekeepers 145 | LABEL: 0862= Receive inspectors 146 | LABEL: 0863= Allow delivery of humanitarian aid 147 | LABELi: 0863= Allow humanitarian access 148 | LABEL: 087= De-escalate military engagement 149 | LABEL: 0871= Declare truce, ceasefire 150 | LABEL: 0872= Ease military blockade 151 | LABEL: 0873= Demobilize armed forces 152 | LABEL: 0874= Retreat or surrender militarily 153 | // 154 | //09= INVESTIGATE 155 | LABEL: 090= Investigate 156 | LABEL: 091= Investigate crime, corruption 157 | LABEL: 092= Investigate human rights abuses 158 | LABEL: 093= Investigate military action 159 | LABEL: 094= Investigate war crimes 160 | // 161 | //10= DEMAND 162 | LABEL: 100= Demand 163 | LABEL: 101= Demand material cooperation 164 | LABEL: 1011= Demand economic cooperation 165 | LABEL: 1012= Demand military cooperation 166 | LABEL: 1013= Demand judicial cooperation 
167 | LABEL: 1014= Demand intelligence cooperation 168 | LABEL: 102= Demand diplomatic cooperation (such as policy support) 169 | LABEL: 103= Demand aid 170 | LABELi: 103= Demand material aid 171 | LABEL: 1031= Demand economic aid 172 | LABEL: 1032= Demand military aid 173 | LABEL: 1033= Demand humanitarian aid 174 | LABEL: 1034= Demand military protection or peacekeeping 175 | LABEL: 104= Demand political reform 176 | LABEL: 1041= Demand change in leadership 177 | LABEL: 1042= Demand policy change 178 | LABEL: 1043= Demand rights 179 | LABEL: 1044= Demand change in institutions, regime 180 | LABEL: 105= Demand that target yields 181 | LABEL: 1051= Demand easing of adminstrative sanctions 182 | LABEL: 1052= Demand easing of popular dissent 183 | LABELi: 1052= Demand easing of political dissent 184 | LABEL: 1053= Demand release of persons or property 185 | LABEL: 1054= Demand easing of economic sanctions, boycott, or embargo 186 | LABEL: 1055= Demand that target allows international involvement (non-mediation) 187 | LABEL: 1056= Demand de-escalation of military engagement 188 | LABEL: 106= Demand meeting, negotiation 189 | LABEL: 107= Demand settling of dispute 190 | LABEL: 108= Demand mediation 191 | // 192 | //11= DISAPPROVE 193 | LABEL: 110= Disapprove 194 | LABEL: 111= Criticize or denounce 195 | LABEL: 112= Accuse 196 | LABEL: 1121= Accuse of crime, corruption 197 | LABEL: 1122= Accuse of human rights abuses 198 | LABEL: 1123= Accuse of aggression 199 | LABEL: 1124= Accuse of war crimes 200 | LABEL: 1125= Accuse of espionage, treason 201 | LABEL: 113= Rally opposition against 202 | LABEL: 114= Complain officially 203 | LABEL: 115= Bring lawsuit against 204 | LABEL: 116= Find guilty or liable (legally) 205 | LABELi: 116= find guilty or liable (legally) 206 | 207 | // 208 | //12= REJECT 209 | LABEL: 120= Reject 210 | LABEL: 121= Reject material cooperation 211 | LABEL: 1211= Reject economic cooperation 212 | LABEL: 1212= Reject military cooperation 213 | LABEL: 
1213= Reject judicial cooperation 214 | LABEL: 1214= Reject intelligence cooperation 215 | LABEL: 122= Reject request or demand for material aid 216 | LABELi: 122= Reduce or stop material aid 217 | LABEL: 1221= Reject demand for economic aid 218 | LABEL: 1222= Reject demand for military aid 219 | LABEL: 1223= Reject demand for humanitarian aid 220 | LABEL: 1224= Reject demand for military protection or peacekeeping 221 | LABELi: 1231= Reject request for change in leadership 222 | LABELi: 1232= Reject request for policy change 223 | LABELi: 1233= Reject request for rights 224 | LABELi: 1234= Reject request for change in institutions, regime 225 | LABELi: 1221= Reject request for economic aid 226 | LABELi: 1222= Reject request for military aid 227 | LABELi: 1223= Reject request for humanitarian aid 228 | LABELi: 1224= Reject request for military protection or peacekeeping 229 | LABEL: 123= Reject request or demand for political reform 230 | LABEL: 1231= Reject demand for change in leadership 231 | LABEL: 1232= Reject demand for policy change 232 | LABEL: 1233= Reject demand for rights 233 | LABEL: 1234= Reject demand for change in institutions, regime 234 | LABELi: 1231= Reject request for change in leadership 235 | LABELi: 1232= Reject request for policy change 236 | LABELi: 1233= Reject request for rights 237 | LABELi: 1234= Reject request for change in institutions, regime 238 | LABEL: 124= Decline to yield 239 | LABEL: 1241= Decline to ease administrative sanctions 240 | LABEL: 1242= Decline to ease popular dissent 241 | LABEL: 1243= Decline to release persons or property 242 | LABEL: 1244= Decline to ease economic sanctions, boycott, or embargo 243 | LABEL: 1245= Decline to allow international involvement (non-mediation) 244 | LABEL: 1246= Decline to de-escalate military engagement 245 | LABELi: 124= Refuse to yield 246 | LABELi: 1241= Refuse to ease administrative sanctions 247 | LABELi: 1242= Refuse to ease popular dissent 248 | LABELi: 1243= Refuse to release 
persons or property 249 | LABELi: 1244= Refuse to ease economic sanctions, boycott, or embargo 250 | LABELi: 1245= Refuse to allow international involvement (non mediation) 251 | LABELi: 1246= Refuse to de-escalate military engagement 252 | LABEL: 125= Reject proposal to meet, discuss, negotiate 253 | LABELi: 125= Reject proposal to meet, discuss, or negotiate 254 | LABEL: 126= Reject mediation 255 | LABEL: 127= Reject plan, agreement to settle dispute 256 | LABEL: 128= Defy norms, law 257 | LABEL: 129= Veto 258 | // 259 | //13= THREATEN 260 | LABEL: 130= Threaten 261 | LABEL: 131= Threaten non-force 262 | LABEL: 1311= Threaten to reduce or stop aid 263 | LABEL: 1312= Threaten to boycott, embargo, or sanction 264 | LABELi: 1312= Threaten with sanctions, boycott, embargo 265 | LABEL: 1313= Threaten to reduce or break relations 266 | LABEL: 132= Threaten with administrative sanctions 267 | LABEL: 1321= Threaten to impose restrictions on freedoms of speech and expression 268 | LABEL: 1321i= Threaten with restrictions on political freedoms 269 | LABEL: 1322= Threaten to ban political parties or politicians 270 | LABEL: 1323= Threaten to impose curfew 271 | LABEL: 1324= Threaten to impose state of emergency or martial law 272 | LABEL: 133= Threaten with political dissent, protest 273 | LABEL: 134= Threaten to halt negotiations 274 | LABEL: 135= Threaten to halt mediation 275 | LABEL: 136= Threaten to halt (expel or withdraw) international involvement (non-mediation) 276 | LABELI: 136= Threaten to halt international involvement (non-mediation) 277 | LABEL: 137= Threaten with violent repression 278 | LABELi: 13y= Threaten with repression 279 | LABEL: 138= Threaten with military force 280 | LABEL: 138i= Threaten to use military force 281 | LABEL: 1381= Threaten blockade 282 | LABEL: 1382= Threaten occupation 283 | LABEL: 1383= Threaten unconventional violence 284 | LABEL: 1384= Threaten conventional attack 285 | LABEL: 1385= Threaten attack with WMD 286 | LABEL: 139= Give 
ultimatum 287 | // 288 | //14= PROTEST 289 | LABEL: 140= Engage in political dissent 290 | LABEL: 141= Demonstrate or rally 291 | LABEL: 1411= Demonstrate for leadership change 292 | LABEL: 1412= Demonstrate for policy change 293 | LABEL: 1413= Demonstrate for rights 294 | LABEL: 1414= Demonstrate for change in institutions, regime 295 | LABEL: 142= Conduct hunger strike 296 | LABEL: 1421= Conduct hunger strike for leadership change 297 | LABEL: 1422= Conduct hunger strike for policy change 298 | LABEL: 1423= Conduct hunger strike for rights 299 | LABEL: 1424= Conduct hunger strike for change in institutions, regime 300 | LABEL: 143= Conduct strike or boycott 301 | LABEL: 1431= Conduct strike or boycott for change in leadership 302 | LABELi: 1431= Conduct strike or boycott for leadership change 303 | LABEL: 1432= Conduct strike or boycott for policy change 304 | LABEL: 1433= Conduct strike or boycott for rights 305 | LABEL: 1434= Conduct strike or boycott for change in institutions, regime 306 | LABEL: 144= Obstruct passage, block 307 | LABEL: 1441= Obstruct passage to demand change in leadership 308 | LABELi: 1441= Obstruct passage to demand leadership change 309 | LABEL: 1442= Obstruct passage to demand policy change 310 | LABEL: 1443= Obstruct passage to demand rights 311 | LABEL: 1444= Ostruct passage to demand change in institutions, regime 312 | LABEL: 145= Protest violently, riot 313 | LABEL: 1451= Engage in violent protest for change in leadership 314 | LABELi: 1451i= Engage in violent protest for leadership change 315 | LABEL: 1452= Engage in violent protest for policy change 316 | LABEL: 1453= Engage in violent protest for rights 317 | LABEL: 1454= Engage in violent protest for change in institutions, regime 318 | // 319 | //15= EXHIBIT MILITARY POSTURE 320 | LABEL: 150= Demonstrate military or police power 321 | LABEL: 151= Increase police alert status 322 | LABEL: 152= Increase military alert status 323 | LABEL: 153= Mobilize or increase police power 
324 | LABEL: 154= Mobilize or increase armed forces 325 | // 326 | //16= REDUCE RELATIONS 327 | LABEL: 160= Reduce relations 328 | LABEL: 161= Reduce or break diplomatic relations 329 | LABEL: 162= Reduce or stop aid 330 | LABEL: 1621= Reduce or stop economic assistance 331 | LABEL: 1622= Reduce or stop military assistance 332 | LABEL: 1623= Reduce or stop humanitarian assistance 333 | LABEL: 163= Impose embargo, boycott, or sanctions 334 | LABEL: 164= Halt negotiations 335 | LABEL: 165= Halt mediation 336 | LABEL: 166= Expel or withdraw 337 | LABEL: 1661= Expel or withdraw peacekeepers 338 | LABEL: 1662= Expel or withdraw inspectors, observers 339 | LABEL: 1663= Expel or withdraw aid agencies 340 | // 341 | //17= COERCE 342 | LABEL: 170= Coerce 343 | LABEL: 171= Seize or damage property 344 | LABEL: 1711= Confiscate property 345 | LABEL: 1712= Destroy property 346 | LABEL: 172= Impose administrative sanctions 347 | LABEL: 1721= Impose restrictions on freedoms of speech and expression 348 | LABELi: 1721= Impose restrictions on political freedoms 349 | LABEL: 1722= Ban political parties or politicians 350 | LABEL: 1723= Impose curfew 351 | LABEL: 1724= Impose state of emergency or martial law 352 | LABEL: 173= Arrest, detain, or charge with legal action 353 | LABEL: 174= Expel or deport individuals 354 | LABEL: 175= Use violent repression 355 | LABEL: 175= Use tactics of violent repression 356 | // 357 | //18= ASSAULT 358 | LABEL: 180= Use unconventional violence 359 | LABEL: 181= Abduct, hijack 360 | LABELi: 181= Abduct, hijack, or take hostage 361 | LABEL: 182= Physically assault 362 | LABEL: 1821= Sexually assault 363 | LABEL: 1822= Torture 364 | LABEL: 1823= Kill by physical assault 365 | LABEL: 183= Conduct suicide, car, or other non-military bombing 366 | LABEL: 1831= Carry out suicide bombing 367 | LABEL: 1832= Carry out car bombing 368 | LABEL: 1833= Carry out roadside bombing 369 | LABEL: 184= Use as human shield 370 | LABEL: 185= Attempt to assassinate 371 
| LABEL: 186= Assassinate 372 | // 373 | //19= FIGHT 374 | LABEL: 190= Use conventional military force 375 | LABEL: 191= Impose blockade, restrict movement 376 | LABEL: 192= Occupy territory 377 | LABEL: 193= Fight with small arms and light weapons 378 | LABELi: 193= fight with small arms and light weapons 379 | LABEL: 194= Fight with artillery and tanks 380 | LABELi: 194= fight with artillery and tanks 381 | LABEL: 195= Employ aerial weapons 382 | LABEL: 1951= Employ precision-guided aerial munitions 383 | LABEL: 1952= Employ remotely piloted aerial munitions 384 | LABEL: 196= Violate ceasefire 385 | // 386 | //20= ATTACK WITH WEAPONS OF MASS DESTRUCTION 387 | LABEL: 200= Use unconventional mass violence 388 | LABEL: 201= Engage in mass expulsion 389 | LABEL: 202= Engage in mass killings 390 | LABEL: 203= Engage in ethnic cleansing 391 | LABEL: 204= Use weapons of mass destruction 392 | LABEL: 2041= Use chemical, biological, or radiological weapons 393 | LABEL: 2042= Detonate nuclear weapons 394 | ~~~~~ 395 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Philip Schrodt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | text_to_CAMEO 2 | ============= 3 | 4 | This Python3 program takes data in the text-oriented ICEWS files and converts 5 | this to a more conventional data format using the CAMEO codes. The conversion process is described in detail 6 | in the file *text_to_CAMEO_1.1B1_doc.pdf*. 7 | 8 | To run: python text_to_CAMEO.py [-F] [-c] [-t \<filename\>] [-m] 9 | 10 | Options: 11 | -------- 12 | 13 | -F: Files are in FOUO format. Default: Files are in Dataverse format 14 | 15 | -c: Include COW numerical country codes in addition to ISO-3166 code. Default: Include only the ISO codes 16 | 17 | -t \<filename\>: Process the files listed one per line in the text file \<filename\>. Default: process all of the files in the working directory that end in “.csv” (Dataverse format) or “.tab” (FOUO format) 18 | 19 | -m: Output all of the substate agents in a concatenated string. Default: only a single agent is used, with the priority determined by the list agentcodes. 
20 | 21 | Requires: 22 | --------- 23 | 24 | CAMEO_codefile.txt [FOUO only] 25 | countrynames.txt 26 | agentnames.txt 27 | 28 | JSON conversion 29 | --------------- 30 | 31 | Template code---this is from an active and fully debugged program, but it has some project-specific quirks---for converting ICEWS to JSON can be found at https://github.com/philip-schrodt/ICEWS-to-jsonl: using JSON rather than `.csv` is highly recommended if you are planning to do anything complicated. 32 | 33 | Comments on the April-2017 modifications 34 | ---------------------------------------- 35 | 36 | 1. This program merges two earlier versions I'd used separately for the two formats. Both of those were used successfully in large-scale projects so I'm confident they were working, but I've not done that sort of testing on this merged version (I have done *basic* testing on it...really...). So definitely check and make sure the output makes sense. 37 | 38 | 2. The fact that there are at least two incompatible formats of the ICEWS data suggests the possibility that there might be others: again, check to make sure your output makes sense. 39 | 40 | 3. There's an assortment of “commented out” code in the program that was used earlier to do some basic marginals on the code: this could be reactivated, but it is in Python 2.6 and will also need a bit of updating to Python 3.5. 41 | 42 | Comments on the March-2020 modifications 43 | ---------------------------------------- 44 | 45 | Dataverse files for 2015 through 2019 have options for three different file formats: the one compatible with 46 | this program is `Original File Format (Tab-Delimited)` which downloads as a file with a `.tsv` suffix. The new files 47 | under the `Tab-Delimited` option (`.tab` file suffix) have the string data fields in quotes, which is cool but that format 48 | change causes the actor names not to match, so these all produce "---". 
I've modified the program so that the default (no 49 | command line options) codes all files with either `.tab` or `.tsv` as the suffix. 50 | 51 | ICEWS has been off-line for about nine 52 | months and will be produced by Leidos rather than Lockheed when/if it returns, so I'm waiting to see if there are 53 | additional changes under the new sponsorship; in the meantime this fix should work as long as you download the `.tsv` file 54 | and not the `.tab` file for those years. 55 | 56 | February-2021 modifications 57 | ---------------------------------------- 58 | 59 | * -m option for output of all substate actors 60 | * -F now detects the format change in FOUO version ca. May-2020. If the CAMEO code is already in the data, this is used. 61 | * 1951 and 1952 codes added to CAMEO_codefile.txt 62 | * os.path.basename() used for finding filename, generalizing beyond Unix 63 | -------------------------------------------------------------------------------- /agentnames.txt: -------------------------------------------------------------------------------- 1 | Government GOV GOV 2 | Executive GOV GOV1 3 | Executive Office GOV GOV2 4 | Cabinet GOV GOV3 5 | Agriculture / Fishing / Forestry Ministry GOVAGR GOVAGR 6 | Finance / Economy / Commerce / Trade Ministry GOVBUS GOVBUS 7 | Defense / Security Ministry GOVMIL GOVMIL 8 | Education Ministry GOVEDU GOVEDU 9 | Energy Ministry GOVENV GOV4 10 | Environment Ministry GOVENV GOVENV 11 | Transportation Ministry GOVENV GOVENV5 12 | Food Ministry GOVENV GOVENV6 13 | Disaster Ministry GOVENV GOVENV7 14 | Foreign Ministry GOV GOVENV8 15 | Health Ministry GOVHLH GOVHLH 16 | Interior / Home Ministry GOV GOV9 17 | Industrial / Textiles / Mining Ministry GOVBUS GOVHLH10 18 | Science / Tech / Knowledge / Innovation Ministry GOVHLH GOVHLH11 19 | NGO Ministry GOV GOVHLH12 20 | Labor Ministry GOVLAB GOVLAB 21 | Post / Tecoms Ministry GOVBUS GOVLAB13 22 | Science / Tech Ministry GOVBUS GOVLAB14 23 | Water Ministry GOVLAB GOVLAB15 24 | Women / 
Children / Social / Welfare / Development / Religion Ministry GOVDEV GOVDEV 25 | Justice / Law Ministry GOVJUD GOVJUD 26 | Tourism Ministry GOVJUD GOVJUD16 27 | Drugs Ministry GOVJUD GOVJUD17 28 | Human Rights Ministry GOVHRI GOVHRI 29 | Elections Ministry GOVHRI GOVHRI18 30 | Housing / Construction Ministry GOVHRI GOVHRI19 31 | Intelligence Ministry SPY SPY 32 | Information / Communication / Transparency Ministry GOVMED GOVMED 33 | State Media GOVMED GOVMED1 34 | Management / Budget / Planning / Organization Ministry GOV GOV2 35 | State Owned Enterprises GOVBUS GOVBUS20 36 | State-Owned Agricultural GOVBUS GOVBUS21 37 | State-Owned Transportation GOVBUS GOVBUS22 38 | State-Owned Utilities GOVBUS GOVBUS23 39 | State-Owned Heavy Industrial / Chemical GOVBUS GOVBUS24 40 | State-Owned Defense / Security GOVBUS GOVBUS25 41 | State-Owned Durable Goods GOVBUS GOVBUS26 42 | State-Owned Consumer Goods GOVBUS GOVBUS27 43 | State-Owned Consumer Services GOVBUS GOVBUS28 44 | State-Owned Consulting / Financial Services GOVBUS GOVBUS29 45 | State-Owned Science / Tech / Knowledge / Innovation GOVBUS GOVBUS30 46 | State-Owned Medical / Health / Pharmeceutical GOVBUS GOVBUS31 47 | Police COP COP 48 | National / Border Divisions COP COP32 49 | Provincial Divisions COP COP33 50 | Municipal Divisions COP COP34 51 | Military MIL MIL 52 | Military Intelligence MILSPY MILSPY 53 | Military Intelligence Headquarters MILSPY MILSPY35 54 | Military Intelligence Special Forces MILSPY MILSPY36 55 | Military Intelligence Infantry / Regular MILSPY MILSPY37 56 | Military Intelligence Mechanized (Ships, Tanks, Planes) MILSPY MILSPY38 57 | Military Intelligence Education / Training MILSPY MILSPY39 58 | Military Intelligence Support MILSPY MILSPY40 59 | Military Intelligence Medical MILSPY MILSPY41 60 | Research And Design Wings MIL MIL42 61 | Research And Design Wings Headquarters MIL MIL43 62 | Research And Design Wings Education / Training MIL MIL44 63 | Research And Design Wings Support MIL 
MIL45 64 | Research And Design Wings Medical MIL MIL46 65 | Army MIL MIL47 66 | Army Headquarters MIL MIL48 67 | Army Special Forces MIL MIL49 68 | Army Infantry / Regular MIL MIL50 69 | Army Mechanized (Ships, Tanks, Planes) MIL MIL51 70 | Army Education / Training MIL MIL52 71 | Army Support MIL MIL53 72 | Army Medical MIL MIL54 73 | Navy MIL MIL55 74 | Navy Headquarters MIL MIL56 75 | Navy Special Forces MIL MIL57 76 | Navy Infantry / Regular MIL MIL58 77 | Navy Mechanized (Ships, Tanks, Planes) MIL MIL59 78 | Navy Education / Training MIL MIL60 79 | Navy Support MIL MIL61 80 | Navy Medical MIL MIL62 81 | Air Force MIL MIL63 82 | Air Force Headquarters MIL MIL64 83 | Air Force Special Forces MIL MIL65 84 | Air Force Infantry / Regular MIL MIL66 85 | Air Force Mechanized (Ships, Tanks, Planes) MIL MIL67 86 | Air Force Education / Training MIL MIL68 87 | Air Force Support MIL MIL69 88 | Air Force Medical MIL MIL70 89 | Marines MIL MIL71 90 | Marines Headquarters MIL MIL72 91 | Marines Special Forces MIL MIL73 92 | Marines Infantry / Regular MIL MIL74 93 | Marines Mechanized (Ships, Tanks, Planes) MIL MIL75 94 | Marines Education / Training MIL MIL76 95 | Marines Support MIL MIL77 96 | Marines Medical MIL MIL78 97 | Coast Guard MIL MIL79 98 | Coast Guard Headquarters MIL MIL80 99 | Coast Guard Special Forces MIL MIL81 100 | Coast Guard Infantry / Regular MIL MIL82 101 | Coast Guard Mechanized (Ships, Tanks, Planes) MIL MIL83 102 | Coast Guard Education / Training MIL MIL84 103 | Coast Guard Support MIL MIL85 104 | Coast Guard Medical MIL MIL86 105 | Legislative / Parliamentary LEG LEG 106 | Upper House LEG LEG87 107 | Lower House LEG LEG88 108 | Unicameral LEG LEG89 109 | Judicial JUD JUD 110 | National / Supreme Court JUD JUD90 111 | Provincial Court JUD JUD91 112 | Municipal / District Court JUD JUD92 113 | Civil Court JUD JUD93 114 | Religious Court JUD JUD94 115 | Military / Tribunal JUD JUD95 116 | Local JUD JUD96 117 | Provincial JUD JUD97 118 | Municipal JUD 
JUD98 119 | Government Religious GOVREL GOVREL 120 | Parties PTY PTY 121 | (National) Major Party PTY PTY99 122 | Opposition Major Party (Out Of Government) OPPPTY OPPPTY 123 | Government Major Party (In Government) GOVPTY GOVPTY 124 | (National) Minor Party PTY PTY100 125 | Opposition Minor Party (Out Of Government) OPPPTY OPPPTY101 126 | Government Minor Party (In Government) GOVPTY GOVPTY102 127 | Provincial Party PTY PTY103 128 | Opposition Provincial Party (Out Of Government) OPPPTY OPPPTY104 129 | Government Provincial Party (In Government) GOVPTY GOVPTY105 130 | Municipal Party PTY PTY106 131 | Opposition Municipal Party (Out Of Government) OPPPTY OPPPTY107 132 | Government Municipal Party (In Government) GOVPTY GOVPTY108 133 | Dissident OPP OPP109 134 | Criminals / Gangs CRM CRM 135 | Protestors / Popular Opposition / Mobs OPP OPP 136 | Exiles OPP OPP110 137 | Banned Parties OPPPTY OPP111 138 | Radicals / Extremists / Fundamentalists REB RAD 139 | Organized Violent REB RAD112 140 | Rebel REB REB 141 | Insurgents REB INS 142 | Separatists REB SEP 143 | Social CVL CIV113 144 | Agricultural BUS BUSAGR 145 | Business BUS BUS 146 | Agricultural Business BUS BUS122 147 | Transportation Business BUS BUS123 148 | Utilities Business BUS BUS124 149 | Heavy Industrial / Chemical Business BUS BUS125 150 | Defense / Security Business BUS BUS126 151 | Durable Goods Business BUS BUS127 152 | Consumer Goods Business BUS BUS128 153 | Consumer Services Business BUS BUS129 154 | Consulting / Financial Services Business BUS BUS130 155 | Science / Tech / Knowledge / Innovation Business BUS BUS131 156 | Medical / Health / Pharmeceutical Business BUS BUS132 157 | Education EDU EDU 158 | Student EDU EDU115 159 | National Ethnic CVL CVL116 160 | National Ethnic Majority CVL CVL117 161 | National Ethnic Minority CVL CVL118 162 | General Population / Civilian / Social CVL CVL 163 | Labor LAB LAB 164 | Legal JUD JUD114 165 | Media MED MED 166 | News MED MED133 167 | Print News MED 
MED134 168 | Radio News MED MED135 169 | Television News MED MED136 170 | Online News MED MED137 171 | Entertainment MED MED138 172 | Print Entertainment MED MED139 173 | Radio Entertainment MED MED140 174 | Television Entertainment MED MED141 175 | Online Entertainment MED MED142 176 | Medical / Health MED MED99e 177 | Refugees / Displaced REF REF 178 | National Religious REF REF119 179 | Religious Majority REF REF120 180 | Religious Minority REF REF121 181 | Nongovernmental Organizations / Activists IGO IGO143 182 | Agricultural NGOs IGOAGR IGO148 183 | Business NGOs IGOBUS IGO144 184 | Education NGOs IGOEDU IGO146 185 | Energy NGOs IGO IGO147 186 | Environment NGOs IGOENV IGOENV 187 | Ethnic NGOs IGOEDU IGO 188 | Development NGOs IGODEV IGODEV 189 | Human Rights NGOs IGOHRI IGOHRI 190 | Information / Communication / Transparency NGOs IGO IGO150 191 | Labor NGOs IGOLAB IGO154 192 | Legal NGOs IGOHRI IGOHRI185 193 | Media NGOs IGO IGO438 194 | Medical / Health NGOs IGOHLH IGOHLH 195 | Refugees / Displaced NGOs IGOREF IGO145 196 | Charity NGOs IGOHLH IGOHLH149 197 | Elite CVL ELI 198 | Unidentified Forces UAF UAF 199 | Unaffiliated Sectors iSec iSec 200 | International Religious REL REL 201 | Atheist ATH ATH 202 | Animist PAG PAG 203 | Shamanist SHA SHA 204 | Muslim MOS MOS 205 | Alewi MOSALE MOSALE 206 | Sunni MOSSUN MOSSUN 207 | Shia MOSSHI MOSSHI 208 | Sufi MOSSFI MOSSFI 209 | Druze MOSDRZ MOSDRZ 210 | Hindu HIN HIN 211 | Christian CHR CHR 212 | Catholic CHRCTH CHRCTH 213 | Protestant CHRPRO CHRPRO 214 | Coptic CHRCPT CHRCPT 215 | Jehovah's Witness CHRJHW CHRJHW 216 | Maronite CHRMRN CHRMRN 217 | Orthodox CHRDOX CHRDOX 218 | Buddhist BUD BUD 219 | Jewish JEW JEW 220 | Daoist TAO REL152 221 | Shinto REL REL153 222 | Sikh SIK SIK 223 | Ahmadiyya SIK SIK154 224 | Jain JAN JAN 225 | Mormon LDS LDS 226 | Baha'I BAH BAH 227 | Zoroastrian / Mazdi ZRO ZRO 228 | Confucian CON CON 229 | International Ethnic REL REL155 230 | Chechen CNY CNY 231 | Kashmiri KAS KAS 232 | 
Acehnese KAS KAS156 233 | Han KAS KAS157 234 | Tamil TAM TAM 235 | Sinhalese SNL SNL 236 | Bodo SNL SNL158 237 | Shan SNL SNL159 238 | Mizo SNL SNL160 239 | Chakma CKM CKM 240 | Kuki CKM CKM161 241 | Tripuri CKM CKM162 242 | Moro CKM CKM163 243 | Uyghur UIG UIG 244 | Hmong UIG UIG164 245 | Karen UIG UIG165 246 | Assamese UIG UIG167 247 | Tibetan TIB TIB 248 | Albanian (Ethnic Group) ABN ABN 249 | Arab ARB ARB 250 | Arab, Arabian ARB ARB16691 251 | Arab, Hassaniya ARB ARB16692 252 | Arab, Levant ARB ARB16693 253 | Arab, Libyan ARB ARB16694 254 | Arab, Maghreb ARB ARB16695 255 | Arab, Shuwa ARB ARB16696 256 | Arab, Sudan ARB ARB16697 257 | Arab, Yemeni ARB ARB16698 258 | Bedouin BED BED 259 | Bedouin, Arabian BED BED16716 260 | Bedouin, Saharan BED BED16717 261 | Croat (Ethnic Group) CRO CRO 262 | Gypsy GYP GYP 263 | Hausa HAU HAU 264 | Hutu HUT HUT 265 | Ibo IBO IBO 266 | Ljaw IJW IJW 267 | Krahn KRH KRH 268 | Kurd (Ethnic Group) KUR KUR 269 | Mandingoe MAN MAN 270 | Ogoni OGO OGO 271 | Palestinian PAL PAL 272 | Serb (Ethnic Group) SER SER 273 | Slav SLA SLA 274 | Slav, Eastern SLA SLA16862 275 | Slav, Southern SLA SLA16863 276 | Slav, Western SLA SLA16864 277 | Tuareg TRG TRG 278 | Turk (Ethnic Group) TRK TRK 279 | Tutsi TUT TUT 280 | Yoruba YRB YRB 281 | Josua Project Ethnic Groups ETN ETN 282 | Aborigine ETN ETN16680 283 | Aceh of Sumatra ETN ETN16681 284 | Adamawa-Ubangi ETN ETN16682 285 | Adi ETN ETN16683 286 | Afar ETN ETN16684 287 | Aimaq ETN ETN16685 288 | Albanian ETN ETN16686 289 | Altaic ETN ETN16687 290 | Amazon ETN ETN16688 291 | Anglo-American ETN ETN16689 292 | Anglo-Celt ETN ETN16690 293 | Armenian ETN ETN16699 294 | Assyrian / Aramaic ETN ETN16700 295 | Atlantic ETN ETN16701 296 | Atlantic-Jola ETN ETN16702 297 | Atlantic-Wolof ETN ETN16703 298 | Aymara ETN ETN16704 299 | Azerbaijani ETN ETN16705 300 | Aztec ETN ETN16706 301 | Bali-Sasak ETN ETN16707 302 | Baloch ETN ETN16708 303 | Baltic ETN ETN16709 304 | Banjar of Kalimantan ETN ETN16710 305 | 
Bantu ETN ETN16711 306 | Bantu, Makua-Yao ETN ETN16712 307 | Bantu, Cameroon-Bamileke ETN ETN16908 308 | Bantu, Central-Congo ETN ETN16909 309 | Bantu, Central-East ETN ETN16910 310 | Bantu, Central-Lakes ETN ETN16911 311 | Bantu, Central-Luba ETN ETN16912 312 | Bantu, Central-South ETN ETN16913 313 | Bantu, Central-Southeast ETN ETN16914 314 | Bantu, Central-Southwest ETN ETN16915 315 | Bantu, Central-Tanzania ETN ETN16916 316 | Bantu, Chewa-Sena ETN ETN16917 317 | Bantu, East-Coastal ETN ETN16918 318 | Bantu, Gikuyu-Kamba ETN ETN16919 319 | Bantu, Kongo ETN ETN16920 320 | Bantu, Nguni ETN ETN16921 321 | Bantu, Northwest ETN ETN16922 322 | Bantu, Shona ETN ETN16923 323 | Bantu, Sotho-Tswana ETN ETN16924 324 | Bantu, Southeastern ETN ETN16925 325 | Bantu, Swahili ETN ETN16926 326 | Makua-Yao ETN ETN16712 327 | Cameroon-Bamileke ETN ETN16908 328 | Central-Congo ETN ETN16909 329 | Central-East ETN ETN16910 330 | Central-Lakes ETN ETN16911 331 | Central-Luba ETN ETN16912 332 | Central-South ETN ETN16913 333 | Central-Southeast ETN ETN16914 334 | Central-Southwest ETN ETN16915 335 | Central-Tanzania ETN ETN16916 336 | Chewa-Sena ETN ETN16917 337 | East-Coastal ETN ETN16918 338 | Gikuyu-Kamba ETN ETN16919 339 | Kongo ETN ETN16920 340 | Nguni ETN ETN16921 341 | Northwest ETN ETN16922 342 | Shona ETN ETN16923 343 | Sotho-Tswana ETN ETN16924 344 | Southeastern ETN ETN16925 345 | Swahili ETN ETN16926 346 | Barito of Kalimantan ETN ETN16713 347 | Basque ETN ETN16714 348 | Batak-Nias of Sumatra ETN ETN16715 349 | Beja ETN ETN16718 350 | Bengali ETN ETN16719 351 | Benue ETN ETN16720 352 | Berber-Saharan ETN ETN16721 353 | Berber-Kabyle ETN ETN16722 354 | Berber-Riff ETN ETN16723 355 | Berber-Shawiya ETN ETN16724 356 | Berber-Shilha ETN ETN16725 357 | Bhil ETN ETN16726 358 | Bhojpur-Maithili ETN ETN16727 359 | Bhutanese ETN ETN16728 360 | Bihari ETN ETN16729 361 | Bouyei ETN ETN16730 362 | Brahui ETN ETN16731 363 | Bugi-Makassar of Sulawesi ETN ETN16732 364 | Bungku-Bajau ETN 
ETN16733 365 | Burmese ETN ETN16734 366 | Borneo-Kalimantan ETN ETN16735 367 | Afro-Caribbean ETN ETN16736 368 | Afro-Caribbean, Anglophone ETN ETN16737 369 | Afro-Caribbean, Dutch ETN ETN16906 370 | Afro-Caribbean, Francophone ETN ETN16907 371 | Anglophone ETN ETN16737 372 | Dutch ETN ETN16906 373 | Francophone ETN ETN16907 374 | Caucasus ETN ETN16738 375 | Chadic ETN ETN16739 376 | Kanuri-Saharan ETN ETN16740 377 | Cham ETN ETN16741 378 | Chinese ETN ETN16742 379 | Chinese-Hui ETN ETN16743 380 | Egyptian ETN ETN16744 381 | Ethiopian ETN ETN16745 382 | Fiji ETN ETN16746 383 | Filipino ETN ETN16747 384 | Filipino, Central ETN ETN16748 385 | Filipino, Muslim ETN ETN16749 386 | Filipino, Tribal ETN ETN16750 387 | Finno-Ugric ETN ETN16751 388 | Finno-Ugric, Saami ETN ETN16752 389 | Flores-Sumba-Alor ETN ETN16753 390 | French ETN ETN16754 391 | Fulani / Fulbe ETN ETN16755 392 | Garo-Tripuri ETN ETN16756 393 | Germanic ETN ETN16757 394 | Gond ETN ETN16758 395 | Gorontalo of Sulawesi ETN ETN16759 396 | Greek ETN ETN16760 397 | Guarani ETN ETN16761 398 | Guera-Naba of Chad ETN ETN16762 399 | Guinean ETN ETN16763 400 | Gujarati ETN ETN16764 401 | Gur ETN ETN16765 402 | Hani ETN ETN16766 403 | Hindi ETN ETN16767 404 | Hispanic ETN ETN16768 405 | Hungarian ETN ETN16769 406 | Igbo ETN ETN16770 407 | Ijaw ETN ETN16771 408 | Inuit ETN ETN16772 409 | Italian ETN ETN16773 410 | Japanese ETN ETN16774 411 | Jat ETN ETN16775 412 | Jawa ETN ETN16776 413 | Jews ETN ETN16777 414 | Kaili-Tomini of Sulawesi ETN ETN16778 415 | Kannada ETN ETN16779 416 | Kazakh ETN ETN16780 417 | Khoisan ETN ETN16781 418 | Kyrgyz ETN ETN16782 419 | Korean ETN ETN16783 420 | Kru ETN ETN16784 421 | Kuki-Chin-Naga ETN ETN16785 422 | Lampung of Sumatra ETN ETN16787 423 | Lao ETN ETN16788 424 | Li ETN ETN16789 425 | Lisu ETN ETN16790 426 | Madura of Java ETN ETN16791 427 | Malagasy ETN ETN16792 428 | Malay ETN ETN16793 429 | Malayali ETN ETN16794 430 | Maldivian ETN ETN16795 431 | Malinke ETN ETN16796 432 | 
Malinke-Bambara ETN ETN16797 433 | Malinke-Jula ETN ETN16798 434 | Maltese ETN ETN16799 435 | Maluku ETN ETN16800 436 | Maluku, Central ETN ETN16801 437 | Maluku, Northern ETN ETN16802 438 | Maluku, Southern ETN ETN16803 439 | Manchu ETN ETN16804 440 | Mande ETN ETN16805 441 | Marathi-Konkani ETN ETN16806 442 | Maya ETN ETN16807 443 | Melayu of Sumatra ETN ETN16808 444 | Miao / Hmong ETN ETN16809 445 | Micronesian ETN ETN16810 446 | Minahasa-Sangir of Sulawesi ETN ETN16811 447 | Minangkabau-Rejang of Sumatra ETN ETN16812 448 | Miri-Kachin ETN ETN16813 449 | Mixe ETN ETN16814 450 | Mixteco ETN ETN16815 451 | Mizo-Lushai ETN ETN16816 452 | Mongolian ETN ETN16817 453 | Mon-Khmer ETN ETN16818 454 | Munda-Santal ETN ETN16819 455 | Musi of Sumatra ETN ETN16820 456 | Nepali-Pahari ETN ETN16821 457 | New Caledonia ETN ETN16822 458 | New Guinea ETN ETN16823 459 | Nilotic ETN ETN16824 460 | North American Indigenous ETN ETN16825 461 | Nosu ETN ETN16826 462 | Nuba Mountains ETN ETN16827 463 | Nubian ETN ETN16828 464 | Nupe ETN ETN16829 465 | Nuristan ETN ETN16830 466 | Ogan of Sumatra ETN ETN16831 467 | Omotic ETN ETN16832 468 | Oraon ETN ETN16833 469 | Oriya ETN ETN16834 470 | Oromo ETN ETN16835 471 | Other Central American Indigenous ETN ETN16836 472 | Other Hispanic American ETN ETN16837 473 | Other Pacific Islanders ETN ETN16838 474 | Other Southeast Asian ETN ETN16839 475 | Other South Asian ETN ETN16840 476 | Other Sub-Saharan African ETN ETN16841 477 | Otomi ETN ETN16842 478 | Ouaddai-Fur ETN ETN16843 479 | Parsee ETN ETN16844 480 | Pasemah of Sumatra ETN ETN16845 481 | Pashtun ETN ETN16846 482 | Persian ETN ETN16847 483 | Polynesian ETN ETN16848 484 | Portuguese ETN ETN16849 485 | Portuguese, Brazilian ETN ETN16850 486 | Portuguese, European ETN ETN16851 487 | Punjabi ETN ETN16852 488 | Pygmy ETN ETN16853 489 | Quechua ETN ETN16854 490 | Rajasthan ETN ETN16855 491 | Romanian ETN ETN16856 492 | South Himalaya ETN ETN16857 493 | Sara-Bagirmi ETN ETN16858 494 | 
Scandinavian ETN ETN16859 495 | Sindhi ETN ETN16860 496 | Sinhala ETN ETN16861 497 | Solomons ETN ETN16865 498 | Somali ETN ETN16866 499 | Songhai ETN ETN16867 500 | Soninke ETN ETN16868 501 | South American Indigenous ETN ETN16869 502 | Spanish ETN ETN16870 503 | Sudanic ETN ETN16871 504 | Sunda-Betawi of Java ETN ETN16872 505 | Susu ETN ETN16873 506 | Tai ETN ETN16874 507 | Tai Dam ETN ETN16875 508 | Tai-Kadai ETN ETN16876 509 | Taiwan Indigenous ETN ETN16877 510 | Tajik ETN ETN16878 511 | Talysh ETN ETN16879 512 | Telugu ETN ETN16880 513 | Thai ETN ETN16881 514 | Timor ETN ETN16882 515 | Toraja of Sulawesi ETN ETN16883 516 | Tukangbesi of Sulawesi ETN ETN16884 517 | Turkish ETN ETN16885 518 | Turkmen ETN ETN16886 519 | Ural-Siberian ETN ETN16887 520 | Urdu Muslim ETN ETN16888 521 | Uzbek ETN ETN16889 522 | Vanuatu ETN ETN16890 523 | Vietnamese ETN ETN16891 524 | West China / Lolo ETN ETN16892 525 | West Malaysia Indigenous ETN ETN16893 526 | Yao-Mien ETN ETN16894 527 | Zapoteco ETN ETN16895 528 | Zhuang ETN ETN16896 529 | Undefined ETN ETN16897 530 | Arab World ETN ETN16898 531 | Caucasian Peoples ETN ETN16899 532 | Sub-Saharan African ETN ETN16900 533 | Luri-Bakhtiari ETN ETN16901 534 | Deaf ETN ETN16902 535 | Afro-American ETN ETN16903 536 | Afro-American, Hispanic ETN ETN16904 537 | Afro-American, Northern ETN ETN16905 538 | Banda ETN ETN533 539 | Ideological --- OPP 540 | Nationalist --- OPP173 541 | Fundamentalist -- OPP174 542 | Secular --- OPP175 543 | Tribalist --- OPP176 544 | Communist --- OPP177 545 | Center Left --- OPP178 546 | Center Right --- OPP179 547 | Far Left --- OPP180 548 | Far Right --- OPP181 549 | Centrist --- OPP182 550 | Libertarian --- OPP183 551 | Anarchist --- OPP184 552 | Nongovernmental Organization (International) IGO IGO 553 | Medical / Health IGOs IGOHLH IGOHLH 554 | Business IGOs IGOBUS IGOBUS 555 | Refugees IGOs IGOREF IGOREF 556 | Education IGOs IGOEDU IGOEDU 557 | Development IGOs IGODEV IGODEV 558 | Energy IGOs IGOBUS 
IGOBUS 559 | Agricultural IGOs IGOAGR IGOAGR 560 | Human Rights IGOs IGOHRI IGOHRI 561 | Charity IGOs IGO IGO 562 | Information / Communication / Transparency IGOs IGO IGO 563 | Environment IGOs IGO IGO196 564 | Legal IGOs IGO IGO9f7 565 | International Government Organization IGO IGO 566 | Global IGO IGO 567 | Global Information / Communication / Transparency IGOs IGOMED IGOMED 568 | Global Energy IGOs IGOBUS IGOBUS 569 | Global Diplomatic IGOs IGO IGO 570 | Global Defense / Security IGOs IGOMIL IGOMIL 571 | Global Law / Justice / Judicial IGOs IGOJUD IGOJUD 572 | Global Environment IGOs IGOENV IGOENV 573 | Global Economic/Financial/Trade IGOs IGOBUS IGOBUS 574 | Global Development IGOs IGODEV IGODEV 575 | Global Health IGOs IGOHLH IGOHLH 576 | Global Human Rights IGOs IGOHRI IGOHRI 577 | Global Agricultural IGOs IGOAGR IGOAGR 578 | Global Refugees IGOs IGOREF IGOREF 579 | Regional IGO IGO 580 | Regional Information / Communication / Transparency IGOs IGOMED IGOMED 581 | Regional Energy IGOs IGOBUS IGOBUS 582 | Regional Diplomatic IGOs IGO IGO 583 | Regional Defense / Security IGOs IGOMIL IGOMIL 584 | Regional Law / Justice / Judicial IGOs IGOJUD IGOJUD 585 | Regional Environment IGOs IGOENV IGOENV 586 | Regional Economic/Financial/Trade IGOs IGOBUS IGOBUS 587 | Regional Development IGOs IGODEV IGODEV 588 | Regional Health IGOs IGOHLH IGOHLH 589 | Regional Human Rights IGOs IGOHRI IGOHRI 590 | Regional Agricultural IGOs IGOAGR IGOAGR 591 | Regional Refugees IGOs IGOREF IGOREF 592 | International Dissident INT INT 593 | International Criminals / Gangs INT INT205 594 | International Protestors / Popular Opposition / Mobs INT INT206 595 | International Banned Parties INT INT207 596 | International Exiles INT INT208 597 | International Radicals / Extremists / Fundamentalists IMG IMG 598 | International Terrorists IMG IMG210 599 | International Rebels IMG IMG211 600 | International Insurgents IMG IMG212 601 | International Separatists IMG IMG213 602 | Multinational 
Corporation MNC MNC 603 | Agricultural MNCs MNC MNC215 604 | Transportation MNCs MNC MNC216 605 | Utilities MNCs MNC MNC217 606 | Heavy Industrial / Chemical MNCs MNC MNC218 607 | Defense / Security MNCs MNC MNC219 608 | Durable Goods MNCs MNC MNC220 609 | Consumer Goods MNCs MNC MNC221 610 | Consumer Services MNCs MNC MNC222 611 | Consulting / Financial Services MNCs MNC MNC223 612 | Science / Tech / Knowledge / Innovation MNCs MNC MNC224 613 | Medical / Health / Pharmeceutical MNCs MNC MNC225 614 | NULL ——— -------------------------------------------------------------------------------- /countrynames.txt: -------------------------------------------------------------------------------- 1 | NULL --- 000 2 | Afghanistan AFG 700 3 | Albania ALB 339 4 | Algeria DZA 615 5 | American Samoa USAASM 000 6 | Andorra AND 232 7 | Angola AGO 540 8 | Anguilla AIA 000 9 | Antigua and Barbuda ATG 058 10 | Antarctica ANT 000 11 | Argentina ARG 160 12 | Armenia ARM 371 13 | Aruba ABW 000 14 | Australia AUS 900 15 | Austria AUT 305 16 | Azerbaijan AZE 373 17 | Bahamas BHS 031 18 | Bahrain BHR 692 19 | Bangladesh BGD 771 20 | Barbados BRB 053 21 | Belarus BLR 370 22 | Belgium BEL 211 23 | Belize BLZ 080 24 | Benin BEN 434 25 | Bermuda BMU 000 26 | Bhutan BTN 760 27 | Bolivia BOL 145 28 | Bosnia and Herzegovina BIH 346 29 | Botswana BWA 571 30 | Brazil BRA 140 31 | British Virgin Islands GBRVGB 000 32 | Brunei Darussalam BRN 835 33 | Brunei BRN 835 34 | Bulgaria BGR 355 35 | Burkina Faso BFA 439 36 | Burundi BDI 516 37 | Cambodia KHM 811 38 | Cameroon CMR 471 39 | Canada CAN 020 40 | Cape Verde CPV 402 41 | Cayman Islands CYM 000 42 | Central African Republic CAF 482 43 | Chad TCD 483 44 | Chile CHL 155 45 | China CHN 710 46 | Colombia COL 100 47 | Comoros COM 581 48 | Congo COG 484 49 | Cook Islands COK 000 50 | Cook Island COK 000 51 | Costa Rica CRI 094 52 | Cote d'Ivoire CIV 437 53 | Cote d'Ivoire CIV 437 54 | Croatia HRV 344 55 | Cuba CUB 040 56 | Curaçao CUW 000 57 | Cyprus CYP 
352 58 | Czech Republic CZE 316 59 | Democratic Republic of the Congo COD 490 60 | Democratic Republic of Congo COD 490 61 | Denmark DNK 390 62 | Djibouti DJI 522 63 | Dominica DMA 054 64 | Dominican Republic DOM 042 65 | Ecuador ECU 130 66 | Egypt EGY 651 67 | El Salvador SLV 092 68 | Equatorial Guinea GNQ 411 69 | Eritrea ERI 531 70 | Estonia EST 366 71 | Ethiopia ETH 530 72 | Faeroe Islands DNKFRO 000 73 | Faroe Islands DNKFRO 000 74 | Falkland Islands GBRFLK 000 75 | Fiji FJI 950 76 | Finland FIN 375 77 | France FRA 220 78 | French Guiana FRAGUF 000 79 | French Polynesia FRAPYF 000 80 | Gabon GAB 481 81 | Gambia GMB 420 82 | Georgia GEO 372 83 | Germany DEU 255 84 | Ghana GHA 452 85 | Gibraltar GBRGIB 000 86 | United Kingdom GBR 200 87 | Greece GRC 350 88 | Greenland DNKGRL 000 89 | Grenada GRD 055 90 | Guadeloupe FRAGLP 000 91 | Guam USAGUM 000 92 | Guatemala GTM 090 93 | Guernsey GGY 000 94 | Guinea GIN 95 | Guinea Bissau GNB 404 96 | Guinea-Bissau GNB 404 97 | Guyana GUY 110 98 | Haiti HTI 041 99 | Holy See VAT 327 100 | Honduras HND 091 101 | Hong Kong HKG 997 102 | Hungary HUN 310 103 | Iceland ISL 395 104 | India IND 750 105 | Indonesia IDN 850 106 | Iran IRN 630 107 | Iraq IRQ 645 108 | Ireland IRL 205 109 | Isle Of Man GBRIMY 000 110 | Israel ISR 666 111 | Italy ITA 325 112 | Jamaica JAM 051 113 | Japan JPN 740 114 | Jersey GBR 000 115 | Jordan JOR 663 116 | Kazakhstan KAZ 705 117 | Kenya KEN 501 118 | Kiribati KIR 946 119 | Kosovo KSV 347 120 | Kyrgyzstan KGZ 703 121 | Kuwait KWT 690 122 | Laos LAO 812 123 | Latvia LVA 367 124 | Lebanon LBN 660 125 | Lesotho LSO 570 126 | Liberia LBR 450 127 | Libya LBY 620 128 | Liechtenstein LIE 223 129 | Lithuania LTU 368 130 | Luxembourg LUX 212 131 | Macedonia MKD 343 132 | the former Yugoslav Republic of Macedonia MKD 343 133 | Macao MAC 000 134 | Madagaskar MDG 580 135 | Madagascar MDG 580 136 | Malawi MWI 553 137 | Malaysia MYS 820 138 | Maldives MDV 781 139 | Mali MLI 432 140 | Malta MLT 338 141 | Marshall 
Islands MHL 983 142 | Martinique MTQ 000 143 | Mauritania MRT 435 144 | Mauritius MUS 590 145 | Mayotte FRA 000 146 | Mexico MEX 070 147 | Micronesia FSM 987 148 | Moldova MDA 359 149 | Moldova, Republic of MDA 359 150 | Monaco MCO 221 151 | Mongolia MNG 712 152 | Montserrat GBRMSR 000 153 | Montenegro MNE 341 154 | Morocco MAR 600 155 | Mozambique MOZ 541 156 | Myanmar MMR 775 157 | Namibia NAM 565 158 | Nauru NRU 970 159 | Nepal NPL 790 160 | Netherlands NLD 210 161 | Netherlands Antilles NLDANT 000 162 | New Caledonia FRANCL 000 163 | New Zealand NZL 920 164 | Nicaragua NIC 093 165 | Niger NER 436 166 | Nigeria NGA 475 167 | Niue NZLNIU 000 168 | Norfolk Island AUSNFK 000 169 | North Korea PRK 731 170 | Northern Mariana Islands MNP 000 171 | Norway NOR 385 172 | Oman OMN 698 173 | Pakistan PAK 770 174 | Palau PLW 986 175 | Occupied Palestinian Territory PSE 000 176 | Palestine PSE 000 177 | Gaza PSEGZA 000 178 | Panama PAN 095 179 | Papua New Guinea PNG 910 180 | Paraguay PRY 150 181 | Peru PER 135 182 | Philippines PHL 840 183 | Pitcairn GBRPCN 000 184 | Poland POL 290 185 | Portugal PRT 235 186 | Puerto Rico USAPRI 000 187 | Qatar QAT 694 188 | Republic of South Africa ZAF 560 189 | South Africa ZAF 560 190 | Romania ROU 360 191 | Russian Federation RUS 365 192 | Russia RUS 365 193 | Rwanda RWA 517 194 | Saint Barthelemy FRA 000 195 | Saint Helena GBRSHN 000 196 | Saint Lucia LCA 056 197 | Saint Kitts and Nevis KNA 060 198 | Saint Martin (French part) MAF 000 199 | Saint Vincent and the Grenadines VCT 057 200 | Samoa WSM 990 201 | San Marino SMR 331 202 | Sao Tome and Principe STP 403 203 | Saudi Arabia SAU 670 204 | Senegal SEN 433 205 | Serbia and Montenegro SRB 345 206 | Serbia SRB 345 207 | Seychelles SYC 591 208 | Sierra Leone SLE 451 209 | Singapore SGP 830 210 | Sint Maarten SXM 000 211 | Slovakia SVK 317 212 | Slovenia SVN 349 213 | Solomon Islands SLB 940 214 | Somalia SOM 520 215 | South Korea KOR 732 216 | South Sudan SSD 626 217 | Spain ESP 230 218 
| Sri Lanka LKA 780 219 | Sudan SDN 625 220 | Suriname SUR 115 221 | Swaziland SWZ 572 222 | Sweden SWE 380 223 | Switzerland CHE 225 224 | Syria SYR 652 225 | Taiwan TWN 713 226 | Tajikistan TJK 702 227 | United Republic Of Tanzania TZA 510 228 | Tanzania TZA 510 229 | Thailand THA 800 230 | Timor Leste TMP 860 231 | Timor-Leste TMP 860 232 | Togo TGO 461 233 | Tokelau NZLTKL 000 234 | Tonga TON 955 235 | Trinidad and Tobago TTO 052 236 | Tunisia TUN 616 237 | Turkey TUR 640 238 | Turkmenistan TKM 701 239 | Turks and Caicos Islands TCA 000 240 | Tuvalu TUV 947 241 | Uganda UGA 500 242 | Ukraine UKR 369 243 | United Arab Emirates ARE 696 244 | Uruguay URY 165 245 | United States USA 002 246 | United States Virgin Islands USAVIR 000 247 | U.S. Virgin Islands USAVIR 000 248 | Uzbekistan UZB 704 249 | Vanuatu VUT 935 250 | Venezuela VEN 101 251 | Vietnam VNM 816 252 | Wallis and Futuna Islands FRNWLF 000 253 | Wallis and Futuna FRNWLF 000 254 | Western Sahara ESH 000 255 | Yemen YEM 679 256 | Zambia ZMB 551 257 | Zimbabwe ZWE 552 258 | British Indian Ocean Territory IOT 000 259 | Christmas Island CXR 000 260 | Cocos Islands CCK 000 261 | Cocos (Keeling) Islands CCK 000 -------------------------------------------------------------------------------- /text_to_CAMEO.py: -------------------------------------------------------------------------------- 1 | """ 2 | text_to_CAMEO.py 3 | 4 | This program takes data in the text-oriented format of the ICEWS files and converts this to a more conventional data 5 | format using the CAMEO codes. The conversion process is described in detail in the file text_to_CAMEO_documentation.pdf. 6 | 7 | Repository for code: https://github.com/openeventdata/text_to_CAMEO 8 | 9 | To run: python text_to_CAMEO.py [[-F] [-c] [-t ] [-m] 10 | 11 | Options: 12 | -F: Process the FOUO format. Default: Process the Dataverse format 13 | -c: Include COW numerical country codes in addition to ISO-3166 code. 
Default: Include only the ISO codes 14 | -t <filename>: Process the files listed one per line in the text file <filename>. 15 | Default: process all of the files in the working directory that end in “.csv” (FOUO format) or “.tab”/“.tsv” (Dataverse format) 16 | -m: Output all of the substate agents in a concatenated string. 17 | 18 | Requires: 19 | CAMEO_codefile.txt [FOUO format] 20 | countrynames.txt 21 | agentnames.txt 22 | 23 | Comments on the April-2017 modifications 24 | 25 | 1. This program merges two earlier versions I'd used separately for the two formats. Both of those were used 26 | successfully in large-scale projects so I'm confident they were working, but I've not done that sort of testing 27 | on this merged version (I have done basic testing on it...really...). So definitely check and make sure the output 28 | makes sense. 29 | 30 | 2. The fact that there are at least two incompatible formats of the ICEWS data suggests the possibility that there 31 | might be others: again, check to make sure your output makes sense. 32 | 33 | 3. There's an assortment of “commented out” code in the program that was used earlier to do some basic marginals on 34 | the data: this could be reactivated but is in Python 2.6 and will also need a bit of updating to Python 3.5. 35 | 36 | SYSTEM REQUIREMENTS 37 | This program has been successfully run under Mac OS 10.10; it is standard Python 3.5 so it should also run in Unix or Windows. 38 | 39 | PROVENANCE: 40 | Programmer: Philip A. Schrodt 41 | Parus Analytics LLC 42 | Charlottesville, VA 22901 U.S.A. 43 | http://eventdata.parusanalytics.com 44 | 45 | Copyright (c) 2021 Philip A. Schrodt. All rights reserved. 
46 | 47 | The MIT License (MIT) 48 | 49 | Permission is hereby granted, free of charge, to any person obtaining a copy of 50 | this software and associated documentation files (the "Software"), to deal in 51 | the Software without restriction, including without limitation the rights to 52 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 53 | the Software, and to permit persons to whom the Software is furnished to do so, 54 | subject to the following conditions: 55 | 56 | The above copyright notice and this permission notice shall be included in all 57 | copies or substantial portions of the Software. 58 | 59 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 60 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 61 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 62 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 63 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 64 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
65 | 66 | Report bugs to: schrodt735@gmail.com 67 | 68 | REVISION HISTORY: 69 | 18-June-14: Initial version 70 | 30-March-15: Modified to work with DataVerse filenames 71 | 28-April-17: Align assignment of quad codes with polarity of Goldstein scores; added command-line options; merged FOUO and Dataverse versions 72 | 26-Mar-2020: Code both .tab and .tsv files in default mode to accommodate 2018 change in formatting: see GitHub README.md 73 | 74 | ---------------------------------------------------------------------------------- 75 | """ 76 | # coding: utf-8 77 | import sys 78 | import os 79 | 80 | # ======== global initializations ========= # 81 | 82 | CAMEO_eventcodes = {} # conversion dictionaries 83 | CAMEO_codefile = "CAMEO_codefile.txt" # translates event text to CAMEO event codes 84 | countryfile = "countrynames.txt" # translates country names to ISO-3166-alpha-3 and COW numeric codes 85 | agentfile = "agentnames.txt" # translates 'sectors' text to CAMEO agent codes 86 | outfile_prefix = "reduced.ICEWS." 
FILELISTNAME = ""

"""
srccountry = {}   # ancillary dictionaries used to do frequency counts
tarcountry = {}
events = {}
"""

countrynames = {}
Missing_names = {}
sectornames = {}
sectorcounts = {}
Missing_sectors = {}
Missing_eventcodes = {}   # debugging dictionary used to check for phrases not in the files

# Legacy module-level state: do_count() and reduce_sectors() fall back to these
# when they are called without the explicit optional arguments.
ALLAGENTS = False
agentlist = []
field = []

# ordered list of CAMEO agent codes to extract for the agent field: see documentation
agentcodes = ['MIL','GOV','REB','OPP', 'PTY', 'COP','JUD','SPY','IGO','MED','EDU','BUS','CRM','CVL','---']

# Zero-based column positions used for the Dataverse (.tab/.tsv) format.
# 'cameo' is -1 because in this layout the 'evt' column is used as the CAMEO code itself.
DATAVERSE_COLUMNS = {
    'date': 1,
    'srcagt': 3,
    'src': 4,
    'evt': 6,
    'gold': 7,
    'taragt': 9,
    'tar': 10,
    'cameo': -1,
}

# ============ function definitions ================ #

def do_count(thedict, fieldindex, fields=None):
    """Increment the frequency count in *thedict* for one field of a record.

    fields: the split record; defaults to the module-level ``field`` list.
    """
    if fields is None:
        fields = field
    key = fields[fieldindex]
    thedict[key] = thedict.get(key, 0) + 1


def get_event_code(phrase):
    """Return the CAMEO event code for an event phrase, or '' if it is unknown."""
    #   do_sub_count(Missing_eventcodes, phrase)
    return CAMEO_eventcodes.get(phrase, '')


"""def do_sub_count(thedict, phrase):
    if phrase in thedict:
        thedict[phrase] += 1
    else:
        thedict[phrase] = 1 """


def get_country_code(phrase):
    """Return the code tuple stored for a country name, or '' if it is unknown."""
    #   do_sub_count(Missing_names, phrase)
    return countrynames.get(phrase, '')


def get_sector_code(phrase):
    """Return the agent code stored for a sector phrase, or '' if it is unknown."""
    #   do_sub_count(Missing_sectors, phrase)
    return sectornames.get(phrase, '')


def reduce_sectors(agents=None, all_agents=None):
    """Reduce a list of agent codes to the single string written to the output.

    agents:     list of agent codes; defaults to the module-level ``agentlist``.
    all_agents: True to concatenate every distinct code (the -m option);
                defaults to the module-level ``ALLAGENTS``.

    In all-agents mode the distinct codes are joined with '-' in sorted order
    (so the output is reproducible between runs), ignoring the '---' null code
    and the '' produced by unknown sector phrases.  Otherwise the first code
    of ``agentcodes`` that is present wins, then the first code in the list.
    An empty list gives 'OTH'.
    """
    if agents is None:
        agents = agentlist
    if all_agents is None:
        all_agents = ALLAGENTS
    if all_agents:
        if not agents:
            return "OTH"
        return "-".join(sorted(set(agents) - {"---", ""}))
    for code in agentcodes:
        if code in agents:
            return code
    return agents[0] if agents else 'OTH'


def null_country(do_cow=False):
    """Return the tuple of output columns used for an unknown country.

    This must be a tuple: extending the output list with the bare string
    '---' would add three separate one-character columns.
    """
    return ("---", "000") if do_cow else ("---",)


def quad_code(camcode, goldscore):
    """Return the quad code ('1'..'4') for a CAMEO code and Goldstein score.

    Codes 01-05 are '1' when the score is non-negative and '3' otherwise (an
    unparsable score such as 'NULL' is treated as non-negative); 06-08 are
    '2'; 09 and 10-14 are '3'; 15 and above are '4'.  Returns '' when the
    code is empty or too short to classify.
    """
    if not camcode:
        return ''
    if camcode[0] == '2':
        return '4'
    if len(camcode) < 2:
        return ''
    if camcode[0:2] == '09':   # investigate, which frankly has always been ambiguous
        return '3'
    if camcode[0] == '0':
        if camcode[1] < '6':
            try:
                cooperative = float(goldscore) >= 0.0
            except ValueError:
                cooperative = True
            return '1' if cooperative else '3'
        return '2'
    return '3' if camcode[1] < '5' else '4'


def convert_record(fields, columns, do_fouo=False, do_cow=False, all_agents=False):
    """Convert one split input record into the list of output columns.

    fields:  the record split on tabs, with empty fields already set to 'NULL'.
    columns: dict of column positions with the keys of DATAVERSE_COLUMNS.
    do_fouo: True when the event column holds text to look up in the code file.
    do_cow:  True when country entries carry a COW code as well as the ISO code.

    Returns [date, source country column(s), source agent, target country
    column(s), target agent, CAMEO code, Goldstein score, quad code].
    """
    unknown = null_country(do_cow)
    outlist = [fields[columns['date']]]
    for country, sectors in (('src', 'srcagt'), ('tar', 'taragt')):
        outlist.extend(countrynames.get(fields[columns[country]], unknown))
        codes = [get_sector_code(phrase) for phrase in fields[columns[sectors]].split(',')]
        outlist.append(reduce_sectors(codes, all_agents))

    if columns['cameo'] > 0:
        camcode = fields[columns['cameo']]
    elif do_fouo:
        camcode = get_event_code(fields[columns['evt']].strip())
    else:
        camcode = fields[columns['evt']].strip()
    goldscore = fields[columns['gold']]
    outlist.append(camcode)
    outlist.append(goldscore)
    outlist.append(quad_code(camcode, goldscore))
    return outlist


"""def print_sorted_dict(thedict):
    print("\n", end='')
    d_view = sorted( ((v,k) for k,v in thedict.iteritems()), reverse=True)
    for v,k in d_view:
        print(v,k)"""


def _fail(*message):
    """Print an error message (with the terminal bell) and exit with status 1."""
    print("\aError:", *message)
    sys.exit(1)


def _load_event_codes(path):
    """Read the LABEL lines of the CAMEO code file into a phrase -> code dict."""
    codes = {}
    with open(path, 'r', encoding='utf-8') as fin:
        for line in fin:
            if line.startswith('LABEL'):
                # "LABEL: 010= Make statement" -> code "010", phrase "Make statement"
                code, _, phrase = line[line.find(' ') + 1:].partition(' ')
                codes[phrase.strip()] = code.rstrip('=')
    return codes


def _load_country_names(path, do_cow):
    """Read the tab-delimited country file into a name -> code-tuple dict."""
    names = {}
    with open(path, 'r', encoding='utf-8') as fin:
        for line in fin:
            part = line.rstrip('\n').split('\t')
            if len(part) < 2:
                continue   # blank or malformed line
            if do_cow:
                # a few entries have no COW code: use the '000' null value
                cow = part[2] if len(part) > 2 and part[2] else "000"
                names[part[0]] = (part[1], cow)
            else:
                names[part[0]] = (part[1],)
    return names


def _load_sector_names(path):
    """Read the tab-delimited agent file into a sector phrase -> agent code dict."""
    names = {}
    with open(path, 'r', encoding='utf-8') as fin:
        for line in fin:
            part = line.rstrip('\n').split('\t')
            if len(part) >= 2:
                names[part[0]] = part[1]
    return names


def _read_file_list(path):
    """Return the file names listed one per line in *path*, skipping blank lines."""
    with open(path, 'r', encoding='utf-8') as fdir:
        return [fn.rstrip('\n') for fn in fdir if fn.strip()]


def _find_input_files(directory, suffixes):
    """Return every file under *directory* whose name ends in one of *suffixes*."""
    found = []
    for path, subdirs, files in os.walk(directory):
        for name in files:
            if name.endswith(tuple(suffixes)):
                found.append(os.path.join(path, name))
    return found


def _fouo_columns(header):
    """Work out the column positions of a FOUO file from its split header line."""
    columns = {'date': 1, 'srcagt': 3, 'src': 4, 'evt': 5}
    if len(header) > 6 and header[6] == "Intensity":   # no CAMEO code column
        columns.update(tar=9, taragt=8, gold=6, cameo=-1)
    else:
        columns.update(cameo=6, gold=7, taragt=9, tar=10)
    return columns


def _process_file(filename, do_fouo, do_cow, all_agents):
    """Convert one ICEWS file, writing the result to the working directory."""
    try:
        fin = open(filename, 'r', encoding='utf-8')
    except IOError:
        _fail("Could not find the input file", filename)
    with fin:
        print('Reading', filename)
        if do_fouo:   # figure out the format from the header line
            columns = _fouo_columns(fin.readline().rstrip('\n').split('\t'))
        else:
            # NOTE(review): no header line is skipped here, so if the Dataverse
            # file has one it is converted like a data row -- confirm intended.
            columns = DATAVERSE_COLUMNS
        needed = max(columns.values()) + 1
        outname = outfile_prefix + os.path.splitext(os.path.basename(filename))[0] + '.txt'
        with open(outname, 'w', encoding='utf-8') as fout:
            for line in fin:
                # replace missing fields with 'NULL'
                fields = [text or 'NULL' for text in line.rstrip('\n').split('\t')]
                if len(fields) < needed:
                    if line.strip():
                        print("Warning: skipping short record in", filename, file=sys.stderr)
                    continue
                outlist = convert_record(fields, columns, do_fouo, do_cow, all_agents)
                #if '---' not in outlist:
                fout.write('\t'.join(outlist) + '\n')


# ============ main program =============== #

def main(argv=None):
    """Read the command line, load the lookup files and convert every input file."""
    global ALLAGENTS, FILELISTNAME

    # read command line, which can in fact be done without using the argparse library...
    argv = sys.argv if argv is None else argv
    do_fouo = "-F" in argv
    do_cow = "-c" in argv
    ALLAGENTS = "-m" in argv

    if do_fouo:
        try:
            CAMEO_eventcodes.update(_load_event_codes(CAMEO_codefile))
        except IOError:
            _fail("Could not find the event code file", CAMEO_codefile)

    if "-t" in argv:
        try:
            FILELISTNAME = argv[argv.index("-t") + 1]
        except IndexError:
            _fail("The -t option requires the name of a file list")
        try:
            filelist = _read_file_list(FILELISTNAME)
        except IOError:
            _fail("Could not find the file list", FILELISTNAME)
    else:
        # get list of ICEWS files based on extension
        suffixes = [".csv"] if do_fouo else [".tab", ".tsv"]
        filelist = _find_input_files(os.getcwd(), suffixes)

    try:
        countrynames.update(_load_country_names(countryfile, do_cow))
    except IOError:
        _fail("Could not find the country names file", countryfile)

    try:
        sectornames.update(_load_sector_names(agentfile))
    except IOError:
        _fail("Could not find the agents file", agentfile)

    for filename in filelist:
        _process_file(filename, do_fouo, do_cow, ALLAGENTS)

    print("Finished")


"""
<17.04.28> This code hasn't been transitioned to Python 3.5
print_sorted_dict(events)   # code for printing frequencies
print_sorted_dict(srccountry)
print_sorted_dict(tarcountry)
"""

"""
# code for printing sector frequencies
print "\n",
d_view = sorted( ((v,k) for k,v in sectorcounts.iteritems()), reverse=True)
total = 0
for v,k in d_view:
    total += v
print "Total sector codes:",total
for v,k in d_view:
    print v,v*10000/total, k,
    if k in sectornames:
        print sectornames[k]
    else: print '---'
    if v*10000/total < 1:   # stop printing when the proportion is less than 0.01%
        break


print "=== MISSING PHRASES ====",   # code of printing missing phrases
#print_sorted_dict(Missing_eventcodes)
#print_sorted_dict(Missing_names)
print_sorted_dict(Missing_sectors)
"""

if __name__ == "__main__":
    main()

# --------------------------------------------------------------------------------
# /text_to_CAMEO_1.1B1_doc.odt:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/openeventdata/text_to_CAMEO/6d7b3abebe2585bcbe48053987382d2af1865caf/text_to_CAMEO_1.1B1_doc.odt
# --------------------------------------------------------------------------------
# /text_to_CAMEO_1.1B1_doc.pdf:
# --------------------------------------------------------------------------------
https://raw.githubusercontent.com/openeventdata/text_to_CAMEO/6d7b3abebe2585bcbe48053987382d2af1865caf/text_to_CAMEO_1.1B1_doc.pdf --------------------------------------------------------------------------------