├── .gitignore ├── README.md ├── es.js ├── file_prepare.js ├── gdelt ├── columns.js ├── eventcode_names.js ├── eventrootcode_names.js ├── formatter.js └── quadclass_names.js ├── indexer.js ├── package.json └── samples └── 20151028.export.CSV /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | node_modules 3 | .idea 4 | samples 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Elasticsearch 6.x + GDELT 2 | ### Get data into ES, display and filter with Kibana 3 | === 4 | 5 | === 6 | ## Setup 7 | ### 1. Install Elastic 8 | Download and Run ElasticSearch 6.x from https://www.elastic.co/downloads/elasticsearch 9 | Download and Run Kibana 6.x from https://www.elastic.co/downloads/kibana 10 | 11 | ### 2. Set up index mapping 12 | In Kibana dev console: http://localhost:5601/app/kibana#/dev_tools/console?_g=(), set up an index template. 13 | This basically tells elastic that by default, we're not looking to analyze any strings for full text search. 14 | 15 | ```curl 16 | PUT _template/template_1 17 | { 18 | "index_patterns": ["*"], 19 | "settings": { 20 | "number_of_shards": 1 21 | }, 22 | "mappings": { 23 | "_default_": { 24 | "dynamic_templates": [ 25 | { 26 | "notanalyzed": { 27 | "match": "*", 28 | "match_mapping_type": "string", 29 | "mapping": { 30 | "type": "keyword" 31 | } 32 | } 33 | } 34 | ] 35 | } 36 | 37 | } 38 | } 39 | ``` 40 | 41 | Set up mapping for the GEO & Date 42 | ```curl 43 | PUT /elastic_gdelt 44 | { 45 | "settings" : { 46 | "index" : { 47 | "number_of_shards" : 1, 48 | "number_of_replicas" : 0 49 | } 50 | }, 51 | "mappings": { 52 | "event": { 53 | "properties": { 54 | "ActionGeo_Location": { 55 | "type": "geo_point" 56 | }, 57 | "Actor1Geo_Location": { 58 | "type": "geo_point" 59 | }, 60 | "Actor2Geo_Location": { 61 | "type": "geo_point" 62 | }, 63 | "DATEADDED" : { 64 | "type":"date", 65 | "format": "yyyy-MM-dd" 66 | } 67 | } 68 | } 69 | } 70 | } 71 | ``` 72 | 73 | I ran into a stupid issue with elastic not allowing me to add data because of low disk space on my local machine. 74 | 75 | I set more loose threshholds 76 | ```curl 77 | PUT _cluster/settings 78 | { 79 | "transient": { 80 | "cluster.routing.allocation.disk.watermark.low": "2gb", 81 | "cluster.routing.allocation.disk.watermark.high": "1gb", 82 | "cluster.routing.allocation.disk.watermark.flood_stage": "1gb", 83 | "cluster.info.update.interval": "1m" 84 | } 85 | } 86 | ``` 87 | 88 | And apparently it needs a reset. This solves it (https://github.com/elastic/kibana/issues/13685): 89 | 90 | ```curl 91 | curl -XPUT -H "Content-Type: application/json" https://[YOUR_ELASTICSEARCH_ENDPOINT]:9200/_all/_settings -d '{"index.blocks.read_only_allow_delete": null}' 92 | ``` 93 | 94 | ### 3. Add Gdelt data 95 | ``` 96 | npm install 97 | ``` 98 | 99 | Then download and index the events. It's hardcoded to `20180117` (17 Jan 2018), but can use any date(s) in GDELT. 100 | ``` 101 | npm run download 102 | ``` 103 | 104 | ### 4. Build a cool dashboard with Kibana 105 | I leave this up to you as I didn't have time to properly export the kibana data. 106 | Would be cool to see usage with the elasticsearch graph features and significant terms aggregations. 107 | -------------------------------------------------------------------------------- /es.js: -------------------------------------------------------------------------------- 1 | const elasticsearch = require('elasticsearch'); 2 | const _ = require ('lodash'); 3 | 4 | const sendThreshhold = 5000; 5 | const timerThreshold = 10000; 6 | 7 | class esIndexer { 8 | constructor(options) { 9 | var that = this; 10 | this.client = new elasticsearch.Client({ 11 | host: 'http://localhost:9200', 12 | // log: 'trace' 13 | }); 14 | this._index = options._index; 15 | this._type = options._type; 16 | this._mapping = options._mapping; 17 | this._action = { index: { _index: this._index, _type: this._type} }; 18 | this._batches = []; 19 | this._newbatch(); 20 | this._lastBatchSend = Date.now(); 21 | 22 | this.scheduler = setInterval(function () { 23 | if(that._queueLength() > 0 && ( (Date.now()-that._lastBatchSend) > timerThreshold ) ){ 24 | console.log("Timer - send now"); 25 | that._sendBatch(); 26 | } 27 | }, timerThreshold); 28 | 29 | } 30 | 31 | _newbatch(){ 32 | this._batches.push({ 33 | bulkData: [], 34 | callbacks: [], 35 | }); 36 | } 37 | 38 | _sendBatch(){ 39 | var batch = this._batches.pop(); 40 | this._lastBatchSend = Date.now(); 41 | this.client.bulk({ 42 | body: batch.bulkData 43 | }, function (err, resp) { 44 | console.dir(`Indexed ${resp.errors ? 'WITH ERRORS' : 'successfully'}. Took: ${resp.took}ms. Items: ${resp.items.length}`, {colors: true, depth: null}); 45 | // console.dir(resp, {colors: true, depth: null}); 46 | 47 | if(err || !resp || !resp.items || resp.items.length !== batch.callbacks.length) { 48 | console.error(err); 49 | batch.callbacks.forEach((callback)=>{ 50 | if(callback){ 51 | callback(err,'failed to index\n'); 52 | } 53 | }); 54 | } 55 | let successFull = 0, failed =0, summary = `Batch of ${batch.callbacks.length} docs done.`; 56 | let items = resp.items; 57 | batch.callbacks.forEach((callback, index)=>{ 58 | if(callback){ 59 | // console.dir(items, {colors: true, depth: null}); 60 | 61 | if(_.get(items[index], 'index.status') === 201){ 62 | callback(err,""); 63 | successFull++; 64 | } 65 | else { 66 | //callback(err,""); 67 | callback(err,`Indexing failure ${JSON.stringify(items[index])}\n`); 68 | failed++; 69 | } 70 | } 71 | }); 72 | if(successFull>0){ 73 | summary+= ` Successful: ${successFull}`; 74 | } 75 | if(failed>0){ 76 | summary+= ` Failed: ${failed}`; 77 | } 78 | console.log(summary); 79 | 80 | batch = null; // v8 memory clear. 81 | }); 82 | this._newbatch(); 83 | } 84 | 85 | _queueLength(){ 86 | return this._batches[this._batches.length-1].callbacks.length; 87 | } 88 | 89 | indexDoc(data,callback) { 90 | this._batches[this._batches.length-1].bulkData.push(this._action,data); 91 | this._batches[this._batches.length-1].callbacks.push(callback); 92 | if(this._queueLength() >= sendThreshhold){ 93 | this._sendBatch(); 94 | } 95 | } 96 | }; 97 | 98 | module.exports = esIndexer; 99 | -------------------------------------------------------------------------------- /file_prepare.js: -------------------------------------------------------------------------------- 1 | const indexer = require('./indexer'); 2 | 3 | const dateToFetch= '20180117'; 4 | const ZIP_DIR = `./samples`; 5 | 6 | const AdmZip = require('adm-zip'), 7 | http = require('http'), 8 | fs = require('fs'); 9 | 10 | 11 | const url = `http://data.gdeltproject.org/events/${dateToFetch}.export.CSV.zip`, 12 | zipName = `./${ZIP_DIR}/${dateToFetch}.export.CSV.zip`; 13 | fileDownloadedName = `${ZIP_DIR}/${dateToFetch}.export.CSV`; 14 | 15 | function unzipFile(){ 16 | const zip = new AdmZip(zipName); 17 | console.log("Start Decompressing Zip!"); 18 | zip.extractAllTo(ZIP_DIR, true); 19 | 20 | fs.unlink(zipName, function(err){ 21 | if (err) throw err; 22 | console.log("Done Decompressing Zip!"); 23 | }); 24 | } 25 | 26 | const download = function(url, dest, cb) { 27 | if (!fs.existsSync(ZIP_DIR)){ 28 | fs.mkdirSync(ZIP_DIR); 29 | } 30 | 31 | var file = fs.createWriteStream(dest); 32 | 33 | console.log("START DOWNLOADING "+url); 34 | var request = http.get(url, function(response) { 35 | response.pipe(file); 36 | file.on('finish', function() { 37 | console.log("DONE DOWNLOADING "+url); 38 | file.close(cb); // close() is async, call cb after close completes. 39 | }); 40 | }).on('error', function(err) { // Handle errors 41 | fs.unlink(dest); // Delete the file async. (But we don't check the result) 42 | if (cb) cb(err.message); 43 | }); 44 | }; 45 | 46 | download(url,zipName,afterDownload); 47 | 48 | function afterDownload(err,res){ 49 | unzipFile(); // Blocking 50 | console.log('Waiting...'); 51 | setTimeout(()=>{ 52 | indexer.indexGdeltFile(fileDownloadedName); 53 | },6000) 54 | 55 | } 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /gdelt/columns.js: -------------------------------------------------------------------------------- 1 | module.exports = 'GLOBALEVENTID SQLDATE MonthYear Year FractionDate Actor1Code Actor1Name Actor1CountryCode Actor1KnownGroupCode Actor1EthnicCode Actor1Religion1Code Actor1Religion2Code Actor1Type1Code Actor1Type2Code Actor1Type3Code Actor2Code Actor2Name Actor2CountryCode Actor2KnownGroupCode Actor2EthnicCode Actor2Religion1Code Actor2Religion2Code Actor2Type1Code Actor2Type2Code Actor2Type3Code IsRootEvent EventCode EventBaseCode EventRootCode QuadClass GoldsteinScale NumMentions NumSources NumArticles AvgTone Actor1Geo_Type Actor1Geo_FullName Actor1Geo_CountryCode Actor1Geo_ADM1Code Actor1Geo_Lat Actor1Geo_Long Actor1Geo_FeatureID Actor2Geo_Type Actor2Geo_FullName Actor2Geo_CountryCode Actor2Geo_ADM1Code Actor2Geo_Lat Actor2Geo_Long Actor2Geo_FeatureID ActionGeo_Type ActionGeo_FullName ActionGeo_CountryCode ActionGeo_ADM1Code ActionGeo_Lat ActionGeo_Long ActionGeo_FeatureID DATEADDED SOURCEURL'.split('\t'); 2 | -------------------------------------------------------------------------------- /gdelt/eventcode_names.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | '01':'MAKE PUBLIC STATEMENT', 3 | '010':'Make statement, not specified below', 4 | '011':'Decline comment', 5 | '012':'Make pessimistic comment', 6 | '013':'Make optimistic comment', 7 | '014':'Consider policy option', 8 | '015':'Acknowledge or claim responsibility', 9 | '016':'Deny responsibility', 10 | '017':'Engage in symbolic act', 11 | '018':'Make empathetic comment', 12 | '019':'Express accord', 13 | 14 | 15 | '02':'APPEAL', 16 | '020':'Make an appeal or request, not specified below', 17 | '021':'Appeal for material cooperation, not specified below', 18 | '0211':'Appeal for economic cooperation', 19 | '0212':'Appeal for military cooperation', 20 | '0213':'Appeal for judicial cooperation', 21 | '0214':'Appeal for intelligence', 22 | '022':'Appeal for diplomatic cooperation (such as policy support)', 23 | '023':'Appeal for aid, not specified below', 24 | '0231':'Appeal for economic aid', 25 | '0232':'Appeal for military aid', 26 | '0233':'Appeal for humanitarian aid', 27 | '0234':'Appeal for military protection or peacekeeping', 28 | '024':'Appeal for political reform, not specified below', 29 | '0241':'Appeal for change in leadership', 30 | '0242':'Appeal for policy change', 31 | '0243':'Appeal for rights', 32 | '0244':'Appeal for change in institutions, regime', 33 | '025':'Appeal to yield, not specified below', 34 | '0251':'Appeal for easing of administrative sanctions', 35 | '0252':'Appeal for easing of political dissent', 36 | '0253':'Appeal for release of persons or property', 37 | '0254':'Appeal for easing of economic sanctions, boycott, or embargo', 38 | '0255':'Appeal for target to allow international involvement (non-mediation)', 39 | '0256':'Appeal for de-escalation of military engagement', 40 | '026':'Appeal to others to meet or negotiate', 41 | '027':'Appeal to others to settle dispute', 42 | '028':'Appeal to engage in or accept mediation', 43 | 44 | 45 | '03':'EXPRESS INTENT TO COOPERATE', 46 | '030':'Express intent to cooperate, not specified below', 47 | '031':'Express intent to engage in material cooperation, not specified below', 48 | '0311':'Express intent to cooperate economically', 49 | '0312':'Express intent to cooperate militarily', 50 | '0313':'Express intent to cooperate on judicial matters', 51 | '0314':'Express intent to cooperate on intelligence', 52 | '032':'Express intent to engage in diplomatic cooperation (such as policy support)', 53 | '033':'Express intent to provide material aid, not specified below', 54 | '0331':'Express intent to provide economic aid', 55 | '0332':'Express intent to provide military aid', 56 | '0333':'Express intent to provide humanitarian aid', 57 | '0334':'Express intent to provide military protection or peacekeeping', 58 | '034':'Express intent to institute political reform, not specified below', 59 | '0341':'Express intent to change leadership', 60 | '0342':'Express intent to change policy', 61 | '0343':'Express intent to provide rights', 62 | '0344':'Express intent to change institutions, regime', 63 | '035':'Express intent to yield, not specified below', 64 | '0351':'Express intent to ease administrative sanctions', 65 | '0352':'Express intent to ease popular dissent', 66 | '0353':'Express intent to release persons or property', 67 | '0354':'Express intent to ease economic sanctions, boycott, or embargo', 68 | '0355':'Express intent to allow international involvement (non-mediation)', 69 | '0356':'Express intent to de-escalate military engagement', 70 | '036':'Express intent to meet or negotiate', 71 | '037':'Express intent to settle dispute', 72 | '038':'Express intent to accept mediation', 73 | '039':'Express intent to mediate', 74 | 75 | 76 | '04':'CONSULT', 77 | '040':'Consult, not specified below', 78 | '041':'Discuss by telephone', 79 | '042':'Make a visit', 80 | '043':'Host a visit', 81 | '044':'Meet at a "third" location', 82 | '045':'Mediate', 83 | '046':'Engage in negotiation', 84 | 85 | 86 | '05':'ENGAGE IN DIPLOMATIC COOPERATION', 87 | '050':'Engage in diplomatic cooperation, not specified below', 88 | '051':'Praise or endorse', 89 | '052':'Defend verbally', 90 | '053':'Rally support on behalf of', 91 | '054':'Grant diplomatic recognition', 92 | '055':'Apologize', 93 | '056':'Forgive', 94 | '057':'Sign formal agreement', 95 | 96 | 97 | '06':'ENGAGE IN MATERIAL COOPERATION', 98 | '060':'Engage in material cooperation, not specified below', 99 | '061':'Cooperate economically', 100 | '062':'Cooperate militarily', 101 | '063':'Engage in judicial cooperation', 102 | '064':'Share intelligence or information', 103 | 104 | 105 | '07':'PROVIDE AID', 106 | '070':'Provide aid, not specified below', 107 | '071':'Provide economic aid', 108 | '072':'Provide military aid', 109 | '073':'Provide humanitarian aid', 110 | '074':'Provide military protection or peacekeeping', 111 | '075':'Grant asylum', 112 | 113 | 114 | '08':'YIELD', 115 | '080':'Yield, not specified below', 116 | '081':'Ease administrative sanctions, not specified below', 117 | '0811':'Ease restrictions on political freedoms', 118 | '0812':'Ease ban on political parties or politicians', 119 | '0813':'Ease curfew', 120 | '0814':'Ease state of emergency or martial law', 121 | '082':'Ease political dissent', 122 | '083':'Accede to requests or demands for political reform, not specified below', 123 | '0831':'Accede to demands for change in leadership', 124 | '0832':'Accede to demands for change in policy', 125 | '0833':'Accede to demands for rights', 126 | '0834':'Accede to demands for change in institutions, regime', 127 | '084':'Return, release, not specified below', 128 | '0841':'Return, release person(s)', 129 | '0842':'Return, release property', 130 | '085':'Ease economic sanctions, boycott, embargo', 131 | '086':'Allow international involvement, not specified below', 132 | '0861':'Receive deployment of peacekeepers', 133 | '0862':'Receive inspectors', 134 | '0863':'Allow humanitarian access', 135 | '087':'De-escalate military engagement', 136 | '0871':'Declare truce, ceasefire', 137 | '0872':'Ease military blockade', 138 | '0873':'Demobilize armed forces', 139 | '0874':'Retreat or surrender militarily', 140 | 141 | 142 | '09':'INVESTIGATE', 143 | '090':'Investigate, not specified below', 144 | '091':'Investigate crime, corruption', 145 | '092':'Investigate human rights abuses', 146 | '093':'Investigate military action', 147 | '094':'Investigate war crimes', 148 | 149 | 150 | '10':'DEMAND', 151 | '100':'Demand, not specified below', 152 | '101':'Demand material cooperation, not specified below', 153 | '1011':'Demand economic cooperation', 154 | '1012':'Demand military cooperation', 155 | '1013':'Demand judicial cooperation', 156 | '1014':'Demand intelligence cooperation', 157 | '102':'Demand diplomatic cooperation (such as policy support)', 158 | '103':'Demand material aid, not specified below', 159 | '1031':'Demand economic aid', 160 | '1032':'Demand military aid', 161 | '1033':'Demand humanitarian aid', 162 | '1034':'Demand military protection or peacekeeping', 163 | '104':'Demand political reform, not specified below', 164 | '1041':'Demand change in leadership', 165 | '1042':'Demand policy change', 166 | '1043':'Demand rights', 167 | '1044':'Demand change in institutions, regime', 168 | '105':'Demand that target yields, not specified below', 169 | '1051':'Demand easing of administrative sanctions', 170 | '1052':'Demand easing of political dissent', 171 | '1053':'Demand release of persons or property', 172 | '1054':'Demand easing of economic sanctions, boycott, or embargo', 173 | '1055':'Demand that target allows international involvement (non-mediation)', 174 | '1056':'Demand de-escalation of military engagement', 175 | '106':'Demand meeting, negotiation', 176 | '107':'Demand settling of dispute', 177 | '108':'Demand mediation', 178 | 179 | 180 | '11':'DISAPPROVE', 181 | '110':'Disapprove, not specified below', 182 | '111':'Criticize or denounce', 183 | '112':'Accuse, not specified below', 184 | '1121':'Accuse of crime, corruption', 185 | '1122':'Accuse of human rights abuses', 186 | '1123':'Accuse of aggression', 187 | '1124':'Accuse of war crimes', 188 | '1125':'Accuse of espionage, treason', 189 | '113':'Rally opposition against', 190 | '114':'Complain officially', 191 | '115':'Bring lawsuit against', 192 | '116':'Find guilty or liable (legally)', 193 | 194 | 195 | '12':'REJECT', 196 | '120':'Reject, not specified below', 197 | '121':'Reject material cooperation', 198 | '1211':'Reject economic cooperation', 199 | '1212':'Reject military cooperation', 200 | '122':'Reject request or demand for material aid, not specified below', 201 | '1221':'Reject request for economic aid', 202 | '1222':'Reject request for military aid', 203 | '1223':'Reject request for humanitarian aid', 204 | '1224':'Reject request for military protection or peacekeeping', 205 | '123':'Reject request or demand for political reform, not specified below', 206 | '1231':'Reject request for change in leadership', 207 | '1232':'Reject request for policy change', 208 | '1233':'Reject request for rights', 209 | '1234':'Reject request for change in institutions, regime', 210 | '124':'Refuse to yield, not specified below', 211 | '1241':'Refuse to ease administrative sanctions', 212 | '1242':'Refuse to ease popular dissent', 213 | '1243':'Refuse to release persons or property', 214 | '1244':'Refuse to ease economic sanctions, boycott, or embargo', 215 | '1245':'Refuse to allow international involvement (non mediation)', 216 | '1246':'Refuse to de-escalate military engagement', 217 | '125':'Reject proposal to meet, discuss, or negotiate', 218 | '126':'Reject mediation', 219 | '127':'Reject plan, agreement to settle dispute', 220 | '128':'Defy norms, law', 221 | '129':'Veto', 222 | 223 | 224 | '13':'THREATEN', 225 | '130':'Threaten, not specified below', 226 | '131':'Threaten non-force, not specified below', 227 | '1311':'Threaten to reduce or stop aid', 228 | '1312':'Threaten with sanctions, boycott, embargo', 229 | '1313':'Threaten to reduce or break relations', 230 | '132':'Threaten with administrative sanctions, not specified below', 231 | '1321':'Threaten with restrictions on political freedoms', 232 | '1322':'Threaten to ban political parties or politicians', 233 | '1323':'Threaten to impose curfew', 234 | '1324':'Threaten to impose state of emergency or martial law', 235 | '133':'Threaten with political dissent, protest', 236 | '134':'Threaten to halt negotiations', 237 | '135':'Threaten to halt mediation', 238 | '136':'Threaten to halt international involvement (non-mediation)', 239 | '137':'Threaten with repression', 240 | '138':'Threaten with military force, not specified below', 241 | '1381':'Threaten blockade', 242 | '1382':'Threaten occupation', 243 | '1383':'Threaten unconventional violence', 244 | '1384':'Threaten conventional attack', 245 | '1385':'Threaten attack with WMD', 246 | '139':'Give ultimatum', 247 | 248 | 249 | '14':'PROTEST', 250 | '140':'Engage in political dissent, not specified below', 251 | '141':'Demonstrate or rally, not specified below', 252 | '1411':'Demonstrate for leadership change', 253 | '1412':'Demonstrate for policy change', 254 | '1413':'Demonstrate for rights', 255 | '1414':'Demonstrate for change in institutions, regime', 256 | '142':'Conduct hunger strike, not specified below', 257 | '1421':'Conduct hunger strike for leadership change', 258 | '1422':'Conduct hunger strike for policy change', 259 | '1423':'Conduct hunger strike for rights', 260 | '1424':'Conduct hunger strike for change in institutions, regime', 261 | '143':'Conduct strike or boycott, not specified below', 262 | '1431':'Conduct strike or boycott for leadership change', 263 | '1432':'Conduct strike or boycott for policy change', 264 | '1433':'Conduct strike or boycott for rights', 265 | '1434':'Conduct strike or boycott for change in institutions, regime', 266 | '144':'Obstruct passage, block, not specified below', 267 | '1441':'Obstruct passage to demand leadership change', 268 | '1442':'Obstruct passage to demand policy change', 269 | '1443':'Obstruct passage to demand rights', 270 | '1444':'Obstruct passage to demand change in institutions, regime', 271 | '145':'Protest violently, riot, not specified below', 272 | '1451':'Engage in violent protest for leadership change', 273 | '1452':'Engage in violent protest for policy change', 274 | '1453':'Engage in violent protest for rights', 275 | '1454':'Engage in violent protest for change in institutions, regime', 276 | 277 | 278 | '15':'EXHIBIT FORCE POSTURE', 279 | '150':'Demonstrate military or police power, not specified below', 280 | '151':'Increase police alert status', 281 | '152':'Increase military alert status', 282 | '153':'Mobilize or increase police power', 283 | '154':'Mobilize or increase armed forces', 284 | '155':'Mobilize or increase cyber-forces', 285 | 286 | 287 | '16':'REDUCE RELATIONS', 288 | '160':'Reduce relations, not specified below', 289 | '161':'Reduce or break diplomatic relations', 290 | '162':'Reduce or stop material aid, not specified below', 291 | '1621':'Reduce or stop economic assistance', 292 | '1622':'Reduce or stop military assistance', 293 | '1623':'Reduce or stop humanitarian assistance', 294 | '163':'Impose embargo, boycott, or sanctions', 295 | '164':'Halt negotiations', 296 | '165':'Halt mediation', 297 | '166':'Expel or withdraw, not specified below', 298 | '1661':'Expel or withdraw peacekeepers', 299 | '1662':'Expel or withdraw inspectors, observers', 300 | '1663':'Expel or withdraw aid agencies', 301 | 302 | 303 | '17':'COERCE', 304 | '170':'Coerce, not specified below', 305 | '171':'Seize or damage property, not specified below', 306 | '1711':'Confiscate property', 307 | '1712':'Destroy property', 308 | '172':'Impose administrative sanctions, not specified below', 309 | '1721':'Impose restrictions on political freedoms', 310 | '1722':'Ban political parties or politicians', 311 | '1723':'Impose curfew', 312 | '1724':'Impose state of emergency or martial law', 313 | '173':'Arrest, detain, or charge with legal action', 314 | '174':'Expel or deport individuals', 315 | '175':'Use tactics of violent repression', 316 | '176':'Attack cybernetically', 317 | 318 | 319 | '18':'ASSAULT', 320 | '180':'Use unconventional violence, not specified below', 321 | '181':'Abduct, hijack, or take hostage', 322 | '182':'Physically assault, not specified below', 323 | '1821':'Sexually assault', 324 | '1822':'Torture', 325 | '1823':'Kill by physical assault', 326 | '183':'Conduct suicide, car, or other non-military bombing, not specified below', 327 | '1831':'Carry out suicide bombing', 328 | '1832':'Carry out vehicular bombing', 329 | '1833':'Carry out roadside bombing', 330 | '1834':'Carry out location bombing', 331 | '184':'Use as human shield', 332 | '185':'Attempt to assassinate', 333 | '186':'Assassinate', 334 | 335 | 336 | '19':'FIGHT', 337 | '190':'Use conventional military force, not specified below', 338 | '191':'Impose blockade, restrict movement', 339 | '192':'Occupy territory', 340 | '193':'Fight with small arms and light weapons', 341 | '194':'Fight with artillery and tanks', 342 | '195':'Employ aerial weapons, not specified below', 343 | '1951':'Employ precision-guided aerial munitions', 344 | '1952':'Employ remotely piloted aerial munitions', 345 | '196':'Violate ceasefire', 346 | 347 | 348 | '20':'USE UNCONVENTIONAL MASS VIOLENCE', 349 | '200':'Use unconventional mass violence, not specified below', 350 | '201':'Engage in mass expulsion', 351 | '202':'Engage in mass killings', 352 | '203':'Engage in ethnic cleansing', 353 | '204':'Use weapons of mass destruction, not specified below', 354 | '2041':'Use chemical, biological, or radiological weapons', 355 | '2042':'Detonate nuclear weapons' 356 | }; 357 | -------------------------------------------------------------------------------- /gdelt/eventrootcode_names.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | '01':'Make Public Statement', 3 | '02':'Appeal', 4 | '03':'Express Intent to Cooperate', 5 | '04':'Consult', 6 | '05':'Engage in Diplomatic Cooperation', 7 | '06':'Engage in Material Cooperation', 8 | '07':'Provide Aid', 9 | '08':'Yield', 10 | '09':'Investigate', 11 | '10':'Demand', 12 | '11':'Disapprove', 13 | '12':'Reject', 14 | '13':'Threaten', 15 | '14':'Protest', 16 | '15':'Exhibit Force Posture', 17 | '16':'Reduce Relations', 18 | '17':'Coerce', 19 | '18':'Assualt', 20 | '19':'Fight', 21 | '20':'Use Unconventional Mass Violence' 22 | }; 23 | -------------------------------------------------------------------------------- /gdelt/formatter.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | const columns = require('./columns'), 4 | quadclass_names = require('./quadclass_names'), 5 | eventrootcode_names = require('./eventrootcode_names'), 6 | eventcode_names = require('./eventcode_names'), 7 | rootEventNames = {"0":"No","1":"Yes"}, 8 | _ = require ('lodash'); 9 | 10 | const cameoToHuman = { 11 | 'QuadClass' : quadclass_names, 12 | 'EventRootCode': eventrootcode_names, 13 | 'EventCode' : eventcode_names, 14 | 'EventBaseCode' :eventcode_names, 15 | 'IsRootEvent' : rootEventNames 16 | }; 17 | 18 | const discardedFields = new Set([ 19 | 'FractionDate', 'SQLDATE', 'MonthYear', 'Year' 20 | ]); 21 | 22 | 23 | const formatter = function(record){ 24 | let newRecord = {}; 25 | record.forEach((columnValue, index) => { 26 | if(columnValue.length>0){ // Only index actual values to save overhead. 27 | let action = columns[index]; 28 | 29 | if(discardedFields.has(action)){ 30 | // The field is discarded from the object going forward (to save overhead) 31 | } 32 | else if(cameoToHuman[action]){ 33 | // Turns CAMEO codes into human readable strings 34 | newRecord[action] = cameoToHuman[action][columnValue] || columnValue; 35 | } 36 | else if (_.startsWith(action ,'Num') || _.startsWith(action ,'Avg') || action === 'GoldsteinScale'){ 37 | // Turns numbers from strings to actual numbers that can be evaluated , sorted , aggregated... 38 | newRecord[action]=Number(columnValue); 39 | } 40 | else if(_.endsWith(action ,'Geo_FullName')){ 41 | // Transform "Petersburg, Sankt-Peterburg, Russia" into ["Petersburg", "Sankt-Peterburg", "Russia"] for easy evaluation in index 42 | newRecord[action]=columnValue.split(", "); 43 | } 44 | else if(_.endsWith(action ,'Geo_Lat') || _.endsWith(action ,'Geo_Long')){ 45 | // Transform "Actor1Geo_Long":"70", "Actor1Geo_Lat":"30" into elastichsearch geo points Actor1Geo_Location = {lat:30,lon:70} 46 | let newAction = `${action.split('_')[0]}_Location`; 47 | if(!newRecord[newAction]){newRecord[newAction] = {};} 48 | newRecord[newAction][action.substr(10,3).toLowerCase()] = Number(columnValue); 49 | } 50 | else if (action === 'DATEADDED'){ 51 | // Transform Date into elasticsearch format 52 | newRecord[action]=`${columnValue.substr(0,4)}-${columnValue.substr(4,2)}-${columnValue.substr(6,2)}`; 53 | } 54 | else { 55 | // Default : just the string value 56 | newRecord[action] = columnValue; 57 | } 58 | } 59 | }); 60 | return newRecord; 61 | } 62 | 63 | module.exports = formatter; 64 | -------------------------------------------------------------------------------- /gdelt/quadclass_names.js: -------------------------------------------------------------------------------- 1 | module.exports= { 2 | "0":"Neutral", 3 | "1":"Verbal Cooperation", 4 | "2":"Material Cooperation", 5 | "3":"Verbal Conflict", 6 | "4":"Material Conflict" 7 | }; 8 | -------------------------------------------------------------------------------- /indexer.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | // External dependencies 4 | const fs = require('fs'), 5 | transform = require('stream-transform'), 6 | parse = require('csv-parse'), 7 | gdelt_formatter = require('./gdelt/formatter.js'), 8 | esIndexer = require('./es.js'), 9 | concurrency = 10000; 10 | 11 | function indexGdeltFile(filename) { 12 | const input = fs.createReadStream(__dirname+`/${filename}`); 13 | 14 | const indexer = new esIndexer({_index: 'elastic_gdelt', _type: 'event'}); 15 | const parser = parse({delimiter: '\t'}); 16 | 17 | 18 | const formatter = transform(function(record, callback){ 19 | callback(null, gdelt_formatter(record)); 20 | }, {parallel: concurrency}); 21 | 22 | const elastic_indexer = transform(function(record, callback){ 23 | indexer.indexDoc(record,callback); 24 | }, {parallel: concurrency}); 25 | 26 | input.on('end', () => { 27 | console.log("Finished reading CSV file."); 28 | // TODO: finish indexing handler... 29 | }); 30 | 31 | input.pipe(parser) 32 | .pipe(formatter) 33 | .pipe(elastic_indexer) 34 | .pipe(process.stdout); 35 | } 36 | 37 | module.exports = { 38 | indexGdeltFile 39 | } 40 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "elasticsearch-gdelt", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1", 8 | "download": "node file_prepare.js" 9 | }, 10 | "repository": { 11 | "type": "git", 12 | "url": "git+https://github.com/shaialon/elasticsearch-gdelt.git" 13 | }, 14 | "author": "", 15 | "license": "ISC", 16 | "bugs": { 17 | "url": "https://github.com/shaialon/elasticsearch-gdelt/issues" 18 | }, 19 | "engines": { 20 | "node": "5.1.0" 21 | }, 22 | "homepage": "https://github.com/shaialon/elasticsearch-gdelt#readme", 23 | "dependencies": { 24 | "adm-zip": "^0.4.7", 25 | "csv-parse": "^1.0.0", 26 | "elasticsearch": "^14.0.0", 27 | "lodash": "^4.17.4", 28 | "stream-transform": "^0.1.1" 29 | } 30 | } 31 | --------------------------------------------------------------------------------