├── R ├── .DS_Store ├── gdeltr-package.r ├── nameFixer_data.R ├── nameFixer.R ├── gDate.R ├── write.gephi.R ├── fillSeries.R ├── GKGextractcameo.R ├── GKGcounts.R ├── toner.R ├── subsetEventCountry.R ├── GKGedgelist.R ├── toneTrend.R ├── themeTrend.R ├── LocationThemes.R ├── GKGLatLong.R ├── GKGcomentions.R └── getEventCounts.R ├── data ├── .DS_Store ├── nameFixer_data.rda └── .Rapp.history ├── man ├── gdeltr.Rd ├── nameFixer_data.Rd ├── nameFixer.Rd ├── getCounts.Rd ├── gDate.Rd ├── GKGextractcameo.Rd ├── toner.Rd ├── write.gephi.Rd ├── fillSeries.Rd ├── GKGcomentions.Rd ├── getComentions.Rd ├── themeTrend.Rd ├── GKGcounts.Rd ├── getEventCounts.Rd ├── subsetEventCountry.Rd ├── toneTrend.Rd ├── LocationThemes.Rd ├── GKGedgelist.Rd └── GKGLatLong.Rd ├── NAMESPACE ├── DESCRIPTION ├── LICENSE └── README.md /R/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahalterman/gdeltr/HEAD/R/.DS_Store -------------------------------------------------------------------------------- /data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahalterman/gdeltr/HEAD/data/.DS_Store -------------------------------------------------------------------------------- /R/gdeltr-package.r: -------------------------------------------------------------------------------- 1 | #' gdeltr 2 | #' 3 | #' @name gdeltr 4 | #' @docType package 5 | NULL 6 | -------------------------------------------------------------------------------- /data/nameFixer_data.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahalterman/gdeltr/HEAD/data/nameFixer_data.rda -------------------------------------------------------------------------------- /data/.Rapp.history: -------------------------------------------------------------------------------- 1 | 
load("/Users/andyhalterman/R/gdeltr/data/nameFixer_data.rda") 2 | load("/Users/andyhalterman/R/gdeltr/data/nameFixer_data.rda") 3 | -------------------------------------------------------------------------------- /man/gdeltr.Rd: -------------------------------------------------------------------------------- 1 | \docType{package} 2 | \name{gdeltr} 3 | \alias{gdeltr} 4 | \alias{gdeltr-package} 5 | \title{gdeltr} 6 | \description{ 7 | gdeltr 8 | } 9 | 10 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | export(GKGLatLong) 2 | export(GKGcomentions) 3 | export(GKGcounts) 4 | export(GKGedgelist) 5 | export(GKGextractcameo) 6 | export(LocationThemes) 7 | export(fillSeries) 8 | export(gDate) 9 | export(getEventCounts) 10 | export(nameFixer) 11 | export(subsetEventCountry) 12 | export(themeTrend) 13 | export(toneTrend) 14 | export(toner) 15 | export(write.gephi) 16 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: gdeltr 2 | Title: R tools for GDELT 3 | Description: Useful tools for working with GDELT and the Global Knowledge Graph 4 | Version: 0.1 5 | Author: 'Andrew Halterman' 6 | Maintainer: 'Andrew Halterman' 7 | Depends: 8 | R (>= 3.0.1), 9 | dplyr, 10 | countrycode, 11 | RSQLite, 12 | RSQLite.extfuns 13 | License: MIT 14 | LazyData: true 15 | ByteCompile: true 16 | -------------------------------------------------------------------------------- /R/nameFixer_data.R: -------------------------------------------------------------------------------- 1 | #' Name translation dataframe for GKG 2 | #' 3 | 4 | #' 5 | #' \itemize{ 6 | #' \item oldvalue: The name to be replaced 7 | #' \item newvalue: The standard name form 8 | #' } 9 | #' 10 | #' @note This is a work in progress. 
11 | #' @docType data 12 | #' @keywords datasets, gdelt, gdeltr 13 | #' @name nameFixer_data 14 | #' @usage nameFixer_data 15 | #' @format A data frame with a few dozen rows and 2 columns 16 | NULL -------------------------------------------------------------------------------- /man/nameFixer_data.Rd: -------------------------------------------------------------------------------- 1 | \docType{data} 2 | \name{nameFixer_data} 3 | \alias{nameFixer_data} 4 | \title{Name translation dataframe for GKG} 5 | \format{A data frame with a few dozen rows and 2 columns} 6 | \usage{ 7 | nameFixer_data 8 | } 9 | \description{ 10 | \itemize{ \item oldvalue: The name to be replaced \item 11 | newvalue: The standard name form } 12 | } 13 | \note{ 14 | This is a work in progress. 15 | } 16 | \keyword{datasets,} 17 | \keyword{gdelt,} 18 | \keyword{gdeltr} 19 | 20 | -------------------------------------------------------------------------------- /man/nameFixer.Rd: -------------------------------------------------------------------------------- 1 | \name{nameFixer} 2 | \alias{nameFixer} 3 | \title{Standardize names in a data frame.} 4 | \usage{ 5 | nameFixer(namevector) 6 | } 7 | \arguments{ 8 | \item{data}{A vector of names \code{gkg.df}} 9 | } 10 | \value{ 11 | newvec A vector with cleaned names 12 | } 13 | \description{ 14 | This will be very much a function in progress. The focus 15 | is on Syrian names right now. Credit to 16 | http://susanejohnston.wordpress.com/ for find-and-replace 17 | code. 
18 | } 19 | \examples{ 20 | Some R code here 21 | } 22 | \keyword{GDELT,} 23 | \keyword{gdeltr} 24 | 25 | -------------------------------------------------------------------------------- /man/getCounts.Rd: -------------------------------------------------------------------------------- 1 | \name{getCounts} 2 | \alias{getCounts} 3 | \title{Given a subsetted dataframe from the Global Knowledge Graph, return the info in the "Counts" field as a data frame.} 4 | \usage{ 5 | getCounts(gkg.df) 6 | } 7 | \arguments{ 8 | \item{gkg.df}{\code{gkg.df}} 9 | } 10 | \value{ 11 | counts A data frame containing count information. 12 | } 13 | \description{ 14 | This will only give you the info in the counts field, and 15 | in no particular order. Next steps: getting the date and 16 | themes to come with it. 17 | } 18 | \examples{ 19 | R code here showing how your function works 20 | } 21 | \keyword{GDELT,} 22 | \keyword{gdeltr} 23 | 24 | -------------------------------------------------------------------------------- /man/gDate.Rd: -------------------------------------------------------------------------------- 1 | \name{gDate} 2 | \alias{gDate} 3 | \title{Given a vector of 8 digit dates (yyyymmdd), returns a date obj in "yyyy-mm-dd"} 4 | \usage{ 5 | gDate(date.vector) 6 | } 7 | \arguments{ 8 | \item{date.vector}{A vector of the SQLDATE column from 9 | GDELT, in form yyyymmdd \code{date.vector}} 10 | } 11 | \value{ 12 | newdate A vector of class date with "yyyy-mm-dd" format 13 | \code{newdate} 14 | } 15 | \description{ 16 | This comes up a lot working with GDELT. Make sure you only 17 | pass it a vector, not the whole data frame! 
18 | } 19 | \examples{ 20 | R code here showing how your function works 21 | } 22 | \keyword{GDELT,} 23 | \keyword{gdeltr} 24 | 25 | -------------------------------------------------------------------------------- /man/GKGextractcameo.Rd: -------------------------------------------------------------------------------- 1 | \name{GKGextractcameo} 2 | \alias{GKGextractcameo} 3 | \title{Extract CAMEO events from GKG} 4 | \usage{ 5 | GKGextractcameo(df, justvector = TRUE) 6 | } 7 | \arguments{ 8 | \item{df}{A dataframe of GKG namesets} 9 | 10 | \item{justvector}{Return vector of ID numbers instead of 11 | actual data frame?} 12 | } 13 | \value{ 14 | gdelt.df A vector of all linked CAMEO event IDs 15 | } 16 | \description{ 17 | From a GKG subset dataframe, return a dataframe of all 18 | linked CAMEO event IDs. If \code{justvector=TRUE}, the 19 | function will return only a vector of linked CAMEO event IDs 20 | rather than the complete dataframe of all matching events. 21 | } 22 | \examples{ 23 | cameos.events <- GKGextractcameo(mexico.cartels) 24 | } 25 | \keyword{GDELT,} 26 | \keyword{gdeltr} 27 | 28 | -------------------------------------------------------------------------------- /R/nameFixer.R: -------------------------------------------------------------------------------- 1 | #' Standardize names in a data frame. 2 | #' 3 | #' This will be very much a function in progress. The focus is on Syrian names right now. 4 | #' Credit to http://susanejohnston.wordpress.com/ for find-and-replace code. 
#' Standardize names in a vector.
#'
#' Looks up every element of \code{namevector} in the first column of the
#' package data set \code{nameFixer_data} and, where an exact match is found,
#' replaces it with the value in the second column. Elements without a match
#' (including \code{NA}) are returned unchanged. This is very much a function
#' in progress; the focus is on Syrian names right now.
#' Credit to http://susanejohnston.wordpress.com/ for the find-and-replace idea.
#'
#' @param namevector A character vector (or factor) of names.
#'
#' @return A character vector of the same length as \code{namevector} with
#'   known name variants replaced by their standard form.
#'
#' @keywords GDELT gdeltr
#'
#' @export
#'
#' @examples
#' \dontrun{
#' cleaned <- nameFixer(persons)
#' }
nameFixer <- function(namevector) {
  if (is.factor(namevector)) {
    namevector <- as.character(namevector)
  }
  # Coerce the lookup columns to character: if they are stored as factors,
  # assigning them into a character vector would insert integer level codes.
  oldvalue <- as.character(nameFixer_data[, 1])
  newvalue <- as.character(nameFixer_data[, 2])

  # match() returns the first matching row, so duplicated `oldvalue` entries
  # resolve deterministically instead of being recycled.
  idx <- match(namevector, oldvalue)
  hit <- !is.na(idx) & !is.na(namevector)

  newvec <- namevector
  newvec[hit] <- newvalue[idx[hit]]
  newvec
}

#' Convert 8-digit GDELT dates (yyyymmdd) to Date objects
#'
#' This comes up a lot working with GDELT.
#' Make sure you only pass it a vector, not the whole data frame!
#'
#' @param date.vector A vector of dates in the form yyyymmdd (for example
#'   the SQLDATE column from GDELT). Integer, numeric, character and factor
#'   input are all coerced with \code{as.character}.
#'
#' @return A vector of class \code{Date}; elements that cannot be parsed
#'   are \code{NA}.
#'
#' @keywords GDELT gdeltr
#'
#' @export
#'
#' @examples
#' gDate(c(20000101, 20130930))
gDate <- function(date.vector) {
  # as.character() on a data frame deparses whole columns, which would
  # silently yield NA; fail loudly instead.
  if (is.data.frame(date.vector)) {
    stop("gDate() expects a single vector of dates, not a data frame.",
         call. = FALSE)
  }
  date.vector <- as.character(date.vector)
  iso <- paste(
    substr(date.vector, 1, 4),
    substr(date.vector, 5, 6),
    substr(date.vector, 7, 8),
    sep = "-"
  )
  as.Date(iso, format = "%Y-%m-%d")
}
| \item{df}{A subset of the GKG, probably along one 9 | theme \code{df}} 10 | 11 | \item{type}{Return tones of organization, locations, or 12 | persons?} 13 | 14 | \item{summarize}{Should the mean for each unique entity 15 | be returned? Caution: lots of alt. spellings 16 | \code{summarize}} 17 | } 18 | \value{ 19 | tones A df with names/locations and tones (and counts if 20 | summarized). 21 | } 22 | \description{ 23 | summarize will return the mean tone for each entity. This 24 | feature isn't done yet. 25 | } 26 | \examples{ 27 | > ieds <- gkg[grep("LANDMINE", gkg$THEMES),] 28 | > person.tone.ieds <- toner(ieds, type="persons") 29 | > dim(person.tone.ieds) 30 | [1] 4545 2 31 | } 32 | \keyword{GDELT,} 33 | \keyword{gdeltr} 34 | 35 | -------------------------------------------------------------------------------- /man/write.gephi.Rd: -------------------------------------------------------------------------------- 1 | \name{write.gephi} 2 | \alias{write.gephi} 3 | \title{Wrapper for write.table for outputting from the GKG to Gephi} 4 | \usage{ 5 | write.gephi(gkg.df, filename, type) 6 | } 7 | \arguments{ 8 | \item{gkg.df}{A dataframe to export to gephi 9 | \code{gkg.df}} 10 | 11 | \item{filename}{The name for the file. Call it .csv even 12 | though it's semicolon-separated \code{filename}} 13 | 14 | \item{type}{"edge" or "node". With "edge" every column is 15 | quoted; with "node" only the "ID" column is quoted.} 16 | } 17 | \value{ 18 | gkg.df A semicolon separated file with quotes. 19 | } 20 | \description{ 21 | Specifically, it adds quotes to prevent extra splitting, 22 | removes row names, and saves with a semicolon 23 | separator. Obviously, it's undirected. If it's a node list, the 24 | nodes MUST be in a column labeled "ID". 
25 | } 26 | \examples{ 27 | R code here showing how your function works 28 | } 29 | \keyword{GDELT,} 30 | \keyword{gdeltr} 31 | 32 | -------------------------------------------------------------------------------- /man/fillSeries.Rd: -------------------------------------------------------------------------------- 1 | \name{fillSeries} 2 | \alias{fillSeries} 3 | \title{Fills in missing dates in a data frame of GDELT events for plotting or time series analysis} 4 | \usage{ 5 | fillSeries(df, begin.date = "2000-01-01", end.date = "2013-09-30", 6 | date.column = "SQLDATE", extraclean = FALSE) 7 | } 8 | \arguments{ 9 | \item{df}{A GDELT dataframe. \code{df}} 10 | 11 | \item{begin.date}{The earliest date. Defaults to Jan 1, 12 | 2000. \code{begin.date}} 13 | 14 | \item{end.date}{The last date. Defaults to Sept 30, 15 | 2013. \code{end.date}} 16 | 17 | \item{date.column}{The name of the column containing 18 | dates. Defaults to "SQLDATE" \code{date.column}} 19 | } 20 | \value{ 21 | df2 A 22 | } 23 | \description{ 24 | Fills in missing dates in a data frame of GDELT events for 25 | plotting or time series analysis 26 | } 27 | \examples{ 28 | R code here showing how your function works 29 | } 30 | \keyword{GDELT,} 31 | \keyword{gdeltr} 32 | 33 | -------------------------------------------------------------------------------- /man/GKGcomentions.Rd: -------------------------------------------------------------------------------- 1 | \name{GKGcomentions} 2 | \alias{GKGcomentions} 3 | \title{Given a subsetted dataframe from the Global Knowledge Graph, return a df with co-mentions.} 4 | \usage{ 5 | GKGcomentions(gkg.df, type) 6 | } 7 | \arguments{ 8 | \item{gkg.df}{A subset of the Global Knowledge Graph 9 | \code{gkg.df}} 10 | 11 | \item{type}{Data types to subset: "themes", "persons", 12 | "organizations", "countries", or "latlong". 13 | \code{gkg.df}} 14 | } 15 | \value{ 16 | countries.df A data frame containing count information. 
17 | } 18 | \description{ 19 | This takes a GKG dataframe (or subset thereof) returns a 20 | dataframe with all co-mentioned entities of the desired 21 | type listed on the same row. This is designed for export to 22 | social network analysis software. Run the output through 23 | \code{write.gephi} if needed. New feature: uses 24 | \code{nameFixer} to standardize people names. 25 | } 26 | \examples{ 27 | ieds <- gkg[grep("LANDMINE", gkg$THEMES),] 28 | ieds.orgs <- GKGcomentions(ieds, type="organizations") 29 | } 30 | \keyword{GDELT,} 31 | \keyword{gdeltr} 32 | 33 | -------------------------------------------------------------------------------- /man/getComentions.Rd: -------------------------------------------------------------------------------- 1 | \name{getComentions} 2 | \alias{getComentions} 3 | \title{Given a subsetted dataframe from the Global Knowledge Graph, return a df with co-mentions.} 4 | \usage{ 5 | getComentions(gkg.df, type) 6 | } 7 | \arguments{ 8 | \item{gkg.df}{A subset of the Global Knowledge Graph 9 | \code{gkg.df}} 10 | 11 | \item{type}{Data types to subset: "themes", "persons", 12 | "organizations", "countries", or "placenames". 13 | \code{gkg.df}} 14 | } 15 | \value{ 16 | co-mentions A data frame containing count information. 17 | } 18 | \description{ 19 | This takes a GKG dataframe (or subset thereof) returns a 20 | dataframe with all co-mentioned entities of the desired 21 | type listed on the same row. This is designed for export 22 | to social network analysis software. Run the output 23 | through \code{write.gephi} if needed. New feature: uses 24 | \code{nameFixer} to standardize people names. 
25 | } 26 | \examples{ 27 | ieds <- gkg[grep("LANDMINE", gkg$THEMES),] 28 | ieds.orgs <- getComentions(ieds, type="organizations") 29 | } 30 | \keyword{GDELT,} 31 | \keyword{gdeltr} 32 | 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Andy Halterman 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /R/write.gephi.R: -------------------------------------------------------------------------------- 1 | #' Wrapper for write.table for outputting from the GKG to Gephi 2 | #' 3 | #' Specifically, it adds quotes to prevent extra splitting, removes row/col names, and saves with a semicolon separator. 4 | #' Obvs., it's undirected. 
#' Write a GKG data frame to a semicolon-separated file for Gephi
#'
#' Thin wrapper around \code{write.table}. It wraps values in single quotes
#' to prevent extra splitting on import, drops row names, keeps the column
#' header, and saves with a semicolon separator. \code{write.table} still
#' applies its default double quoting on top of that.
#' If it's a node list, the nodes MUST be in a column labeled "ID".
#'
#' @param gkg.df A data frame to export to Gephi.
#' @param filename The name for the output file. Call it .csv even though
#'   the separator is a semicolon.
#' @param type \code{"edge"} wraps the values of every column in single
#'   quotes; \code{"node"} wraps only the \code{ID} column. Any other value
#'   writes \code{gkg.df} without adding single quotes.
#'
#' @return Called for its side effect of writing \code{filename}; returns
#'   the (invisible) result of \code{write.table}.
#'
#' @keywords GDELT gdeltr
#'
#' @export
#'
#' @examples
#' \dontrun{
#' write.gephi(corruption.edgelist, "corruption.csv", type = "edge")
#' }
write.gephi <- function(gkg.df, filename, type) {
  pst <- function(x) paste0("'", x, "'")

  if (type == "edge") {
    # Quote column by column and assign back with `[]<-` so the object stays
    # a data frame with its original column names. Indexing with
    # `[, 1:ncol(gkg.df)]` would collapse a one-column frame to a vector.
    gkg.df <- as.data.frame(gkg.df, stringsAsFactors = FALSE)
    gkg.df[] <- lapply(gkg.df, pst)
  } else if (type == "node") {
    if (!"ID" %in% names(gkg.df)) {
      stop("Node lists must have a column named 'ID'", call. = FALSE)
    }
    gkg.df$ID <- pst(gkg.df$ID)
  }

  write.table(gkg.df, file = filename, sep = ";",
              row.names = FALSE, col.names = TRUE)
}
29 | } 30 | \description{ 31 | This takes a GKG dataframe and a list of themes and plots 32 | the mentions of the themes over time. 33 | } 34 | \examples{ 35 | gkg <- read.csv("gkg.csv") 36 | mex <- gkg[grep("Mexico", gkg$LOCATIONS),] 37 | themeTrend(mex, c("CRIME_CARTELS", "SECURITY_SERVICES", "KILL"), location="Mexico") 38 | } 39 | \keyword{GDELT,} 40 | \keyword{gdeltr} 41 | 42 | -------------------------------------------------------------------------------- /man/GKGcounts.Rd: -------------------------------------------------------------------------------- 1 | \name{GKGcounts} 2 | \alias{GKGcounts} 3 | \title{Extract the "Counts" information from a GKG dataframe} 4 | \usage{ 5 | GKGcounts(gkg) 6 | } 7 | \arguments{ 8 | \item{gkg}{A subset dataframe of the GKG. \code{gkg}} 9 | } 10 | \value{ 11 | counts A data frame containing information from the 12 | \code{COUNTS} column. 13 | } 14 | \description{ 15 | The Global Knowledge Graph contains two files. The first, the 16 | "Counts" file, contains information on the numbers of 17 | people killed, affected, etc. every day by location. The 18 | second file, the "Graph file", contains the associated 19 | themes, organizations, people, and locations. Filtering, 20 | especially by themes, is very useful, but much of the 21 | useable information in the GKG is in the Counts file. This 22 | function will return the Counts file, nicely formatted 23 | (it's \code{;} and \code{#} separated, which is a hassle), 24 | in no particular order. It loses the date and theme 25 | information, though, which is the next room for 26 | improvement. 27 | } 28 | \examples{ 29 | # Say we were interested in the number of people killed by mines/IEDs. 
#' Fill in missing dates in a data frame of GDELT events
#'
#' Left-joins \code{df} onto a complete daily calendar running from
#' \code{begin.date} to \code{end.date}, so that days without events are
#' present (with \code{NA} in the other columns). Useful before plotting or
#' time series analysis.
#'
#' @param df A GDELT data frame.
#' @param begin.date The earliest date. Defaults to Jan 1, 2000.
#' @param end.date The last date. Defaults to Sept 30, 2013.
#' @param date.column The name of the column containing dates. Defaults to
#'   "SQLDATE". A numeric (yyyymmdd) column is converted with \code{gDate}
#'   before merging.
#' @param extraclean If \code{TRUE}, set the fifth column of the merged
#'   result to 0 where a column called \code{count} is \code{NA}, keep only
#'   the first five columns, and rename them to \code{Date},
#'   \code{ActionGeo_Lat}, \code{ActionGeo_Long}, \code{EventRootCode} and
#'   \code{Count}. NOTE(review): this assumes the input has exactly that
#'   column layout -- confirm against the caller.
#'
#' @return A data frame with a \code{Date} column covering every day in the
#'   requested range, followed by the remaining columns of \code{df}.
#'
#' @keywords GDELT gdeltr
#'
#' @export
#'
#' @examples
#' \dontrun{
#' mex.daily <- fillSeries(mex.protest, begin.date = "2010-01-01")
#' }
fillSeries <- function(df, begin.date = "2000-01-01", end.date = "2013-09-30",
                       date.column = "SQLDATE", extraclean = FALSE) {
  if (!date.column %in% names(df)) {
    stop("No column named '", date.column, "'", call. = FALSE)
  }
  # Look the column up by the *value* of date.column. `df$date.column` would
  # read a column literally named "date.column", so the conversion would
  # never run and the merge below would match nothing.
  if (is.numeric(df[[date.column]])) {
    df[[date.column]] <- gDate(df[[date.column]])
  }

  daily <- data.frame(
    Date = seq(from = as.Date(begin.date), to = as.Date(end.date), by = "1 day")
  )
  df <- merge(x = daily, y = df, by.x = "Date", by.y = date.column,
              all.x = TRUE)

  if (extraclean == TRUE) {
    # Positional clean-up kept as in the original implementation.
    df[is.na(df$count), 5] <- 0
    df <- df[, 1:5]
    names(df) <- c("Date", "ActionGeo_Lat", "ActionGeo_Long",
                   "EventRootCode", "Count")
  }
  df
}
\arguments{ 8 | \item{countryname}{A normal English country name 9 | (character) \code{countryname}} 10 | 11 | \item{eventtype}{What event code resolution? Options: 12 | "code", "base", "root", "quad". \code{eventtype}} 13 | 14 | \item{min.date}{Furthest date back you want (numeric). 15 | \code{min.date}} 16 | } 17 | \value{ 18 | df A data frame of counts per event per month in the country 19 | } 20 | \description{ 21 | By default, this goes back to Jan 1 2000, but you can 22 | change it. Inputs must be characters. Requires the 23 | \code{countrycode} package to translate from country name 24 | to FIPS104. Assumes you have GDELT in a dplyr tbl in 25 | tables called "hist.db" and "daily.db". I have them in a 26 | SQLite database, but dplyr will let you use whatever you 27 | want. See Hadley Wickham's github page. 28 | } 29 | \details{ 30 | Reverted to a prev. version. No pre-build scaffolding to 31 | get all of the columns. 32 | } 33 | \examples{ 34 | mex.protest <- getEventCounts("Mexico", eventtype="root", min.date=20000101) 35 | } 36 | \keyword{GDELT,} 37 | \keyword{gdeltr} 38 | 39 | -------------------------------------------------------------------------------- /man/subsetEventCountry.Rd: -------------------------------------------------------------------------------- 1 | \name{subsetEventCountry} 2 | \alias{subsetEventCountry} 3 | \title{Subset GDELT by a given EventRootCode and country name, returning lat/long for each event.} 4 | \usage{ 5 | subsetEventCountry(event.root.code, country.name, min.date = 20000101) 6 | } 7 | \arguments{ 8 | \item{event.root.code}{One of the 20 EventRootCodes in 9 | CAMEO/GDELT, including leading zero 10 | \code{event.root.code}} 11 | 12 | \item{country.name}{A normal English country name 13 | \code{country.name}} 14 | 15 | \item{min.date}{Furthest date back you want. 16 | \code{min.date}} 17 | } 18 | \value{ 19 | df.out A data frame of the events of interest from the 20 | country, including geographic coordinates. 
21 | } 22 | \description{ 23 | By default, this goes back to Jan 1 2000, but you can 24 | change it. Inputs must be characters and EventRootCodes 25 | must include leading zeros. Requires the \code{countrycode} 26 | package to translate from country name to FIPS104. Assumes 27 | you have GDELT in a dplyr tbl in tables called "hist.db" 28 | and "daily.db". I have them in a SQLite database, but 29 | dplyr will let you use whatever you want. See Hadley 30 | Wickham's github page. 31 | } 32 | \examples{ 33 | mex.protest <- subsetEventCountry("14", "Mexico", min.date=20000101) 34 | } 35 | \keyword{GDELT,} 36 | \keyword{gdeltr} 37 | \keyword{geographic,} 38 | 39 | -------------------------------------------------------------------------------- /R/GKGextractcameo.R: -------------------------------------------------------------------------------- 1 | #' Extract CAMEO events from GKG 2 | #' 3 | #' From a GKG subset dataframe, return a dataframe of all linked CAMEO event IDs. 4 | #' If \code{justvector=TRUE}, the function will return only a vector of linked CAMEO event IDs rather than the complete dataframe of all matching events. 5 | #' 6 | #' @param df A dataframe of GKG namesets 7 | #' @param justvector Return vector of ID numbers instead of actual data frame? 
#' Extract CAMEO event IDs from GKG
#'
#' From a GKG subset data frame, collect all linked CAMEO event IDs by
#' splitting the comma-separated \code{CAMEOEVENTIDS} column.
#' Only \code{justvector = TRUE} is implemented; returning the complete data
#' frame of matching events is not available yet and raises an error.
#'
#' @param df A data frame of GKG namesets with a \code{CAMEOEVENTIDS} column.
#' @param justvector Return a vector of ID numbers instead of the actual
#'   events? Must currently be \code{TRUE}.
#'
#' @return A character vector of all linked CAMEO event IDs, in row order.
#'
#' @keywords GDELT gdeltr
#'
#' @export
#'
#' @examples
#' \dontrun{
#' cameos.events <- GKGextractcameo(mexico.cartels)
#' }
GKGextractcameo <- function(df, justvector = TRUE) {
  if (!"CAMEOEVENTIDS" %in% names(df)) stop("No column named 'CAMEOEVENTIDS'")
  raw <- df$CAMEOEVENTIDS
  if (length(raw) == 0) stop("No rows in input data frame.")

  if (!isTRUE(as.logical(justvector))) {
    # Pulling the matching rows from 'hist.db' / 'daily.db' was never
    # finished; the old code after this stop() was unreachable.
    stop("This feature isn't complete yet")
  }

  # as.character() so factor columns do not make strsplit() fail with
  # "non-character argument".
  unlist(strsplit(as.character(raw), split = ",", fixed = TRUE),
         use.names = FALSE)
}
Only works 16 | for "theme" now.} 17 | 18 | \item{location}{A location, potentially more specific 19 | than the data frame subset.} 20 | 21 | \item{overlay}{Should the lines be plotted on the same 22 | graph or separate?} 23 | 24 | \item{returndata}{If true, returns the raw data and does 25 | not plot anything} 26 | 27 | \item{span}{How much smoothing on the loess curve?} 28 | } 29 | \value{ 30 | theme.counts A data frame containing number of events per 31 | day per theme. 32 | } 33 | \description{ 34 | This takes a GKG dataframe and organizations/themes/people 35 | of interest and returns the tone of daily news coverage of 36 | that entity. Right now it only does themes. 37 | } 38 | \examples{ 39 | gkg <- read.csv("gkg.csv") 40 | mex <- gkg[grep("Mexico", gkg$LOCATIONS),] 41 | toneTrend(mex, c("CRIME_CARTELS", "SECURITY_SERVICES", "KILL"), type="theme", location="Mexico") 42 | } 43 | \keyword{GDELT,} 44 | \keyword{gdeltr} 45 | 46 | -------------------------------------------------------------------------------- /man/LocationThemes.Rd: -------------------------------------------------------------------------------- 1 | \name{LocationThemes} 2 | \alias{LocationThemes} 3 | \title{In a GKG subset, how many times are given themes mentioned in conjunction with given locations?} 4 | \usage{ 5 | LocationThemes(df, themes, countries) 6 | } 7 | \arguments{ 8 | \item{df}{A subset of the Global Knowledge Graph 9 | including more than one country and one theme \code{df}} 10 | 11 | \item{themes}{A vector of themes from GKG. \code{themes}} 12 | 13 | \item{countries}{A vector of countries in country name 14 | form \code{countries}} 15 | } 16 | \value{ 17 | theme.counts A data frame containing counts per theme per 18 | country, suitable for faceted barplotting. 19 | } 20 | \description{ 21 | This takes a GKG dataframe, a list of themes, and a list of 22 | countries and plots the distribution of mentions per 23 | country. 
24 | } 25 | \examples{ 26 | latin.protests <- LocationThemes(protests, themes=c("SLUMS", "ECON", "NEW_CONSTRUCTION", "VIOLENT_UNREST", "PUBLIC_TRANSPORT", "EDUCATION"), countries=c("Brazil", "Argentina", "Venezuela", "Colombia", "Uruguay", "Paraguay", "Bolivia", "Ecuador", "Peru", "Chile", "Mexico", "Honduras")) 27 | ggplot(latin.protests, aes(y=Percent, Country, x=Theme, fill=Theme)) + geom_bar(stat="identity") + facet_wrap( ~ Country, nrow=5) + theme_bw() + theme(strip.background = element_rect(fill = 'white'), legend.position="top", axis.ticks = element_blank(), axis.text.x = element_blank()) + labs(x=NULL) 28 | } 29 | \keyword{GDELT,} 30 | \keyword{gdeltr} 31 | 32 | -------------------------------------------------------------------------------- /man/GKGedgelist.Rd: -------------------------------------------------------------------------------- 1 | \name{GKGedgelist} 2 | \alias{GKGedgelist} 3 | \title{Convert a ragged data frame into an edgelist} 4 | \usage{ 5 | GKGedgelist(df, max.connections = 30) 6 | } 7 | \arguments{ 8 | \item{df}{A subset of the GKG \code{df}} 9 | 10 | \item{max.connections}{How many columns to include? Set a 11 | number or "all". Default is 30.} 12 | } 13 | \value{ 14 | edgelist A data frame with two columns containing the two 15 | nodes defining each edge. 16 | } 17 | \description{ 18 | Because GKG's persons, organizations, etc. fields contain 19 | varying numbers of elements, converting them to a data 20 | frame will produce a ragged data frame (rows with different 21 | numbers of columns, albeit padded with NAs). Gephi can 22 | import ragged data frames, though not if you plan to 23 | include node attributes. This function will take a ragged 24 | data frame and return an edgelist data frame (2 columns, 25 | lots of rows). 26 | } 27 | \details{ 28 | Right now, even with the apply setup (instead of the awful 29 | earlier for-loop) it's still really, really slow. 30 | 31 | Some namesets contain hundreds of names. 
To increase speed 32 | at the loss of some connections, you can limit the number 33 | of columns that are included. By default, this is set to 34 | 30. 35 | } 36 | \examples{ 37 | corruption<- gkg[grep("CORRUPTION", gkg$THEMES),] 38 | corruption <- GKGcomentions(corruption, type="persons") 39 | corruption.edgelist <- GKGedgelist(corruption, max.connections=40) 40 | } 41 | \keyword{GDELT,} 42 | \keyword{gdeltr} 43 | 44 | -------------------------------------------------------------------------------- /R/GKGcounts.R: -------------------------------------------------------------------------------- 1 | #' Given a dataframe of "Counts" information from the GKG 2 | #' 3 | #' The Global Knowledge Graph contains two elements, the "Counts" file, containing information on the numbers of people killed, affected, etc. every day by location. 4 | #' The second file, the "Graph file", contains the associated themes, organizations, people, and locations. Filtering, especially by themes, is very useful, but much of the useable information in the GKG is in the Counts file. This function will return the Counts file, nicely formatted (it's \code{;} and \code{#} separated, which is a hassle), in no particular order. It loses the date and theme information, though, which is the next room for improvement. 5 | #' 6 | #' @param gkg A subset dataframe of the GKG. \code{gkg} 7 | #' 8 | #' @return counts A data frame containing information from the \code{Counts} column. 9 | #' 10 | #' @keywords GDELT, gdeltr 11 | #' 12 | #' @export 13 | #' 14 | #' @examples 15 | #' # Say we were interested in the number of people killed by mines/IEDs. 
#' Extract the "Counts" records from a GKG data frame
#'
#' The Global Knowledge Graph stores count information (numbers of people
#' killed, affected, etc.) in a single \code{COUNTS} column. Each cell holds
#' zero or more records separated by \code{;}, and the fields inside a record
#' are separated by \code{#}. This function flattens every record found in the
#' input into one row of a data frame, one field per column, in input order.
#' The date and theme of the source row are not carried over.
#'
#' @param gkg A data frame (typically a subset of the GKG) with a
#'   \code{COUNTS} column. Character and factor columns are both accepted.
#'
#' @return A data frame with one row per count record and one column per
#'   \code{#}-separated field (\code{V1}, \code{V2}, ...). Records with fewer
#'   fields than the longest record are padded with \code{NA}.
#'
#' @keywords GDELT, gdeltr
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Say we were interested in the number of people killed by mines/IEDs.
#' ieds <- gkg[grep("LANDMINE", gkg$THEMES), ]
#' ieds.counts <- GKGcounts(ieds)
#' }
GKGcounts <- function(gkg) {
  if (!"COUNTS" %in% names(gkg)) stop("No column named 'COUNTS'")
  # read.csv() may hand us a factor; strsplit() only accepts character input.
  counts <- as.character(gkg$COUNTS)
  if (length(counts) == 0) stop("No results--0 rows in input dataframe")

  # One element per ";"-separated record, across all rows.
  records <- unlist(strsplit(counts, split = ";", fixed = TRUE), use.names = FALSE)
  records <- records[!is.na(records) & nzchar(records)]
  if (length(records) == 0) {
    stop("No results--'COUNTS' column contains no count records")
  }

  fields <- strsplit(records, split = "#", fixed = TRUE)
  n_max <- max(vapply(fields, length, integer(1)))

  # Pad every record to n_max fields. vapply() returns an n_max x n matrix
  # (or a plain vector when n_max == 1), so rebuild the matrix explicitly with
  # one record per row instead of relying on t(), which would turn the
  # single-field case into a single row.
  padded <- vapply(fields, function(f) f[seq_len(n_max)], character(n_max))
  as.data.frame(matrix(padded, ncol = n_max, byrow = TRUE))
}
 After importing both, clear up any 33 | duplicates by going to Data Laboratory > More Actions > 34 | Detect and Merge Duplicates, merging on the Label field. 35 | Make sure you have the Geo Layout plugin installed, as well 36 | as the "Map of Countries" plugin if you want to do the full 37 | visualization inside Gephi. 38 | 39 | Feature to add: Take the full dataframe, separate by date 40 | to allow dynamic graphs. 41 | } 42 | \examples{ 43 | ieds <- gkg[grep("LANDMINE", gkg$THEMES),] 44 | GKGLatLong(ieds, file="ied.network") 45 | } 46 | \keyword{GDELT,} 47 | \keyword{gdeltr} 48 | 49 | -------------------------------------------------------------------------------- /R/toner.R: -------------------------------------------------------------------------------- 1 | #' Given a GKG subset, return the tones associated with each person/place/organization 2 | #' 3 | #' summarize will return the mean tone for each entity. This feature isn't done yet. 4 | #' 5 | #' @param df A subset of the GKG, probably along one theme \code{df} 6 | #' @param type Return tones of organization, locations, or persons? 7 | #' @param summarize Should the mean for each unique entity be returned? Caution: lots of alt. spellings \code{summarize} 8 | #' 9 | #' @return tones A df with names/locations and tones (and counts if summarized).
#' Given a GKG subset, return the mean tone associated with each person
#'
#' Every GKG row carries a comma-separated \code{TONE} field and a
#' \code{;}-separated \code{PERSONS} field. The first tone value of a row is
#' attributed to every person named in that row; names are standardised with
#' \code{nameFixer} and the tones are then averaged per person.
#'
#' @param df A subset of the GKG with \code{TONE} and \code{PERSONS} columns.
#' @param type Which entity to summarise. Only persons are implemented
#'   (\code{"person"} / \code{"persons"}, case-insensitive); any other value
#'   raises an error.
#'
#' @return A data frame with columns \code{persons}, \code{count} (number of
#'   mentions) and \code{meantone} (mean of the first tone value).
#'
#' @keywords GDELT, gdeltr
#'
#' @export
#'
#' @examples
#' \dontrun{
#' ieds <- gkg[grep("LANDMINE", gkg$THEMES), ]
#' person.tone.ieds <- toner(ieds, type = "persons")
#' dim(person.tone.ieds)
#' }
toner <- function(df, type) {
  if (!"TONE" %in% names(df)) stop("No column named 'TONE' in input data frame")
  if (nrow(df) == 0) stop("Input data frame has 0 rows")

  if (!tolower(type) %in% c("person", "persons")) {
    # Previously an unsupported type fell through and returned NULL silently.
    stop("toner() currently only supports type = \"persons\"")
  }
  if (!"PERSONS" %in% names(df)) stop("No column named 'PERSONS' in input data frame")

  # First comma-separated value of TONE for each row.
  tone.fields <- strsplit(as.character(df$TONE), ",", fixed = TRUE)
  row.tone <- as.numeric(vapply(tone.fields, function(x) x[1], character(1)))

  # Split all rows at once instead of growing a data frame with rbind() inside
  # a loop, which was quadratic in the number of rows.
  row.persons <- strsplit(as.character(df$PERSONS), ";", fixed = TRUE)
  n.persons <- vapply(row.persons, length, integer(1))

  persontone <- data.frame(
    persons = as.character(unlist(row.persons, use.names = FALSE)),
    V2 = rep(row.tone, times = n.persons),
    stringsAsFactors = FALSE
  )
  persontone$persons <- nameFixer(persontone$persons)

  # group_by() accepts a plain data frame, so the deprecated tbl_df() wrapper
  # is not needed.
  as.data.frame(summarise(group_by(persontone, persons), count = n(), meantone = mean(V2)))
}
#' Subset GDELT by a given EventRootCode and country name, returning lat/long for each event.
#'
#' By default, this goes back to Jan 1 2000, but you can change it.
#' Inputs must be characters and EventRootCodes must include leading zeros.
#' Requires the \code{countrycode} package to translate from country name to
#' FIPS 10-4. Assumes GDELT is available as two dplyr tables named
#' \code{hist.db} and \code{daily.db} that are visible from the calling
#' environment (for example backed by a SQLite database).
#'
#' @param event.root.code One of the 20 EventRootCodes in CAMEO/GDELT,
#'   including the leading zero.
#' @param country.name A normal English country name.
#' @param min.date Furthest date back you want, as a \code{yyyymmdd} number.
#'   Applied to both the historical and the daily table.
#'
#' @return A data frame of the events of interest from the country, with one
#'   row per date/latitude/longitude combination and a \code{count} column.
#'
#' @keywords GDELT, geographic, gdeltr
#'
#' @export
#'
#' @examples
#' \dontrun{
#' mex.protest <- subsetEventCountry("14", "Mexico", min.date = 20000101)
#' }
subsetEventCountry <- function(event.root.code, country.name, min.date = 20000101) {
  # The dplyr verbs and countrycode() are used unqualified below, so both
  # packages must be attached; fail loudly instead of continuing after a
  # require() that returned FALSE.
  for (pkg in c("countrycode", "dplyr")) {
    if (!require(pkg, character.only = TRUE)) {
      stop("Package '", pkg, "' is required by subsetEventCountry()")
    }
  }
  if (!exists("hist.db") || !exists("daily.db")) {
    stop("subsetEventCountry() expects dplyr tables named 'hist.db' and 'daily.db'")
  }

  country.code <- countrycode(country.name, "country.name", "fips104")
  if (any(is.na(country.code))) {
    # An NA code would make the filter below silently match nothing.
    stop("Could not translate '", country.name, "' to a FIPS 10-4 country code")
  }

  # Same query for both tables. The original skipped the min.date filter on
  # daily.db, so recent events older than min.date could leak into the result.
  count.events <- function(events) {
    events <- select(events, SQLDATE, EventRootCode, ActionGeo_CountryCode,
                     ActionGeo_Lat, ActionGeo_Long)
    events <- filter(events, SQLDATE >= min.date,
                     EventRootCode == event.root.code,
                     ActionGeo_CountryCode == country.code)
    events <- group_by(events, SQLDATE, ActionGeo_Lat, ActionGeo_Long, EventRootCode)
    as.data.frame(summarise(events, count = n()))
  }

  df.out <- rbind(count.events(hist.db), count.events(daily.db))
  df.out$SQLDATE <- gDate(df.out$SQLDATE)
  df.out
}
#' Convert a ragged data frame into an edgelist
#'
#' Because GKG's persons, organizations, etc. fields contain varying numbers
#' of elements, converting them to a data frame produces a ragged data frame:
#' one row per GKG record, padded with \code{NA}. This function takes such a
#' data frame and returns every unordered pair of entities that appear
#' together in the same row (2 columns, lots of rows).
#'
#' @details Some namesets contain hundreds of names, and the number of pairs
#' grows quadratically with the number of names in a row. To increase speed at
#' the loss of some connections, you can limit the number of columns that are
#' included. By default, this is set to 30.
#'
#' @param df A ragged data frame (or matrix) of co-mentions, e.g. the output
#'   of \code{GKGcomentions}.
#' @param max.connections How many columns to include? Set a number or
#'   \code{"all"}. Default is 30. Values larger than the number of columns are
#'   treated as "all".
#'
#' @return A two-column matrix in which each row holds the two nodes defining
#'   one edge. Rows of \code{df} with fewer than two non-missing entries
#'   contribute no edges; if no edges exist a 0-row matrix is returned.
#'
#' @keywords GDELT, gdeltr
#'
#' @export
#'
#' @examples
#' \dontrun{
#' corruption <- gkg[grep("CORRUPTION", gkg$THEMES), ]
#' corruption <- GKGcomentions(corruption, type = "persons")
#' corruption.edgelist <- GKGedgelist(corruption, max.connections = 40)
#' }
GKGedgelist <- function(df, max.connections = 30) {
  # as.matrix() turns factor/character columns into one character matrix so
  # that a row can be handled as a plain vector.
  cells <- as.matrix(df)

  # Trim if needed, never asking for more columns than exist.
  if (!identical(max.connections, "all")) {
    keep <- min(as.integer(max.connections), ncol(cells))
    cells <- cells[, seq_len(keep), drop = FALSE]
  }

  # All unordered pairs among the non-missing entries of one row. combn()
  # needs at least two elements, so shorter rows yield no edges.
  row.edges <- function(x) {
    x <- x[!is.na(x)]
    if (length(x) < 2) {
      return(NULL)
    }
    t(combn(x, 2))
  }

  # Pair entities within each ROW (one GKG record). Iterating over the data
  # frame directly would walk its columns and pair entities from different
  # records.
  edges <- lapply(seq_len(nrow(cells)), function(i) row.edges(unname(cells[i, ])))
  edgelist <- do.call("rbind", edges)
  if (is.null(edgelist)) {
    edgelist <- matrix(character(0), ncol = 2)
  }
  edgelist
}
#' Graph changes in tone over time, given a GKG subset.
#'
#' This takes a GKG dataframe and a set of themes of interest and returns the
#' mean tone of daily news coverage mentioning each theme, either as a plot or
#' as the underlying data. Right now it only does themes.
#'
#' @param df A subset of the Global Knowledge Graph, probably a country. Must
#'   contain \code{DATE}, \code{THEMES} and \code{TONE} columns.
#' @param objects A vector of your entities of interest. Each one is matched
#'   against \code{THEMES} with \code{grep}, i.e. as a regular expression.
#' @param type persons, themes, or organizations? Only themes are implemented;
#'   any other value raises an error.
#' @param location Currently unused; kept for backward compatibility.
#' @param overlay Should the lines be plotted on the same graph (\code{TRUE})
#'   or in one facet per theme (\code{FALSE})?
#' @param span How much smoothing on the loess curve?
#' @param returndata If true, returns the summarised data and does not plot
#'   anything.
#'
#' @return A ggplot object, or, when \code{returndata = TRUE}, a data frame
#'   with columns \code{DATE}, \code{type}, \code{Number} (matching records per
#'   day) and \code{Tone} (mean of the first tone value per day).
#'
#' @keywords GDELT, gdeltr
#'
#' @export
#'
#' @examples
#' \dontrun{
#' gkg <- read.csv("gkg.csv")
#' mex <- gkg[grep("Mexico", gkg$LOCATIONS), ]
#' toneTrend(mex, c("CRIME_CARTELS", "SECURITY_SERVICES", "KILL"), type = "theme", location = "Mexico")
#' }
toneTrend <- function(df, objects, type, location, overlay = TRUE, span = 0.3, returndata = FALSE) {
  if (!tolower(type) %in% c("theme", "themes")) {
    # Other types used to fall through and fail later with an unrelated
    # "undefined columns selected" error.
    stop("toneTrend() currently only supports type = \"theme\"")
  }
  missing.cols <- setdiff(c("DATE", "THEMES", "TONE"), names(df))
  if (length(missing.cols) > 0) {
    stop("Input data frame is missing column(s): ", paste(missing.cols, collapse = ", "))
  }

  # One small data frame per theme. Every column is built with an explicit
  # length so that a theme with zero matching rows contributes zero rows
  # instead of failing on a length-1 assignment into an empty data frame.
  per.theme <- lapply(objects, function(object.i) {
    tmp <- df[grep(object.i, df$THEMES), , drop = FALSE]
    tones <- strsplit(as.character(tmp$TONE), ",")
    data.frame(
      DATE = tmp$DATE,
      tone = as.numeric(vapply(tones, function(x) x[1], character(1))),
      type = rep(tolower(gsub("_", " ", object.i)), nrow(tmp)),
      stringsAsFactors = FALSE
    )
  })
  theme.counts <- do.call(rbind, per.theme)
  if (is.null(theme.counts) || nrow(theme.counts) == 0) {
    stop("None of the requested objects were found in the THEMES column")
  }

  # Condense by day and theme.
  theme.counts$DATE <- gDate(theme.counts$DATE)
  theme.counts <- as.data.frame(
    summarise(group_by(theme.counts, DATE, type), Number = n(), Tone = mean(tone))
  )

  if (isTRUE(returndata)) {
    return(theme.counts)
  }

  # ggplot2 is only needed for plotting. It is attached (as before) so that
  # callers can keep adding layers to the returned plot.
  if (!require(ggplot2)) stop("Package 'ggplot2' is required for plotting")

  if (isTRUE(overlay)) {
    # All themes on the same graph.
    ggplot(data = theme.counts, aes(x = DATE, y = Tone, color = type)) +
      geom_point(size = 2, alpha = 0.7) +
      geom_smooth(method = "loess", span = span, se = FALSE, size = 1) +
      ylab("Tone") +
      theme_bw()
  } else {
    # One facet per theme.
    ggplot(data = theme.counts, aes(x = DATE, y = Tone, group = type)) +
      geom_line(size = 1, alpha = .3) +
      geom_smooth(method = "loess", span = span, se = FALSE, size = 1) +
      facet_wrap(~ type, ncol = 1) +
      theme_bw()
  }
}
#' Graph changes in themes over time, given a GKG subset.
#'
#' This takes a GKG dataframe and a list of themes and plots the mentions of
#' the themes over time. For every record whose \code{THEMES} match a theme,
#' the number of \code{LOCATIONS} entries matching \code{#location#} is
#' counted, and the counts are summed per day and theme.
#'
#' @param df A subset of the Global Knowledge Graph, probably a country. Must
#'   contain \code{DATE}, \code{THEMES} and \code{LOCATIONS} columns.
#' @param themes A vector of themes from GKG. Each one is matched against
#'   \code{THEMES} with \code{grep}, i.e. as a regular expression.
#' @param location A location field value to count (wrapped in \code{#} before
#'   matching), potentially more specific than the data frame subset.
#' @param overlay Should the lines be plotted on the same graph (\code{TRUE})
#'   or in one facet per theme (\code{FALSE})?
#' @param returndata If true, returns the summarised data and does not plot
#'   anything.
#' @param span The smoothing factor for the loess curve. Default is 0.3.
#'
#' @return A ggplot object, or, when \code{returndata = TRUE}, a data frame
#'   with columns \code{DATE}, \code{type} and \code{Number}.
#'
#' @keywords GDELT, gdeltr
#'
#' @export
#'
#' @examples
#' \dontrun{
#' gkg <- read.csv("gkg.csv")
#' mex <- gkg[grep("Mexico", gkg$LOCATIONS), ]
#' themeTrend(mex, c("CRIME_CARTELS", "SECURITY_SERVICES", "KILL"), location = "Mexico")
#' }
themeTrend <- function(df, themes, location, overlay = TRUE, returndata = FALSE, span = 0.3) {
  missing.cols <- setdiff(c("DATE", "THEMES", "LOCATIONS"), names(df))
  if (length(missing.cols) > 0) {
    stop("Input data frame is missing column(s): ", paste(missing.cols, collapse = ", "))
  }

  # Location fields are "#"-delimited, so anchor the match on both sides.
  location <- paste0("#", location, "#")

  # One small data frame per theme. Columns are built with explicit lengths so
  # a theme with zero matching rows contributes zero rows instead of failing.
  per.theme <- lapply(themes, function(type.i) {
    tmp <- df[grep(type.i, df$THEMES), , drop = FALSE]
    # as.character() because read.csv() may have produced a factor.
    location.records <- strsplit(as.character(tmp$LOCATIONS), ";")
    data.frame(
      DATE = tmp$DATE,
      Number = vapply(location.records, function(r) length(grep(location, r)), integer(1)),
      type = rep(as.character(type.i), nrow(tmp)),
      stringsAsFactors = FALSE
    )
  })
  theme.counts <- do.call(rbind, per.theme)
  if (is.null(theme.counts) || nrow(theme.counts) == 0) {
    stop("None of the requested themes were found in the THEMES column")
  }

  # Condense by day and theme.
  theme.counts <- as.data.frame(
    summarise(group_by(theme.counts, DATE, type), Number = sum(Number))
  )
  theme.counts$DATE <- gDate(theme.counts$DATE)

  if (isTRUE(returndata)) {
    return(theme.counts)
  }

  # ggplot2 is only needed for plotting. It is attached (as before) so that
  # callers can keep adding layers to the returned plot.
  if (!require(ggplot2)) stop("Package 'ggplot2' is required for plotting")

  if (isTRUE(overlay)) {
    # All themes on the same graph, with a little headroom above the maximum.
    maxheight <- max(theme.counts$Number) + 10
    ggplot(data = theme.counts, aes(x = DATE, y = Number, color = type)) +
      geom_line(size = 1, alpha = .3) +
      geom_smooth(method = "loess", span = span, se = FALSE, size = 1) +
      ylim(0, maxheight) +
      ylab("Count") +
      theme_bw()
  } else {
    # One facet per theme. Uses the caller's span; this branch previously
    # hard-coded 0.3.
    ggplot(data = theme.counts, aes(x = DATE, y = Number, group = type)) +
      geom_line(size = 1, alpha = .3) +
      geom_smooth(method = "loess", span = span, se = FALSE, size = 1) +
      facet_wrap(~ type, ncol = 1) +
      theme_bw()
  }
}
#' In a GKG subset, how many times are given themes mentioned in conjunction with given locations?
#'
#' This takes a GKG dataframe, a list of themes, and a list of countries and
#' returns, per theme and country, how many location mentions occur in records
#' matching the theme, together with the country's total number of location
#' mentions in \code{df} and the ratio of the two.
#'
#' @param df A subset of the Global Knowledge Graph including more than one
#'   country and one theme. Must contain \code{THEMES} and \code{LOCATIONS}.
#' @param themes A vector of themes from GKG. Each one is matched against
#'   \code{THEMES} with \code{grep}, i.e. as a regular expression.
#' @param countries A vector of countries in country name form.
#'
#' @return A data frame with columns \code{Country}, \code{Count},
#'   \code{Theme}, \code{Total} and \code{Percent} (\code{Count / Total}, a
#'   proportion between 0 and 1), suitable for faceted barplotting.
#'
#' @keywords GDELT, gdeltr
#'
#' @export
#'
#' @examples
#' \dontrun{
#' latin.protests <- LocationThemes(protests,
#'   themes = c("SLUMS", "ECON", "NEW_CONSTRUCTION", "VIOLENT_UNREST", "PUBLIC_TRANSPORT", "EDUCATION"),
#'   countries = c("Brazil", "Argentina", "Venezuela", "Colombia", "Uruguay", "Paraguay",
#'                 "Bolivia", "Ecuador", "Peru", "Chile", "Mexico", "Honduras"))
#' ggplot(latin.protests, aes(y = Percent, x = Theme, fill = Theme)) +
#'   geom_bar(stat = "identity") + facet_wrap(~ Country, nrow = 5) + theme_bw() +
#'   theme(strip.background = element_rect(fill = "white"), legend.position = "top",
#'         axis.ticks = element_blank(), axis.text.x = element_blank()) + labs(x = NULL)
#' }
LocationThemes <- function(df, themes, countries) {
  # "df" will be the overarching big theme, like protests, for the whole world.
  if (length(themes) == 0) stop("'themes' must contain at least one theme")

  # Frequency table of the third "#"-separated field (the country code) over
  # all ";"-separated location records in the given LOCATIONS cells. Returned
  # as a data frame so that an empty input gives zero rows rather than an
  # error.
  count.country.codes <- function(location.cells, count.name) {
    # as.character() twice: the column may be a factor, and unlist() of an
    # empty list is NULL.
    records <- as.character(unlist(strsplit(as.character(location.cells), ";"), use.names = FALSE))
    codes <- vapply(strsplit(records, "#"), function(f) f[3], character(1))
    freq <- table(codes)
    out <- data.frame(Country = as.character(names(freq)), n = as.vector(freq),
                      stringsAsFactors = FALSE)
    names(out)[2] <- count.name
    out
  }

  per.theme <- lapply(themes, function(type.i) {
    counts <- count.country.codes(df$LOCATIONS[grep(type.i, df$THEMES)], "Count")
    counts$Theme <- rep(tolower(gsub("_", " ", type.i)), nrow(counts))
    counts
  })
  theme.counts <- do.call(rbind, per.theme)

  # Keep only the requested countries, then switch from codes to names.
  countrylist <- countrycode(countries, "country.name", "fips104")
  theme.counts <- theme.counts[theme.counts$Country %in% countrylist, , drop = FALSE]
  if (nrow(theme.counts) > 0) {
    theme.counts$Country <- countrycode(theme.counts$Country, "fips104", "country.name")
  }

  # Total location mentions per country across the whole input, used as the
  # denominator for Percent.
  countrytotals <- count.country.codes(df$LOCATIONS, "Total")
  if (nrow(countrytotals) > 0) {
    countrytotals$Country <- countrycode(countrytotals$Country, "fips104", "country.name")
  }

  theme.counts <- merge(theme.counts, countrytotals, by = "Country")
  theme.counts$Percent <- theme.counts$Count / theme.counts$Total
  theme.counts
}
18 | He also points out that it's much more difficult to work with than the original event stream and recommends using Perl or Python for working with it. (For one example, it's a nested structure using a combination of tabs, hashtags, and semicolons as separators.) While Perl/Python may be better, there are lots of people (including me) who are much more comfortable working in R, even if it's inferior. Here are my quick hacks for working with the alpha experimental release of GDELT's Global Knowledge Graph: 19 | * `GKGcomentions` for pulling co-mentioned organizations, people, or countries from a subsetted GKG file. 20 | * `GKGextractcameo` will return the events from the traditional stream associated with a subset of the GKG namespaces. It can return either the vector of GLOBALEVENTIDs, or, if you have a dplyr/sqlite setup, the full data frame of events. 21 | * `toner` will, for a given GKG subset, return the tones associated with each person/place/organization associated with it. 22 | * `GKGcounts` will take a subset of the GKG and return just the info in the `COUNTS` column, nicely formatted. This refers to info in the "Counts" column, not sums of number of events as above. 23 | * `GKGedgelist` will take a ragged data frame with co-mentions and format it into a two-column edge list for export to Gephi or other network analysis tool. 24 | * `write.gephi`: a wrapper for `write.table` that puts quotes around all elements in the df and writes with semicolon separators and without row/column names. 25 | * `nameFixer` will standardize names from the GKG. Only has about 30 (mostly Syria-related) names right now. This isn't really worth using yet and I'm sure there's a better approach than this. 26 | * `themeTrend`. This will plot the number of mentions of arbitrary themes per day. 27 | * `toneTrend`. This will plot the average tone of coverage per day of persons, organizations, or themes. 
 28 | 29 | -------------------------------------------------------------------------------- /R/GKGLatLong.R: -------------------------------------------------------------------------------- 1 | #' Create a geographic node/edgelist from a GKG dataframe. 2 | #' 3 | #' This takes a GKG dataframe and creates node and edgelists of the co-mentioned geographic locations. These node and edgelists can be imported into Gephi and viewed with the "Geo Layout" option. 4 | #' It saves the edge and node lists in the working directory under file names that you specify. 5 | #' 6 | #' @param gkg.df A subset of the Global Knowledge Graph 7 | #' @param filename The filename of the output (exclude file endings) 8 | #' 9 | #' @return edgelist.csv A semicolon-separated csv with an edgelist using the full geographic names. 10 | #' @return nodelist.csv A semicolon-separated csv including label, id, latitude, and longitude. 11 | #' 12 | #' @keywords GDELT, gdeltr 13 | #' 14 | #' @details The files that the function saves can be imported into Gephi. Import the nodelist file first, and make sure that \code{lat} and \code{lng} are set to "Double". Import the edgelist next. After importing both, clear up any duplicates by going to Data Laboratory > More Actions > Detect and Merge Duplicates, merging on the Label field. 15 | #' Make sure you have the Geo Layout plugin installed, as well as the "Map of Countries" plugin if you want to do the full visualization inside Gephi. 16 | #' 17 | #'Feature to add: Take the full dataframe, separate by date to allow dynamic graphs.
#' Create a geographic node/edgelist from a GKG dataframe.
#'
#' This takes a GKG dataframe and creates node and edgelists of the
#' co-mentioned geographic locations. Each \code{LOCATIONS} cell holds
#' \code{;}-separated location records whose fields are separated by
#' \code{#}; this function reads field 2 as the full name, field 5 as the
#' latitude and field 6 as the longitude. Two locations are connected when
#' they appear in the same GKG record.
#'
#' @details The files that the function saves can be imported into Gephi.
#' Import the nodelist file first, and make sure that \code{lat} and
#' \code{lng} are set to "Double". Import the edgelist next. Make sure you
#' have the Geo Layout plugin installed, as well as the "Map of Countries"
#' plugin if you want to do the full visualization inside Gephi.
#'
#' Feature to add: Take the full dataframe, separate by date to allow dynamic
#' graphs.
#'
#' @param gkg.df A subset of the Global Knowledge Graph with a
#'   \code{LOCATIONS} column.
#' @param filename The filename of the output (exclude file endings). The
#'   function writes \code{<filename>.edges.csv} and
#'   \code{<filename>.nodes.csv}.
#' @param max.connections Maximum number of distinct locations per record that
#'   are paired up, or \code{"all"}. Default is 24.
#'
#' @return Called for its side effect of writing two semicolon-separated
#'   files: an edge list with columns \code{Source}, \code{Target},
#'   \code{Type}, and a node list with columns \code{id}, \code{lat},
#'   \code{lng}, \code{label}. Returns \code{NULL} invisibly.
#'
#' @keywords GDELT, gdeltr
#'
#' @export
#'
#' @examples
#' \dontrun{
#' ieds <- gkg[grep("LANDMINE", gkg$THEMES), ]
#' GKGLatLong(ieds, filename = "ied.network")
#' }
GKGLatLong <- function(gkg.df, filename, max.connections = 24) {
  # This function reads LOCATIONS; the original check looked for ORGANIZATIONS.
  if (!"LOCATIONS" %in% names(gkg.df)) stop("No column named 'LOCATIONS'")

  # One character vector of "#"-delimited location records per GKG row.
  records <- strsplit(as.character(gkg.df$LOCATIONS), split = ";", fixed = TRUE)
  records <- lapply(records, function(r) r[!is.na(r) & nzchar(r)])

  # k-th "#"-separated field of each record (NA when a record is too short).
  record.field <- function(record, k) {
    vapply(strsplit(record, "#", fixed = TRUE), function(f) f[k], character(1))
  }

  # Edges: every unordered pair of distinct full names within one row. All
  # rows are processed; the original only looked at a fixed slice of rows.
  row.edges <- function(r) {
    full.names <- unique(record.field(r, 2L))
    full.names <- full.names[!is.na(full.names)]
    if (!identical(max.connections, "all")) {
      full.names <- full.names[seq_len(min(length(full.names), as.integer(max.connections)))]
    }
    # Rows with a single location are nodes without edges.
    if (length(full.names) < 2) {
      return(NULL)
    }
    t(combn(full.names, 2))
  }
  edges <- do.call(rbind, lapply(records, row.edges))
  if (is.null(edges)) {
    edges <- matrix(character(0), ncol = 2)
  }
  locations.edgelist <- data.frame(
    Source = edges[, 1],
    Target = edges[, 2],
    Type = rep("Undirected", nrow(edges)),
    stringsAsFactors = FALSE
  )

  # Nodes: full name plus coordinates for every record, exact duplicates
  # removed.
  all.records <- as.character(unlist(records, use.names = FALSE))
  node.latlong <- data.frame(
    id = record.field(all.records, 2L),
    lat = record.field(all.records, 5L),
    lng = record.field(all.records, 6L),
    stringsAsFactors = FALSE
  )
  node.latlong$label <- node.latlong$id
  node.latlong <- unique(node.latlong)

  node.filename <- paste0(filename, ".nodes.csv")
  edge.filename <- paste0(filename, ".edges.csv")
  write.table(locations.edgelist, file = edge.filename, sep = ";", row.names = FALSE)
  write.table(node.latlong, file = node.filename, sep = ";", row.names = FALSE)
  invisible(NULL)
}
#' Given a subsetted dataframe from the Global Knowledge Graph, return a df with co-mentions.
#'
#' This takes a GKG dataframe (or subset thereof) and returns a dataframe with
#' all co-mentioned entities of the desired type listed on the same row, one
#' entity per column, padded with \code{NA}. This is designed for export to
#' social network analysis software. Run the output through
#' \code{write.gephi} if needed. Person names are standardised with
#' \code{nameFixer}.
#'
#' @param gkg.df A subset of the Global Knowledge Graph.
#' @param type Data type to extract: \code{"themes"}, \code{"persons"},
#'   \code{"organizations"} (or \code{"orgs"}), or \code{"countries"}.
#'   \code{"latlong"} is reserved but not implemented yet and raises an error,
#'   as does any other value.
#'
#' @return A data frame with one row per input row and one column per
#'   co-mentioned entity. For \code{"countries"} the cells hold country names
#'   translated from the third \code{#}-separated field of each location.
#'
#' @keywords GDELT, gdeltr
#'
#' @export
#'
#' @examples
#' \dontrun{
#' ieds <- gkg[grep("LANDMINE", gkg$THEMES), ]
#' ieds.orgs <- GKGcomentions(ieds, type = "organizations")
#' }
GKGcomentions <- function(gkg.df, type) {
  # Split a ";"-separated column into a character matrix with one row per
  # input row, padded with NA. The matrix is built explicitly because
  # t(sapply(...)) returns a single row when every cell holds one entity.
  split.to.matrix <- function(column, column.name) {
    if (!column.name %in% names(gkg.df)) {
      stop("No column named '", column.name, "'")
    }
    parts <- strsplit(as.character(column), split = ";", fixed = TRUE)
    n.max <- max(vapply(parts, length, integer(1)), 0L)
    padded <- vapply(parts, function(p) p[seq_len(n.max)], character(n.max))
    matrix(padded, nrow = length(parts), ncol = n.max, byrow = TRUE)
  }

  if (type %in% c("organizations", "orgs")) {
    return(as.data.frame(split.to.matrix(gkg.df$ORGANIZATIONS, "ORGANIZATIONS")))
  }

  if (type == "themes") {
    return(as.data.frame(split.to.matrix(gkg.df$THEMES, "THEMES")))
  }

  if (type == "persons") {
    persons <- as.data.frame(split.to.matrix(gkg.df$PERSONS, "PERSONS"))
    for (i in seq_len(ncol(persons))) {
      persons[, i] <- nameFixer(persons[, i])
    }
    return(persons)
  }

  if (type == "countries") {
    locations <- split.to.matrix(gkg.df$LOCATIONS, "LOCATIONS")
    # Third "#"-separated field of every location record is the country code.
    # (The original first sliced column 3 of this matrix, which collapsed it
    # to a vector and made the branch fail.)
    codes <- vapply(strsplit(as.vector(locations), "#", fixed = TRUE),
                    function(f) f[3], character(1))
    # countrycode() is vectorised: translate all cells at once, then restore
    # the row/column layout (both sides are column-major).
    country.names <- countrycode(codes, "fips104", "country.name")
    countries.df <- as.data.frame(
      matrix(country.names, nrow = nrow(locations), ncol = ncol(locations)),
      stringsAsFactors = FALSE
    )
    return(countries.df)
  }

  if (type == "latlong") {
    # The original branch could never run to completion and had no return
    # value; fail with an explicit message instead of an obscure error.
    stop("type = \"latlong\" is not implemented yet")
  }

  stop("Unknown type '", type,
       "': expected \"themes\", \"persons\", \"organizations\" or \"countries\"")
}
#'
#'
#' @param countryname A normal English country name (character) \code{countryname}
#' @param eventtype What event code resolution? Options: "code", "base", "root", "quad". \code{eventtype}
#' @param min.date Furthest date back you want (numeric, YYYYMMDD). \code{min.date}
#'
#' @return df A data frame of counts per event per month in the country
#'
#' @keywords GDELT, gdeltr
#'
#' @export
#'
#' @examples
#' mex.protest <- getEventCounts("Mexico", eventtype="root", min.date=20000101)


getEventCounts <- function(countryname, eventtype="root", min.date=20000101){
  # Map the requested resolution onto the GDELT column holding the event code.
  eventtype <- match.arg(eventtype, c("root", "base", "code", "quad"))
  code.col <- switch(eventtype,
    root = "EventRootCode",
    base = "EventBaseCode",
    code = "EventCode",
    quad = "QuadClass"
  )

  # Functions from these packages are called directly below, so fail loudly
  # instead of continuing after a FALSE from require().
  for (pkg in c("countrycode", "reshape2", "dplyr")) {
    if (!require(pkg, character.only = TRUE)) {
      stop("Package '", pkg, "' is required but could not be loaded")
    }
  }
  # Nothing from the SQLite packages is called directly here; they are attached
  # on a best-effort basis, as before, in case hist.db / daily.db need them.
  require(RSQLite)
  require(RSQLite.extfuns)

  fips.code <- countrycode(countryname, "country.name", "fips104")
  iso.code <- countrycode(countryname, "country.name", "iso3c")
  if (length(fips.code) != 1L || is.na(fips.code) || is.na(iso.code)) {
    stop("Could not translate 'countryname' into a single FIPS/ISO country code")
  }

  # Every resolution is handled identically; only the code column differs.
  # Raw events from both tables are stacked and counted in one step, so a month
  # present in both tables yields a single row with the summed count.
  hist.events <- .gdeltCountryEvents(hist.db, code.col, min.date, fips.code, iso.code)
  daily.events <- .gdeltCountryEvents(daily.db, code.col, min.date, fips.code, iso.code)
  all.events <- rbind(hist.events, daily.events)

  if (nrow(all.events) == 0L) {
    # No matching events: return an empty frame rather than failing in dcast().
    return(data.frame(MonthYear = all.events$MonthYear))
  }

  # One row per MonthYear, one column per event code, 0 where a code is absent.
  dcast(all.events, MonthYear ~ event.code,
        fun.aggregate = length, value.var = "event.code", fill = 0)
}

# Fetch the events of one GDELT table that happened in the country (FIPS code)
# with both actors from that country (ISO3 code) on or after `min.date`.
# Returns a plain data frame with columns MonthYear and event.code.
.gdeltCountryEvents <- function(events.tbl, code.col, min.date, fips.code, iso.code) {
  # Column names are spelled out per resolution so the calls behave the same
  # regardless of which programmatic-selection helpers the installed dplyr has.
  events <- switch(code.col,
    EventRootCode = select(events.tbl, SQLDATE, MonthYear, EventRootCode,
                           ActionGeo_CountryCode, Actor1CountryCode, Actor2CountryCode),
    EventBaseCode = select(events.tbl, SQLDATE, MonthYear, EventBaseCode,
                           ActionGeo_CountryCode, Actor1CountryCode, Actor2CountryCode),
    EventCode = select(events.tbl, SQLDATE, MonthYear, EventCode,
                       ActionGeo_CountryCode, Actor1CountryCode, Actor2CountryCode),
    QuadClass = select(events.tbl, SQLDATE, MonthYear, QuadClass,
                       ActionGeo_CountryCode, Actor1CountryCode, Actor2CountryCode)
  )
  events <- filter(events, SQLDATE >= min.date,
                   ActionGeo_CountryCode == fips.code,
                   Actor1CountryCode == iso.code,
                   Actor2CountryCode == iso.code)
  events <- as.data.frame(events)
  data.frame(MonthYear = events$MonthYear,
             event.code = events[[code.col]],
             stringsAsFactors = FALSE)
}
# --------------------------------------------------------------------------------