├── R
    ├── .DS_Store
    ├── gdeltr-package.r
    ├── nameFixer_data.R
    ├── nameFixer.R
    ├── gDate.R
    ├── write.gephi.R
    ├── fillSeries.R
    ├── GKGextractcameo.R
    ├── GKGcounts.R
    ├── toner.R
    ├── subsetEventCountry.R
    ├── GKGedgelist.R
    ├── toneTrend.R
    ├── themeTrend.R
    ├── LocationThemes.R
    ├── GKGLatLong.R
    ├── GKGcomentions.R
    └── getEventCounts.R
├── data
    ├── .DS_Store
    ├── nameFixer_data.rda
    └── .Rapp.history
├── man
    ├── gdeltr.Rd
    ├── nameFixer_data.Rd
    ├── nameFixer.Rd
    ├── getCounts.Rd
    ├── gDate.Rd
    ├── GKGextractcameo.Rd
    ├── toner.Rd
    ├── write.gephi.Rd
    ├── fillSeries.Rd
    ├── GKGcomentions.Rd
    ├── getComentions.Rd
    ├── themeTrend.Rd
    ├── GKGcounts.Rd
    ├── getEventCounts.Rd
    ├── subsetEventCountry.Rd
    ├── toneTrend.Rd
    ├── LocationThemes.Rd
    ├── GKGedgelist.Rd
    └── GKGLatLong.Rd
├── NAMESPACE
├── DESCRIPTION
├── LICENSE
└── README.md


/R/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahalterman/gdeltr/HEAD/R/.DS_Store


--------------------------------------------------------------------------------
/data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahalterman/gdeltr/HEAD/data/.DS_Store


--------------------------------------------------------------------------------
/R/gdeltr-package.r:
--------------------------------------------------------------------------------
1 | #' gdeltr
2 | #'
3 | #' @name gdeltr
4 | #' @docType package
5 | NULL
6 | 


--------------------------------------------------------------------------------
/data/nameFixer_data.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahalterman/gdeltr/HEAD/data/nameFixer_data.rda


--------------------------------------------------------------------------------
/data/.Rapp.history:
--------------------------------------------------------------------------------
1 | load("/Users/andyhalterman/R/gdeltr/data/nameFixer_data.rda")
2 | load("/Users/andyhalterman/R/gdeltr/data/nameFixer_data.rda")
3 | 


--------------------------------------------------------------------------------
/man/gdeltr.Rd:
--------------------------------------------------------------------------------
 1 | \docType{package}
 2 | \name{gdeltr}
 3 | \alias{gdeltr}
 4 | \alias{gdeltr-package}
 5 | \title{gdeltr}
 6 | \description{
 7 | gdeltr
 8 | }
 9 | 
10 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | export(GKGLatLong)
 2 | export(GKGcomentions)
 3 | export(GKGcounts)
 4 | export(GKGedgelist)
 5 | export(GKGextractcameo)
 6 | export(LocationThemes)
 7 | export(fillSeries)
 8 | export(gDate)
 9 | export(getEventCounts)
10 | export(nameFixer)
11 | export(subsetEventCountry)
12 | export(themeTrend)
13 | export(toneTrend)
14 | export(toner)
15 | export(write.gephi)
16 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: gdeltr
 2 | Title: R tools for GDELT
 3 | Description: Useful tools for working with GDELT and the Global Knowledge Graph
 4 | Version: 0.1
 5 | Author: 'Andrew Halterman' <ahalterman0@gmail.com>
 6 | Maintainer: 'Andrew Halterman' <ahalterman0@gmail.com>
 7 | Depends:
 8 |     R (>= 3.0.1),
 9 |     dplyr,
10 |     countrycode,
11 |     RSQLite,
12 |     RSQLite.extfuns
13 | License: MIT
14 | LazyData: true
15 | ByteCompile: true
16 | 


--------------------------------------------------------------------------------
/R/nameFixer_data.R:
--------------------------------------------------------------------------------
 1 | #' Name translation dataframe for GKG 
 2 | #' 
 3 | #' Lookup table used by \code{nameFixer} to replace variant name spellings with a standard form.
 4 | #' 
 5 | #' \itemize{
 6 | #'   \item oldvalue: The name to be replaced 
 7 | #'   \item newvalue: The standard name form
 8 | #' }
 9 | #' 
10 | #' @note This is a work in progress.
11 | #' @docType data
12 | #' @keywords datasets gdelt gdeltr
13 | #' @name nameFixer_data 
14 | #' @usage nameFixer_data
15 | #' @format A data frame with a few dozen rows and 2 columns
16 | NULL


--------------------------------------------------------------------------------
/man/nameFixer_data.Rd:
--------------------------------------------------------------------------------
 1 | \docType{data}
 2 | \name{nameFixer_data}
 3 | \alias{nameFixer_data}
 4 | \title{Name translation dataframe for GKG}
 5 | \format{A data frame with a few dozen rows and 2 columns}
 6 | \usage{
 7 | nameFixer_data
 8 | }
 9 | \description{
10 | \itemize{ \item oldvalue: The name to be replaced \item
11 | newvalue: The standard name form }
12 | }
13 | \note{
14 | This is a work in progress.
15 | }
16 | \keyword{datasets,}
17 | \keyword{gdelt,}
18 | \keyword{gdeltr}
19 | 
20 | 


--------------------------------------------------------------------------------
/man/nameFixer.Rd:
--------------------------------------------------------------------------------
 1 | \name{nameFixer}
 2 | \alias{nameFixer}
 3 | \title{Standardize names in a data frame.}
 4 | \usage{
 5 | nameFixer(namevector)
 6 | }
 7 | \arguments{
 8 |   \item{namevector}{A vector of names (character or factor) to standardize.}
 9 | }
10 | \value{
11 | newvec A vector with cleaned names
12 | }
13 | \description{
14 | This will be very much a function in progress.  The focus
15 | is on Syrian names right now. Credit to
16 | http://susanejohnston.wordpress.com/ for find-and-replace
17 | code.
18 | }
19 | \examples{
20 | Some R code here
21 | }
22 | \keyword{GDELT,}
23 | \keyword{gdeltr}
24 | 
25 | 


--------------------------------------------------------------------------------
/man/getCounts.Rd:
--------------------------------------------------------------------------------
 1 | \name{getCounts}
 2 | \alias{getCounts}
 3 | \title{Given a subsetted dataframe from the Global Knowledge Graph, return the info in the "Counts" field as a data frame.}
 4 | \usage{
 5 |   getCounts(gkg.df)
 6 | }
 7 | \arguments{
 8 |   \item{gkg.df}{\code{gkg.df}}
 9 | }
10 | \value{
11 |   counts A data frame containing count information.
12 | }
13 | \description{
14 |   This will only give you the info in the counts field, and
15 |   in no particular order. Next steps: getting the date and
16 |   themes to come with it.
17 | }
18 | \examples{
19 | R code here showing how your function works
20 | }
21 | \keyword{GDELT,}
22 | \keyword{gdeltr}
23 | 
24 | 


--------------------------------------------------------------------------------
/man/gDate.Rd:
--------------------------------------------------------------------------------
 1 | \name{gDate}
 2 | \alias{gDate}
 3 | \title{Given a vector of 8 digit dates (yyyymmdd), returns a date obj in "yyyy-mm-dd"}
 4 | \usage{
 5 | gDate(date.vector)
 6 | }
 7 | \arguments{
 8 |   \item{date.vector}{A vector of the SQLDATE column from
 9 |   GDELT, in form yyyymmdd \code{date.vector}}
10 | }
11 | \value{
12 | newdate A vector of class date with "yyyy-mm-dd" format
13 | \code{newdate}
14 | }
15 | \description{
16 | This comes up a lot working with GDELT. Make sure you only
17 | pass it a vector, not the whole data frame!
18 | }
19 | \examples{
20 | R code here showing how your function works
21 | }
22 | \keyword{GDELT,}
23 | \keyword{gdeltr}
24 | 
25 | 


--------------------------------------------------------------------------------
/man/GKGextractcameo.Rd:
--------------------------------------------------------------------------------
 1 | \name{GKGextractcameo}
 2 | \alias{GKGextractcameo}
 3 | \title{Extract CAMEO events from GKG}
 4 | \usage{
 5 | GKGextractcameo(df, justvector = TRUE)
 6 | }
 7 | \arguments{
 8 |   \item{df}{A dataframe of GKG namesets}
 9 | 
10 |   \item{justvector}{Return vector of ID numbers instead of
11 |   actual data frame?}
12 | }
13 | \value{
14 | gdelt.df A vector of all linked CAMEO event IDs
15 | }
16 | \description{
17 | From a GKG subset dataframe, return a dataframe of all
18 | linked CAMEO event IDs. If \code{justvector=TRUE}, the
19 | function will return only a vector of linked CAMEO event IDs
20 | rather than the complete dataframe of all matching events.
21 | }
22 | \examples{
23 | cameos.events <- GKGextractcameo(mexico.cartels)
24 | }
25 | \keyword{GDELT,}
26 | \keyword{gdeltr}
27 | 
28 | 


--------------------------------------------------------------------------------
/R/nameFixer.R:
--------------------------------------------------------------------------------
 1 | #' Standardize names in a data frame.
 2 | #' 
 3 | #' This will be very much a function in progress.  The focus is on Syrian names right now.
 4 | #' Credit to http://susanejohnston.wordpress.com/ for find-and-replace code.
 5 | #'
 6 | #' @param data A vector of names \code{gkg.df}
 7 | #'
 8 | #' @return newvec A vector with cleaned names
 9 | #'
10 | #' @keywords GDELT, gdeltr
11 | #'
12 | #' @export
13 | #' 
14 | #' @examples
15 | #' Some R code here
16 | 
17 | 
18 | nameFixer <- function(namevector) {
19 |   if (is.factor(namevector)){
20 |     namevector <- as.character(namevector)
21 |     }
22 |     oldvalue <- nameFixer_data[,1]
23 |     newvalue <- nameFixer_data[,2]
24 |    
25 |     newvec <- namevector    
26 |     for (i in unique(oldvalue)) newvec[namevector == i] <- newvalue[oldvalue == i]
27 |     return(newvec)
28 |   }


--------------------------------------------------------------------------------
/R/gDate.R:
--------------------------------------------------------------------------------
 1 | #' Given a vector of 8 digit dates (yyyymmdd), returns a date obj in "yyyy-mm-dd"
 2 | #' 
 3 | #' This comes up a lot working with GDELT.  
 4 | #' Make sure you only pass it a vector, not the whole data frame!
 5 | #'
 6 | #' @param date.vector A vector of the SQLDATE column from GDELT, in form yyyymmdd \code{date.vector}
 7 | #'
 8 | #' @return newdate A vector of class date with "yyyy-mm-dd" format  \code{newdate}
 9 | #'
10 | #' @keywords GDELT, gdeltr
11 | #'
12 | #' @export
13 | #' 
14 | #' @examples
15 | #' R code here showing how your function works
16 | 
17 | 
18 | gDate <- function(date.vector) {
19 |   date.vector <- as.character(date.vector)
20 |   x <- substr(date.vector, 1, 4)
21 |   y <- substr(date.vector, 5, 6)
22 |   z <- substr(date.vector, 7, 8)
23 |   date.vector <- paste(x,y,z,sep="-")
24 |   newdate <- as.Date(date.vector, format="%Y-%m-%d")
25 |   return(newdate)
26 | }


--------------------------------------------------------------------------------
/man/toner.Rd:
--------------------------------------------------------------------------------
 1 | \name{toner}
 2 | \alias{toner}
 3 | \title{Given a GKG subset, return the tones associated with each person/place/organization}
 4 | \usage{
 5 | toner(df, type)
 6 | }
 7 | \arguments{
 8 |   \item{df}{A subset of the GKG, probably along one
 9 |   theme\code{df}}
10 | 
11 |   \item{type}{Return tones of organization, locations, or
12 |   persons?}
13 | 
14 |   \item{summarize}{Should the mean for each unique entity
15 |   be returned?  Caution: lots of alt. spellings
16 |   \code{summarize}}
17 | }
18 | \value{
19 | tones A df with names/locations and tones (and counts if
20 | summarized).
21 | }
22 | \description{
23 | summarize will return the mean tone for each entity.  This
24 | feature isn't done yet.
25 | }
26 | \examples{
27 | ieds <- gkg[grep("LANDMINE", gkg$THEMES),]
28 | person.tone.ieds <- toner(ieds, type="persons")
29 | dim(person.tone.ieds)
30 | # [1] 4545    2
31 | }
32 | \keyword{GDELT,}
33 | \keyword{gdeltr}
34 | 
35 | 


--------------------------------------------------------------------------------
/man/write.gephi.Rd:
--------------------------------------------------------------------------------
 1 | \name{write.gephi}
 2 | \alias{write.gephi}
 3 | \title{Wrapper for write.table for outputting from the GKG to Gephi}
 4 | \usage{
 5 | write.gephi(gkg.df, filename, type)
 6 | }
 7 | \arguments{
 8 |   \item{gkg.df}{A dataframe to export to gephi
 9 |   \code{gkg.df}}
10 | 
11 |   \item{filename}{The name for the file.  Call it .csv even
12 |   though it's semicolon-separated. \code{filename}}
13 | 
14 |   \item{type}{"edge" or "node".  "edge" single-quotes every
15 |   value; "node" single-quotes only the \code{ID} column.}
16 | }
17 | \value{
18 | Nothing useful; a semicolon-separated file with quoted values is written to \code{filename}.
19 | }
20 | \description{
21 | Specifically, it adds quotes to prevent extra splitting,
22 | removes row/col names, and saves with a semicolon
23 | separator. Obvs., it's undirected. If it's a node list, the
24 | nodes MUST be in a column labeled "ID".
25 | }
26 | \examples{
27 | R code here showing how your function works
28 | }
29 | \keyword{GDELT,}
30 | \keyword{gdeltr}
31 | 
32 | 


--------------------------------------------------------------------------------
/man/fillSeries.Rd:
--------------------------------------------------------------------------------
 1 | \name{fillSeries}
 2 | \alias{fillSeries}
 3 | \title{Fills in missing dates in a data frame of GDELT events for plotting or time series analysis}
 4 | \usage{
 5 | fillSeries(df, begin.date = "2000-01-01", end.date = "2013-09-30",
 6 |   date.column = "SQLDATE", extraclean = FALSE)
 7 | }
 8 | \arguments{
 9 |   \item{df}{A GDELT dataframe.  \code{df}}
10 | 
11 |   \item{begin.date}{The earliest date.  Defaults to Jan 1,
12 |   2000.  \code{begin.date}}
13 | 
14 |   \item{end.date}{The last date.  Defaults to Sept 30,
15 |   2013.  \code{end.date}}
16 | 
17 |   \item{date.column}{The name of the column containing
18 |   dates.  Defaults to "SQLDATE" \code{date.column}}
19 | }
20 | \value{
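21 | df A data frame with at least one row for every date between \code{begin.date} and \code{end.date}; dates with no events have \code{NA} in the other columns.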
22 | }
23 | \description{
24 | Fills in missing dates in a data frame of GDELT events for
25 | plotting or time series analysis
26 | }
27 | \examples{
28 | R code here showing how your function works
29 | }
30 | \keyword{GDELT,}
31 | \keyword{gdeltr}
32 | 
33 | 


--------------------------------------------------------------------------------
/man/GKGcomentions.Rd:
--------------------------------------------------------------------------------
 1 | \name{GKGcomentions}
 2 | \alias{GKGcomentions}
 3 | \title{Given a subsetted dataframe from the Global Knowledge Graph, return a df with co-mentions.}
 4 | \usage{
 5 | GKGcomentions(gkg.df, type)
 6 | }
 7 | \arguments{
 8 |   \item{gkg.df}{A subset of the Global Knowledge Graph
 9 |   \code{gkg.df}}
10 | 
11 |   \item{type}{Data types to subset: "themes", "persons",
12 |   "organizations", "countries", or "latlong".
13 |   \code{gkg.df}}
14 | }
15 | \value{
16 | countries.df A data frame containing count information.
17 | }
18 | \description{
19 | This takes a GKG dataframe (or subset thereof) returns a
20 | dataframe with all co-mentioned entities of the desired
21 | type listed on the same row. This is designed for export to
22 | social network analysis software.  Run the output through
23 | \code{write.gephi} if needed. New feature: uses
24 | \code{nameFixer} to standardize people names.
25 | }
26 | \examples{
27 | ieds <- gkg[grep("LANDMINE", gkg$THEMES),]
28 | ieds.orgs <- GKGcomentions(ieds, type="organizations")
29 | }
30 | \keyword{GDELT,}
31 | \keyword{gdeltr}
32 | 
33 | 


--------------------------------------------------------------------------------
/man/getComentions.Rd:
--------------------------------------------------------------------------------
 1 | \name{getComentions}
 2 | \alias{getComentions}
 3 | \title{Given a subsetted dataframe from the Global Knowledge Graph, return a df with co-mentions.}
 4 | \usage{
 5 |   getComentions(gkg.df, type)
 6 | }
 7 | \arguments{
 8 |   \item{gkg.df}{A subset of the Global Knowledge Graph
 9 |   \code{gkg.df}}
10 | 
11 |   \item{type}{Data types to subset: "themes", "persons",
12 |   "organizations", "countries", or "placenames".
13 |   \code{gkg.df}}
14 | }
15 | \value{
16 |   co-mentions A data frame containing count information.
17 | }
18 | \description{
19 |   This takes a GKG dataframe (or subset thereof) returns a
20 |   dataframe with all co-mentioned entities of the desired
21 |   type listed on the same row. This is designed for export
22 |   to social network analysis software.  Run the output
23 |   through \code{write.gephi} if needed. New feature: uses
24 |   \code{nameFixer} to standardize people names.
25 | }
26 | \examples{
27 | ieds <- gkg[grep("LANDMINE", gkg$THEMES),]
28 | ieds.orgs <- getComentions(ieds, type="organizations")
29 | }
30 | \keyword{GDELT,}
31 | \keyword{gdeltr}
32 | 
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2013 Andy Halterman
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 6 | this software and associated documentation files (the "Software"), to deal in
 7 | the Software without restriction, including without limitation the rights to
 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 9 | the Software, and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/R/write.gephi.R:
--------------------------------------------------------------------------------
 1 | #' Wrapper for write.table for outputting from the GKG to Gephi
 2 | #' 
 3 | #' Specifically, it adds quotes to prevent extra splitting, removes row/col names, and saves with a semicolon separator.
 4 | #' Obvs., it's undirected.
 5 | #' If it's a node list, the nodes MUST be in a column labeled "ID".
 6 | #'
 7 | #' @param gkg.df A dataframe to export to gephi \code{gkg.df}
 8 | #' @param filename The name for the file.  Call it .csv even though its semicolons \code{gkg.df}
 9 | #' @param type ragged or list?  List will generate an edge list rather than a ragged data frame.
10 | #'
11 | #' @return gkg.df A semicolon seperated file with quotes.
12 | #'
13 | #' @keywords GDELT, gdeltr
14 | #'
15 | #' @export
16 | #' 
17 | #' @examples
18 | #' R code here showing how your function works
19 | 
20 | write.gephi <- function(gkg.df, filename, type) { 
21 | pst <- function(x) {paste0("\'", x, "\'")}
22 | if (type=="edge") {
23 | gkg.df <- as.data.frame(lapply(gkg.df[,1:ncol(gkg.df)], FUN= function(x) {sapply(x, FUN=pst)}))
24 | }
25 | if (type=="node"){
26 |   gkg.df$ID <- pst(gkg.df$ID)
27 | }
28 | 
29 | write.table(gkg.df, file=filename, sep=";", row.names=FALSE, col.names=TRUE)
30 | }


--------------------------------------------------------------------------------
/man/themeTrend.Rd:
--------------------------------------------------------------------------------
 1 | \name{themeTrend}
 2 | \alias{themeTrend}
 3 | \title{Graph changes in themes over time, given a GKG subset.}
 4 | \usage{
 5 | themeTrend(df, themes, location, overlay = TRUE, returndata = FALSE,
 6 |   span = 0.3)
 7 | }
 8 | \arguments{
 9 |   \item{df}{A subset of the Global Knowledge Graph,
10 |   probably a country \code{gkg.df}}
11 | 
12 |   \item{themes}{A vector of themes from GKG.}
13 | 
14 |   \item{span}{The smoothing factor for the loess curve.
15 |   Default is 0.3}
16 | 
17 |   \item{location}{A location, potentially more specific
18 |   than the data frame subset.}
19 | 
20 |   \item{overlay}{Should the lines be plotted on the same
21 |   graph or separate?}
22 | 
23 |   \item{returndata}{If true, returns the raw data and does
24 |   not plot anything}
25 | }
26 | \value{
27 | theme.counts A data frame containing number of events per
28 | day per theme.
29 | }
30 | \description{
31 | This takes a GKG dataframe and a list of themes and plots
32 | the mentions of the themes over time.
33 | }
34 | \examples{
35 | gkg <- read.csv("gkg.csv")
36 | mex <- gkg[grep("Mexico", gkg$LOCATIONS),]
37 | themeTrend(mex, c("CRIME_CARTELS", "SECURITY_SERVICES", "KILL"), location="Mexico")
38 | }
39 | \keyword{GDELT,}
40 | \keyword{gdeltr}
41 | 
42 | 


--------------------------------------------------------------------------------
/man/GKGcounts.Rd:
--------------------------------------------------------------------------------
 1 | \name{GKGcounts}
 2 | \alias{GKGcounts}
 3 | \title{Given a dataframe of "Counts" information from the GKG}
 4 | \usage{
 5 | GKGcounts(gkg)
 6 | }
 7 | \arguments{
 8 |   \item{gkg}{A subset dataframe of the GKG. \code{gkg}}
 9 | }
10 | \value{
11 | counts A data frame containing information from the
12 | \code{Counts} column.
13 | }
14 | \description{
15 | The Global Knowledge Graph contains two elements, the
16 | "Counts" file, containing information on the numbers of
17 | people killed, affected, etc. every day by location. The
18 | second file, the "Graph file", contains the associated
19 | themes, organizations, people, and locations.  Filtering,
20 | especially by themes, is very useful, but much of the
21 | useable information in the GKG is in the Counts file.  This
22 | function will return the Counts file, nicely formatted
23 | (it's \code{;} and \code{#} separated, which is a hassle),
24 | in no particular order.  It loses the date and theme
25 | information, though, which is the next room for
26 | improvement.
27 | }
28 | \examples{
29 | # Say we were interested in the number of people killed by mines/IEDs.
30 | ieds <- gkg[grep("LANDMINE", gkg$THEMES),]
31 | ieds.counts <- GKGcounts(ieds)
32 | }
33 | \keyword{GDELT,}
34 | \keyword{gdeltr}
35 | 
36 | 


--------------------------------------------------------------------------------
/R/fillSeries.R:
--------------------------------------------------------------------------------
 1 | #' Fills in missing dates in a data frame of GDELT events for plotting or time series analysis
 2 | #' 
 3 | #'
 4 | #' @param df A GDELT dataframe.  \code{df}
 5 | #' @param begin.date The earliest date.  Defaults to Jan 1, 2000.  \code{begin.date}
 6 | #' @param end.date The last date.  Defaults to Sept 30, 2013.  \code{end.date}
 7 | #' @param date.column The name of the column containing dates.  Defaults to "SQLDATE" \code{date.column}
 8 | #' 
 9 | #'
10 | #' @return df2 A 
11 | #'
12 | #' @keywords GDELT, gdeltr
13 | #'
14 | #' @export
15 | #' 
16 | #' @examples
17 | #' R code here showing how your function works
18 | #' 
19 | 
20 | fillSeries <- function(df, begin.date="2000-01-01", end.date="2013-09-30", date.column="SQLDATE", extraclean=FALSE){
21 |   if (class(df$date.column)=="integer"){
22 |     df$date.column <- gDate(df$date.column)
23 |   }
24 |   daily <- as.data.frame(seq(from=as.Date(begin.date), to=as.Date(end.date), by="1 day"))
25 |   names(daily) <- "Date"
26 |   df <- merge(x=daily, y=df, by.x="Date", by.y=date.column, all.x=TRUE)
27 |   if (extraclean==TRUE) {
28 |   df[is.na(df$count),5] <- 0
29 |   df <- df[,c(1:5)]
30 |   names(df) <- c("Date", "ActionGeo_Lat", "ActionGeo_Long", "EventRootCode", "Count")
31 |   return(df)
32 |   }
33 |   else {
34 |   return(df)
35 |   }
36 | }


--------------------------------------------------------------------------------
/man/getEventCounts.Rd:
--------------------------------------------------------------------------------
 1 | \name{getEventCounts}
 2 | \alias{getEventCounts}
 3 | \title{Get event counts per country-month from GDELT}
 4 | \usage{
 5 | getEventCounts(countryname, eventtype = "root", min.date = 20000101)
 6 | }
 7 | \arguments{
 8 |   \item{countryname}{A normal English country name
 9 |   (character) \code{countryname}}
10 | 
11 |   \item{eventtype}{What event code resolution?  Options:
12 |   "code", "base", "root", "quad". \code{eventtype}}
13 | 
14 |   \item{min.date}{Furthest date back you want (numeric).
15 |   \code{min.date}}
16 | }
17 | \value{
18 | df A data frame counts per event per month in the country
19 | }
20 | \description{
21 | By default, this goes back to Jan 1 2000, but you can
22 | change it. Inputs must be characters. Requires the
23 | \code{countrycode} package to translate from country name
24 | to FIPS104. Assumes you have GDELT in a dplyr tbl in
25 | tables called "hist.db" and "daily.db".  I have them in a
26 | SQLite database, but dplyr will let you use whatever you
27 | want.  See Hadley Wickham's github page.
28 | }
29 | \details{
30 | Reverted to a prev. version.  No pre-build scaffolding to
31 | get all of the columns.
32 | }
33 | \examples{
34 | mex.protest <- getEventCounts("Mexico", eventtype="root", min.date=20000101)
35 | }
36 | \keyword{GDELT,}
37 | \keyword{gdeltr}
38 | 
39 | 


--------------------------------------------------------------------------------
/man/subsetEventCountry.Rd:
--------------------------------------------------------------------------------
 1 | \name{subsetEventCountry}
 2 | \alias{subsetEventCountry}
 3 | \title{Subset GDELT by a given EventRootCode and country name, returning lat/long for each event.}
 4 | \usage{
 5 | subsetEventCountry(event.root.code, country.name, min.date = 20000101)
 6 | }
 7 | \arguments{
 8 |   \item{event.root.code}{One of the 20 EventRootCodes in
 9 |   CAMEO/GDELT, including leading zero
10 |   \code{event.root.code}}
11 | 
12 |   \item{country.name}{A normal English country name
13 |   \code{country.name}}
14 | 
15 |   \item{min.date}{Furthest date back you want.
16 |   \code{min.date}}
17 | }
18 | \value{
19 | df.out A data frame of the events of interest from the
20 | country, including geographic coordinates.
21 | }
22 | \description{
23 | By default, this goes back to Jan 1 2000, but you can
24 | change it. Inputs must be characters and EventRootCodes
25 | must include leading zeros. Requires the \code{countrycode}
26 | package to translate from country name to FIPS104. Assumes
27 | you have GDELT in a dplyr tbl in tables called "hist.db"
28 | and "daily.db".  I have them in a SQLite database, but
29 | dplyr will let you use whatever you want.  See Hadley
30 | Wickham's github page.
31 | }
32 | \examples{
33 | mex.protest <- subsetEventCountry("14", "Mexico", min.date=20000101)
34 | }
35 | \keyword{GDELT,}
36 | \keyword{gdeltr}
37 | \keyword{geographic,}
38 | 
39 | 


--------------------------------------------------------------------------------
/R/GKGextractcameo.R:
--------------------------------------------------------------------------------
 1 | #' Extract CAMEO events from GKG
 2 | #'
 3 | #' From a GKG subset dataframe, return a dataframe of all linked CAMEO event IDs.
 4 | #' If \code{justvector=TRUE}, the fuction will return only a vector of linked CAMEO event IDs rather than the complete dataframe of all matching events. 
 5 | #'
 6 | #' @param df A dataframe of GKG namesets
 7 | #' @param justvector Return vector of ID numbers instead of actual data frame?
 8 | #'
 9 | #' @return gdelt.df A vector of all linked CAMEO event IDs 
10 | #'
11 | #' @keywords GDELT, gdeltr
12 | #'
13 | #' @export
14 | #' 
15 | #' @examples
16 | #' cameos.events <- GKGextractcameo(mexico.cartels)
17 | 
18 | GKGextractcameo <- function(df, justvector=TRUE) {
19 |  if (!"CAMEOEVENTIDS" %in% names(df)) stop("No column named 'CAMEOEVENTIDS'")
20 |  raw <- df$CAMEOEVENTIDS 
21 |  if (length(raw)==0) stop("No rows in input data frame.")
22 |  cameoeventids <- unlist(strsplit(raw, split=","))
23 |  if (justvector==TRUE){
24 |  return(cameoeventids)
25 |  }
26 |  if (justvector==FALSE){
27 |    # need to check if there's a 'hist.db' and 'daily.db'
28 |    # More importantly, need to figure out how to pull records like this.
29 |    stop("This feature isn't complete yet")
30 |    hist <- as.data.frame(filter(hist.db, GLOBALEVENTID==cameoeventids))
31 |    daily <- as.data.frame(filter(daily.db, GLOBALEVENTID==cameoeventids))
32 |    gdelt.df <- rbind(hist, daily)
33 | return(gdelt.df)
34 |  }
35 | }


--------------------------------------------------------------------------------
/man/toneTrend.Rd:
--------------------------------------------------------------------------------
 1 | \name{toneTrend}
 2 | \alias{toneTrend}
 3 | \title{Graph changes in tone over time, given a GKG subset.}
 4 | \usage{
 5 | toneTrend(df, objects, type, location, overlay = TRUE, span = 0.3,
 6 |   returndata = FALSE)
 7 | }
 8 | \arguments{
 9 |   \item{df}{A subset of the Global Knowledge Graph,
10 |   probably a country \code{gkg.df}}
11 | 
12 |   \item{objects}{A vector of your entities of interest
13 |   (persons, themes, or organizations.)}
14 | 
15 |   \item{type}{persons, themes, or organizations? Only works
16 |   for "theme" now.}
17 | 
18 |   \item{location}{A location, potentially more specific
19 |   than the data frame subset.}
20 | 
21 |   \item{overlay}{Should the lines be plotted on the same
22 |   graph or separate?}
23 | 
24 |   \item{returndata}{If true, returns the raw data and does
25 |   not plot anything}
26 | 
27 |   \item{span}{How much smoothing on the loess curve?}
28 | }
29 | \value{
30 | theme.counts A data frame containing number of events per
31 | day per theme.
32 | }
33 | \description{
34 | This takes a GKG dataframe and organizations/themes/people
35 | of interest and returns the tone of daily news coverage of
36 | that entity. Right now it only does themes.
37 | }
38 | \examples{
39 | gkg <- read.csv("gkg.csv")
40 | mex <- gkg[grep("Mexico", gkg$LOCATIONS),]
41 | toneTrend(mex, c("CRIME_CARTELS", "SECURITY_SERVICES", "KILL"), type="theme", location="Mexico")
42 | }
43 | \keyword{GDELT,}
44 | \keyword{gdeltr}
45 | 
46 | 


--------------------------------------------------------------------------------
/man/LocationThemes.Rd:
--------------------------------------------------------------------------------
 1 | \name{LocationThemes}
 2 | \alias{LocationThemes}
 3 | \title{In a GKG subset, how many times are given themes mentioned in conjunction with given locations?}
 4 | \usage{
 5 | LocationThemes(df, themes, countries)
 6 | }
 7 | \arguments{
 8 |   \item{df}{A subset of the Global Knowledge Graph
 9 |   including more than one country and one theme \code{df}}
10 | 
11 |   \item{themes}{A vector of themes from GKG. \code{themes}}
12 | 
13 |   \item{countries}{A vector of countries in country name
14 |   form \code{countries}}
15 | }
16 | \value{
17 | theme.counts A data frame containing counts per theme per
18 | country, suitable for faceted barplotting.
19 | }
20 | \description{
21 | This takes a GKG dataframe, a list of themes, and a list of
22 | countries and plots the distribution of mentions per
23 | country.
24 | }
25 | \examples{
26 | latin.protests <- LocationThemes(protests, themes=c("SLUMS", "ECON", "NEW_CONSTRUCTION", "VIOLENT_UNREST", "PUBLIC_TRANSPORT", "EDUCATION"), countries=c("Brazil", "Argentina", "Venezuela", "Colombia", "Uruguay", "Paraguay", "Bolivia", "Ecuador", "Peru", "Chile", "Mexico", "Honduras"))
27 | ggplot(latin.protests, aes(y=Percent, Country, x=Theme, fill=Theme)) + geom_bar(stat="identity") + facet_wrap( ~ Country, nrow=5) + theme_bw() + theme(strip.background = element_rect(fill = 'white'), legend.position="top", axis.ticks = element_blank(), axis.text.x = element_blank()) + labs(x=NULL)
28 | }
29 | \keyword{GDELT}
30 | \keyword{gdeltr}
31 | 
32 | 


--------------------------------------------------------------------------------
/man/GKGedgelist.Rd:
--------------------------------------------------------------------------------
 1 | \name{GKGedgelist}
 2 | \alias{GKGedgelist}
 3 | \title{Convert a ragged data frame into an edgelist}
 4 | \usage{
 5 | GKGedgelist(df, max.connections = 30)
 6 | }
 7 | \arguments{
 8 |   \item{df}{A subset of the GKG \code{df}}
 9 | 
10 |   \item{max.connections}{How many columns to include? Set a
11 |   number or "all". Default is 30.}
12 | }
13 | \value{
14 | edgelist A data frame with two columns containing the two
15 | nodes defining each edge.
16 | }
17 | \description{
18 | Because GKG's persons, organizations, etc. fields contain
19 | varying numbers of elements, converting them to a data
20 | frame will produce a ragged data frame (rows with different
21 | numbers of columns, albeit padded with NAs). Gephi can
22 | import ragged data frames, though not if you plan to
23 | include node attributes.  This function will take a ragged
24 | data frame and return an edgelist data frame (2 columns,
25 | lots of rows).
26 | }
27 | \details{
28 | Right now, even with the apply setup (instead of the awful
29 | earlier for-loop) it's still really, really slow.
30 | 
31 | Some namesets contain hundreds of names.  To increase speed
32 | at the loss of some connections, you can limit the number
33 | of columns that are included.  By default, this is set to
34 | 30.
35 | }
36 | \examples{
37 | corruption<- gkg[grep("CORRUPTION", gkg$THEMES),]
38 | corruption <- GKGcomentions(corruption, type="persons")
39 | corruption.edgelist <- GKGedgelist(corruption, max.connections=40)
40 | }
41 | \keyword{GDELT}
42 | \keyword{gdeltr}
43 | 
44 | 


--------------------------------------------------------------------------------
/R/GKGcounts.R:
--------------------------------------------------------------------------------
 1 | #' Given a dataframe of "Counts" information from the GKG
 2 | #' 
 3 | #' The Global Knowledge Graph contains two elements, the "Counts" file, containing information on the numbers of people killed, affected, etc. every day by location.
 4 | #' The second file, the "Graph file", contains the associated themes, organizations, people, and locations.  Filtering, especially by themes, is very useful, but much of the useable information in the GKG is in the Counts file.  This function will return the Counts file, nicely formatted (it's \code{;} and \code{#} separated, which is a hassle), in no particular order.  It loses the date and theme information, though, which is the next room for improvement.
 5 | #'
 6 | #' @param gkg A subset dataframe of the GKG. \code{gkg}
 7 | #'
 8 | #' @return counts A data frame containing information from the \code{Counts} column. 
 9 | #'
10 | #' @keywords GDELT, gdeltr
11 | #'
12 | #' @export
13 | #' 
14 | #' @examples
15 | #' # Say we were interested in the number of people killed by mines/IEDs.
16 | #' ieds <- gkg[grep("LANDMINE", gkg$THEMES),]
17 | #' ieds.counts <- GKGcounts(ieds)
18 | 
19 | GKGcounts <- function(gkg) {
20 |   if (!"COUNTS" %in% names(gkg)) stop("No column named 'COUNTS'")
21 |   counts <- gkg$COUNTS
22 |   if (length(counts)==0) stop("No results--0 rows in input dataframe")
23 |   counts <- strsplit(counts, split=";")
24 |   counts <- unlist(counts)
25 |   counts <- strsplit(counts, split="#")
26 |   nMax <- max(sapply(counts, length))
27 |   counts <- cbind(t(sapply(counts, function(i) i[1:nMax])))
28 |   counts <- as.data.frame(counts)
29 |   return(counts)
30 | }


--------------------------------------------------------------------------------
/man/GKGLatLong.Rd:
--------------------------------------------------------------------------------
 1 | \name{GKGLatLong}
 2 | \alias{GKGLatLong}
 3 | \title{Create a geographic node/edgelist from a GKG dataframe.}
 4 | \usage{
 5 | GKGLatLong(gkg.df, filename)
 6 | }
 7 | \arguments{
 8 |   \item{\code{gkg.df}}{A subset of the Global Knowledge
 9 |   Graph}
10 | 
11 |   \item{\code{filename}}{The filename of the output
12 |   (exclude file endings)}
13 | }
14 | \value{
15 | edgelist.csv A semicolon-separated csv with an edgelist
16 | using the full geographic names.
17 | 
18 | nodelist.csv A semicolon-separated csv including label, id,
19 | latitude, and longitude.
20 | }
21 | \description{
22 | This takes a GKG dataframe and creates node and edgelists
23 | of the co-mentioned geographic locations. These node and
24 | edgelists can be imported into Gephi and viewed with the
25 | "Geo Layout" option. It saves the edge and node lists in
26 | the working directory under file names that you specify.
27 | }
28 | \details{
29 | The files that the function saves can be imported into
30 | Gephi. Import the nodelist file first, and make sure that
31 | \code{lat} and \code{lng} are set to "Double". Import the
32 | edgelist next. After importing both, clear up any
33 | duplicates by going to Data Laboratory > More Actions >
34 | Detect and Merge Duplicates, merging on the Label field.
35 | Make sure you have the Geo Layout plugin installed, as well
36 | as the "Map of Countries" plugin if you want to do the full
37 | visualization inside Gephi.
38 | 
39 | Feature to add: Take the full dataframe, separate by date
40 | to allow dynamic graphs.
41 | }
42 | \examples{
43 | ieds <- gkg[grep("LANDMINE", gkg$THEMES),]
44 | GKGLatLong(ieds, file="ied.network")
45 | }
46 | \keyword{GDELT}
47 | \keyword{gdeltr}
48 | 
49 | 


--------------------------------------------------------------------------------
/R/toner.R:
--------------------------------------------------------------------------------
 1 | #' Given a GKG subset, return the tones associated with each person/place/organization
 2 | #' 
 3 | #' summarize will return the mean tone for each entity.  This feature isn't done yet.
 4 | #'
 5 | #' @param df A subset of the GKG, probably along one theme\code{df}
 6 | #' @param type Return tones of organization, locations, or persons?
 7 | #' @param summarize Should the mean for each unique entity be returned?  Caution: lots of alt. spellings \code{summarize}
 8 | #'
 9 | #' @return tones A df with names/locations and tones (and counts if summarized).
10 | #'
11 | #' @keywords GDELT, gdeltr
12 | #'
13 | #' @export
14 | #' 
15 | #' @examples
16 | #' > ieds <- gkg[grep("LANDMINE", gkg$THEMES),]
17 | #' > person.tone.ieds <- toner(ieds, type="persons")
18 | #' > dim(person.tone.ieds)
19 | #' [1] 4545    2
20 | 
21 | 
22 | toner <- function(df, type){
23 |   if (!"TONE" %in% names(df)) stop("No column named 'TONE' in input data frame")
24 |   if (nrow(df)==0) stop("Input data frame has 0 rows")
25 |   
26 |   if (type=="Person" | type=="person" | type=="persons" | type=="PERSONS") {
27 |   persontone <- data.frame(stringsAsFactors=FALSE)
28 |   if (!"PERSONS" %in% names(df)) stop("No column named 'PERSONS' in input data frame")
29 |   for (i in 1:nrow(df)) {
30 |     line <- df[i,]
31 |     persons <- as.character(line$PERSONS)
32 |     tone <- as.character(line$TONE)
33 |     tone <- unlist(strsplit(tone, ","))
34 |     tone <- as.numeric(tone[1])
35 |     persons <- as.character(unlist(strsplit(persons, ";")))
36 |     ptone.tmp <- cbind(persons, rep(tone, length(persons)))
37 |     persontone <- rbind(persontone, ptone.tmp)
38 |   }
39 |   persontone$persons <- as.character(persontone$persons)
40 |   persontone$persons <- nameFixer(persontone$persons)
41 |   persontone$V2 <- as.numeric(as.character(persontone$V2))
42 |   persontone <- as.data.frame(summarise(group_by(tbl_df(persontone), persons), count=n(), meantone=mean(V2)))
43 |   return(persontone)
44 |   }
45 |   #  if (type=="locations" | "LOCATIONS") {
46 |   #   if (!"LOCATIONS" %in% names(df)) stop("No column named 'LOCATIONS' in input data frame")
47 |   # }
48 | 
49 |   # if (summarize=TRUE){
50 |   # take the df, group_by column 1, column 2=mean(column2)
51 |   # return
52 |   # }
53 | }
54 | 


--------------------------------------------------------------------------------
/R/subsetEventCountry.R:
--------------------------------------------------------------------------------
 1 | #' Subset GDELT by a given EventRootCode and country name, returning lat/long for each event.
 2 | #' 
 3 | #' By default, this goes back to Jan 1 2000, but you can change it.
 4 | #' Inputs must be characters and EventRootCodes must include leading zeros.
 5 | #' Requires the \code{countrycode} package to translate from country name to FIPS104.
 6 | #' Assumes you have GDELT in a dplyr tble in tables called "hist.db" and "daily.db".  I have them in a SQLite database, but dplyr will let you use whatever you want.  See Hadley Wickham's github page.
 7 | #'
 8 | #' @param event.root.code One of the 20 EventRootCodes in CAMEO/GDELT, including leading zero \code{event.root.code}
 9 | #' @param country.name A normal English country name \code{country.name}
10 | #' @param min.date Furthest date back you want.  \code{min.date}
11 | #'
12 | #' @return df.out A data frame of the events of interet from the country, including geographic coordinates. 
13 | #'
14 | #' @keywords GDELT, geographic, gdeltr
15 | #'
16 | #' @export
17 | #' 
18 | #' @examples
19 | #' mex.protest <- subsetEventCountry("14", "Mexico", min.date==20000101)
20 | 
21 | 
22 | subsetEventCountry <- function(event.root.code, country.name, min.date=20000101){
23 |   require(countrycode)
24 |   require(reshape2)
25 |   require(dplyr)
26 |   require(RSQLite)
27 |   require(RSQLite.extfuns)
28 |   
29 |   country.code <- countrycode(country.name, "country.name", "fips104")
30 |   df <- select(hist.db, SQLDATE, EventRootCode, ActionGeo_CountryCode, ActionGeo_Lat, ActionGeo_Long)
31 |   df <- filter(df, SQLDATE >= min.date, EventRootCode==event.root.code, ActionGeo_CountryCode==country.code)
32 |   df <- group_by(df, SQLDATE, ActionGeo_Lat, ActionGeo_Long, EventRootCode)
33 |   df <- summarise(df, count=n())
34 |   df <- as.data.frame(df)
35 |   
36 |   df.daily <- select(daily.db, SQLDATE, EventRootCode, ActionGeo_CountryCode, ActionGeo_Lat, ActionGeo_Long)
37 |   df.daily <- filter(df.daily, EventRootCode==event.root.code, ActionGeo_CountryCode==country.code)
38 |   df.daily <- group_by(df.daily, SQLDATE, EventRootCode, ActionGeo_Lat, ActionGeo_Long)
39 |   df.daily <- summarise(df.daily, count=n())
40 |   df.daily <- as.data.frame(df.daily)
41 |   
42 |   df.out <- rbind(df, df.daily)
43 |   
44 |   df.out$SQLDATE <- gDate(df.out$SQLDATE)
45 |   
46 |   return(df.out)
47 | }


--------------------------------------------------------------------------------
/R/GKGedgelist.R:
--------------------------------------------------------------------------------
 1 | #' Convert a ragged data frame into an edgelist
 2 | #' 
 3 | #' Because GKG's persons, organizations, etc. fields countain varying numbers of elements, converting them to a data frame will produce a ragged data frame (rows with different numbers of columns, albeit padded with NAs.
 4 | #' Gephi can import ragged data frames, thought not if you plan to include node attributes.  This function will take a ragged data frame and return a edgelist data frame (2 columns, lots of rows).
 5 | #' 
 6 | #' Right now, even with the apply setup (instead of the awful earlier for-loop) it's still really, really slow.  
 7 | #'
 8 | #'@details Some namesets contain hundreds of names.  To increase speed at the loss of some connections, you can limit the number of columns that are included.  By default, this is set to 30.
 9 | #'
10 | #' @param df A subset of the GKG \code{df}
11 | #' @param max.connections How many columns to include? Set a number or "all". Default is 30.
12 | #'
13 | #' @return edgelist A data frame with two columns containing the two nodes defining  each edge.
14 | #'
15 | #' @keywords GDELT, gdeltr
16 | #'
17 | #'@details 
18 | #' Speet test with 1,000 x 30: \n
19 | #' No compilation, omit NAs all at once at the end: 83.417 \n
20 | #' With compilation, omit NAs all all at once at the end: 77.874 \n
21 | #' No compilation, omit NAs every row: 69.892 \n
22 | #' With compilation, omit NAs every row: 70.164 \n
23 | #' [these weren't very scientific since I ran them once each and did other stuff in the background]
24 | #'
25 | #' @export
26 | #' 
27 | #' @examples
28 | #' corruption<- gkg[grep("CORRUPTION", gkg$THEMES),]
29 | #' corruption <- GKGcomentions(corruption, type="persons")
30 | #' corruption.edgelist <- GKGedgelist(corruption, max.connections=40)
31 | #' 
32 | 
33 | # is it faster to omit nas while it's running or after?
34 | 
35 | GKGedgelist <- function(df, max.connections=30) {
36 |   # trim if needed
37 |   if (max.connections!="all"){
38 |     df <- df[,1:max.connections]
39 |   }
40 |   # the apply part of the function.  "combn" provides all the unique combos of x in length n=2. t() transposes.
41 |   split.fun <- function(x){
42 |     #  x <- x[!is.na(x)]
43 |     x <- t(combn(x, 2))
44 |     x <- x[!is.na(x[,1]),]
45 |     x <- x[!is.na(x[,2]),]
46 |     return(x)
47 |   }
48 |   # do the call over the length of the df, bind each result.  Remove NA's.  "complete.cases" wasn't working for me.
49 |   edgelist <- do.call("rbind", lapply(df, split.fun))
50 | }
51 | 


--------------------------------------------------------------------------------
/R/toneTrend.R:
--------------------------------------------------------------------------------
 1 | #' Graph changes in tone over time, given a GKG subset.
 2 | #' 
 3 | #' This takes a GKG dataframe and organizations/themes/people of interest and returns the tone of daily news coverage of that entity. Right now it only does themes.
 4 | #' 
 5 | #'
 6 | #' @param df A subset of the Global Knowledge Graph, probably a country \code{gkg.df}
 7 | #' @param object A vector of your entities of interest (persons, themes, or organizations.)
 8 | #' @param type persons, themes, or organizations? Only works for "theme" now.
 9 | #' @param location A location, potentially more specific than the data frame subset.
10 | #' @param overlay Should the lines be plotted on the same graph or separate?
11 | #' @param returndata If true, returns the raw data and does not plot anything
12 | #' @param span How much smoothing on the loess curve?
13 | #'
14 | #' @return theme.counts A data frame containing number of events per day per theme.
15 | #'
16 | #' @keywords GDELT, gdeltr
17 | #' 
18 | #'
19 | #' @export
20 | #' 
21 | #' @examples
22 | #' gkg <- read.csv("gkg.csv")
23 | #' mex <- gkg[grep("Mexico", gkg$LOCATIONS),]
24 | #' toneTrend(mex, c("CRIME_CARTELS", "SECURITY_SERVICES", "KILL"), type="theme", location="Mexico")
25 | 
26 | 
27 | toneTrend <- function(df, objects, type, location, overlay=TRUE, span=0.3, returndata=FALSE){
28 |   require(ggplot2)
29 |   theme.counts <- data.frame()
30 |   for(i in 1:length(objects)){
31 |     # loop through the themes vector, return # per day of each.
32 |     object.i <- objects[i]
33 |     if(type=="theme" | type=="THEMES"){
34 |       tmp <- df[grep(object.i, df$THEMES),]
35 |       tones <- strsplit(as.character(tmp$TONE), ",")
36 |       tmp$tone <- as.numeric(sapply(tones, "[", 1))
37 |       tmp$type <- tolower(gsub("_", " ", object.i))
38 |       theme.counts <- rbind(theme.counts, tmp)
39 |     }
40 |   }
41 |   theme.counts <- theme.counts[,c("DATE", "tone", "type")]
42 |   # just the cols we need, condense by day
43 |   theme.counts$DATE <- gDate(theme.counts$DATE)
44 |   theme.counts <- as.data.frame(summarise(group_by(tbl_df(theme.counts), DATE, type), Number=n(), Tone=mean(tone)))
45 |   #maxheight <- max(theme.counts$Number) * 1.05
46 |   if(returndata==TRUE){
47 |     return(theme.counts)
48 |     stop()
49 |   }
50 |   if(overlay==TRUE){
51 |     # all on the same graph
52 |     return(ggplot(data=theme.counts, aes(x=DATE, y=Tone, color=type)) + geom_point(size=2, alpha=0.7) + geom_smooth(method="loess", span=span, se=FALSE, size=1) + ylab("Tone")  + theme_bw())
53 |   }
54 |   if(overlay==FALSE){
55 |     # on different graphs
56 |     return(ggplot(data=theme.counts, aes(x=DATE, y=Tone, type)) + geom_line(size=1, alpha=.3) + geom_smooth(method="loess", span=span, se=FALSE, size=1) + facet_wrap(~ type, ncol=1) + theme_bw())
57 |   }
58 | }


--------------------------------------------------------------------------------
/R/themeTrend.R:
--------------------------------------------------------------------------------
 1 | #' Graph changes in themes over time, given a GKG subset.
 2 | #' 
 3 | #' This takes a GKG dataframe and a list of themes and plots the mentions of the themes over time.
 4 | #' 
 5 | #'
 6 | #' @param df A subset of the Global Knowledge Graph, probably a country \code{gkg.df}
 7 | #' @param themes A vector of themes from GKG.
 8 | #' @param span The smoothing factor for the loess curve. Default is 0.3
 9 | #' @param location A location, potentially more specific than the data frame subset.
10 | #' @param overlay Should the lines be plotted on the same graph or separate?
11 | #' @param returndata If true, returns the raw data and does not plot anything
12 | #'
13 | #' @return theme.counts A data frame containing number of events per day per theme.
14 | #'
15 | #' @keywords GDELT, gdeltr
16 | #' 
17 | #'
18 | #' @export
19 | #' 
20 | #' @examples
21 | #' gkg <- read.csv("gkg.csv")
22 | #' mex <- gkg[grep("Mexico", gkg$LOCATIONS),]
23 | #' themeTrends(mex, c("CRIME_CARTELS", "SECURITY_SERVICES", "KILL"), location="Mexico")
24 | 
25 | 
26 | themeTrend <- function(df, themes, location, overlay=TRUE, returndata=FALSE, span=0.3){
27 |   require(gdeltr)
28 |   require(ggplot2)
29 |   # df should preferably just be COUNTS, THEMES, DATE for the region (or theme?) you're interested in.
30 |   # location must be a country code right now. In the future it should be a city or anything else grepable.
31 |   theme.counts <- data.frame()
32 |   location <- paste0("#", location, "#")
33 |   for(i in 1:length(themes)){
34 |     # loop through the themes vector, return # per day of each.
35 |     type.i <- themes[i]
36 |     tmp <- df[grep(type.i, df$THEMES),]
37 |     tmp$Number <- sapply(tmp$LOCATIONS, function(x) length(grep(location, unlist(strsplit(x, ";")))))
38 |     tmp$type <- type.i
39 |     theme.counts <- rbind(theme.counts, tmp)
40 |   }
41 |   theme.counts <- theme.counts[,c("DATE", "Number", "type")]
42 |   # just the cols we need, condense by day
43 |   theme.counts <- as.data.frame(summarise(group_by(tbl_df(theme.counts), DATE, type), Number=sum(Number)))
44 |   theme.counts$DATE <- gDate(theme.counts$DATE)
45 |   maxheight <- max(theme.counts$Number) + 10
46 |  
47 |   if(returndata==TRUE){
48 |     return(theme.counts)
49 |     stop()
50 |   }
51 |   
52 |   if(overlay==TRUE){
53 |     # all on the same graph
54 |     return(ggplot(data=theme.counts, aes(x=DATE, y=Number, color=type)) + geom_line(size=1, alpha=.3) + geom_smooth(method="loess", span=span, se=FALSE, size=1) + ylim(0, maxheight) + ylab("Count")  + theme_bw())
55 |   }
56 |   
57 |   if(overlay==FALSE){
58 |     # on different graphs
59 |     return(ggplot(data=theme.counts, aes(x=DATE, y=Number, type)) + geom_line(size=1, alpha=.3) + geom_smooth(method="loess", span=0.3, se=FALSE, size=1) + facet_wrap(~ type, ncol=1) + theme_bw())
60 |   }
61 | }
62 | 


--------------------------------------------------------------------------------
/R/LocationThemes.R:
--------------------------------------------------------------------------------
 1 | #' In a GKG subset, how many times are given themes mentioned in conjunction with given locations?
 2 | #' 
 3 | #' This takes a GKG dataframe, a list of themes, and a list of countries and plots the distribution of mentions per country.
 4 | #' 
 5 | #'
 6 | #' @param df A subset of the Global Knowledge Graph including more than one country and one theme \code{df}
 7 | #' @param themes A vector of themes from GKG. \code{themes}
 8 | #' @param countries A vector of countries in country name form \code{countries}
 9 | #'
10 | #' @return theme.counts A data frame containing counts per theme per country, suitable for faceted barplotting.
11 | #'
12 | #' @keywords GDELT, gdeltr
13 | #' 
14 | #'
15 | #' @export
16 | #' 
17 | #' @examples
18 | #' latin.protests <- LocationThemes(protests, themes=c("SLUMS", "ECON", "NEW_CONSTRUCTION", "VIOLENT_UNREST", "PUBLIC_TRANSPORT", "EDUCATION"), countries=c("Brazil", "Argentina", "Venezuela", "Colombia", "Uruguay", "Paraguay", "Bolivia", "Ecuador", "Peru", "Chile", "Mexico", "Honduras"))
19 | #' ggplot(latin.protests, aes(y=Percent, Country, x=Theme, fill=Theme)) + geom_bar(stat="identity") + facet_wrap( ~ Country, nrow=5) + theme_bw() + theme(strip.background = element_rect(fill = 'white'), legend.position="top", axis.ticks = element_blank(), axis.text.x = element_blank()) + labs(x=NULL)
20 | 
21 | 
22 | 
23 | LocationThemes <- function(df, themes, countries){
24 |   # "df" will be the overarching big theme, like protests, for the whole world
25 |   theme.counts <- data.frame()
26 |   for(i in 1:length(themes)){
27 |     type.i <- themes[i]
28 |     locations <- df[grep(type.i, df$THEMES), "LOCATIONS"]
29 |     locations <- as.character(unlist(strsplit(locations, ";")))
30 |     locations <- strsplit(as.character(locations), "#")
31 |     locations <- sapply(locations, "[", 3)
32 |     locations <- as.data.frame(table(locations))
33 |     locations$type <- tolower(gsub("_", " ", type.i))
34 |     theme.counts <- rbind(theme.counts, locations)
35 |   }
36 |   names(theme.counts) <- c("Country", "Count", "Theme") 
37 |   countrylist <- countrycode(countries, "country.name", "fips104")
38 |   theme.counts <- theme.counts[theme.counts$Country %in% countrylist,]
39 |   #print("Second loop done")
40 |   #print(head(theme.counts))
41 |   theme.counts$Country <- countrycode(theme.counts$Country, "fips104", "country.name")
42 |   countrytotals <- as.character(unlist(strsplit(df$LOCATIONS, ";")))
43 |   countrytotals <- strsplit(as.character(countrytotals), "#")
44 |   countrytotals <- sapply(countrytotals, "[", 3)
45 |   countrytotals <- as.data.frame(table(countrytotals))
46 |   names(countrytotals) <- c("Country", "Total")
47 |   countrytotals$Country <- countrycode(countrytotals$Country, "fips104", "country.name")
48 |   theme.counts <- merge(theme.counts, countrytotals, by="Country")
49 |   theme.counts$Percent <- theme.counts$Count / theme.counts$Total
50 |   return(theme.counts)
51 | }
52 | 
53 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | gdeltr
 2 | ======
 3 | 
 4 | `gdeltr` is my ad-hoc collection of functions for working with [GDELT](http://gdelt.utdallas.edu/).  It is completely untested on any other machine, has no error catching functions, and has completely excessive dependencies on other packages.  I recommend copying and pasting any code you find useful rather than installing the whole package.
 5 | 
 6 | Two basic utilities for working with the traditional event stream:
 7 | 
 8 | * `gDate` for converting dates from a "yyyymmdd" string to a "yyyy-mm-dd" Date class.
 9 | * `fillSeries` to add missing days to a GDELT data frame for plotting or time series analysis
10 | 
11 | Two more advanced functions for pulling events from a `dplyr`/SQLite setup, as described [here](http://andrewhalterman.com/2013/08/28/gdelt_dplyr_sqlite/):
12 | * `subsetEventCountry` for returning lat/long fields given a country name and EventRootCode.
13 | * `getEventCounts` for returning base, root, or regular event codes per month for a given country.
14 | 
15 | 
16 | ### Global Knowledge Graph
17 | The Global Knowledge Graph is the newest component of GDELT.  In his [announcement](http://gdeltblog.wordpress.com/2013/10/27/announcing-the-debut-of-the-gdelt-global-knowledge-graph/), Kalev describes it as an attempt "to connect every person, organization, location, count, theme, news source, and event across the planet into a single massive network that captures what's happening around the world, what its context is and who's involved, and how the world is feeling about it, every single day."
18 | He also points out that it's much more difficult to work with than the original event stream and recommends using Perl or Python for working with it.  (For one example, it's a nested structure using a combination of tabs, hashtags, and semicolons as separators.)  While Perl/Python may be better, there are lots of people (including me) who are much more comfortable working in R, even if it's inferior.   Here are my quick hacks for working with the alpha experimental release of GDELT's Global Knowledge Graph:
19 | * `GKGcomentions` for pulling co-mentioned organizations, people, or countries from a subsetted GKG file.
20 | * `GKGextractcameo` will return the events from the traditional stream associated with a subset of the GKG namespaces.  It can return either the vector of GLOBALEVENTIDs, or, if you have a dplyr/sqlite setup, the full data frame of events.
21 | * `toner` will, for a given GKG subset, return the tones associated with each person/place/organization associated with it.
22 | * `GKGcounts` will take a subset of the GKG and return just the info in the `COUNTS` column, nicely formatted.  This refers to info in the "Counts" column, not sums of number of events as above.
23 | * `GKGedgelist` will take a ragged data frame with co-mentions and format it into a two-column edge list for export to Gephi or other network analysis tool.
24 | * `write.gephi`: a wrapper for `write.table` that puts quotes around all elements in the df and writes with semicolon separators and without row/column names.
25 | * `nameFixer` will standardize names from the GKG.  Only has about 30 (mostly Syria-related) names right now.  This isn't really worth using yet and I'm sure there's a better approach than this.
26 | * `themeTrend`. This will plot the number of mentions of arbitrary themes per day.
27 | * `toneTrend`. This will plot the average tone of coverage per day of persons, organizations, or themes.
28 | 
29 | 


--------------------------------------------------------------------------------
/R/GKGLatLong.R:
--------------------------------------------------------------------------------
 1 | #' Create a geographic node/edgelist from a GKG dataframe.
 2 | #' 
 3 | #' This takes a GKG dataframe and creates node and edgelists of the co-mentioned geographic locations. These node and edgelists can be imported into Gephi and viewed with the "Geo Layout" option.
 4 | #' Its saves the edge and node lists in the working directory under file names that you specify.
 5 | #'
 6 | #' @param \code{gkg.df} A subset of the Global Knowledge Graph 
 7 | #' @param \code{filename} The filename of the output (exclude file endings)
 8 | #'
 9 | #' @return edgelist.csv A semicolon-separated csv with an edgelist using the full geographic names.
10 | #' @return nodelist.csv A semicolon-separated csv including label, id, latitude, and longitude.
11 | #'
12 | #' @keywords GDELT, gdeltr
13 | #'
14 | #' @details The files that the function saves can be imported into Gephi. Import the nodelist file first, and make sure that \code{lat} and \code{lng} are set to "Double". Import the edgelist next. After importing both, clear up any duplicates by going to Data Labratory > More Actions > Detect and Merge Duplicates, merging on the Label field.
15 | #' Make sure you have the Geo Layout plugin installed, as well as the "Map of Countries" plugin if you want to do the full visualization inside Gephi.
16 | #'
17 | #'Feature to add: Take the full dataframe, separate by date to allow dynamic graphs.
18 | #'
19 | #' @export
20 | #' 
21 | #' @examples
22 | #' ieds <- gkg[grep("LANDMINE", gkg$THEMES),]
23 | #' GKGLatLong(ieds, file="ied.network")
24 | 
25 | GKGLatLong <- function(gkg.df, filename){
26 |   if (!"ORGANIZATIONS" %in% names(gkg.df)) stop("No column named 'ORGANIZATIONS'")
27 |   locations.original <- as.character(gkg.df$LOCATIONS)
28 |   locations <- strsplit(locations.original, split=";")
29 |   nMax <- max(sapply(locations, length))
30 |   locations <- cbind(t(sapply(locations, function(i) i[1:nMax])))
31 |   locations <- as.data.frame(locations, stringsAsFactors=FALSE)
32 |   ## Remove all the single-node (non-edge) rows
33 |   locations <- subset(locations, locations[,2] != "NA")
34 |   # Now we need to change each cell into just a FullName
35 |   locations.df <- as.data.frame(locations[1,])
36 |   
37 |   for (col.tmp in 1:ncol(locations)){
38 |     locations.one <- t(locations[col.tmp,])
39 |     locations.one <- strsplit(locations.one, "#")
40 |     nMax <- max(sapply(locations.one, length))
41 |     locations.one <- cbind(t(sapply(locations.one, function(i) i[1:nMax])))
42 |     locations.one <- as.data.frame(locations.one, stringsAsFactors=FALSE)
43 |     locations.one <- as.data.frame(t(locations.one[,2]))
44 |     locations.df <- rbind(locations.df, locations.one)
45 |   }
46 |   locations.df <- subset(locations.df, locations.df[,2] != "NA")
47 |   locations.df <- locations.df[2:49,]
48 |   locations.edgelist <- GKGedgelist(locations.df, max.connections=24)
49 |   locations.edgelist <- as.data.frame(locations.edgelist)
50 |   locations.edgelist$type <- "Undirected"
51 |   names(locations.edgelist) <- c("Source", "Target", "Type")
52 |   
53 |   node.latlong <- strsplit(locations.original, split=";")
54 |   node.latlong <- unlist(node.latlong)
55 |   node.latlong <- strsplit(node.latlong, split="#")
56 |   nMax <- max(sapply(node.latlong, length))
57 |   node.latlong <- cbind(t(sapply(node.latlong, function(i) i[1:nMax])))
58 |   node.latlong <- as.data.frame(node.latlong, stringsAsFactors=FALSE)
59 |   node.latlong <- node.latlong[,c(2,5,6)]
60 |   names(node.latlong) <- c("id", "lat", "lng")
61 |   node.latlong$label <- node.latlong$id
62 |   
63 |   node.filename <- paste0(filename, ".nodes.csv")
64 |   edge.filename <- paste0(filename, ".edges.csv")
65 |   write.table(locations.edgelist, file=edge.filename, sep=";", row.names=FALSE)
66 |   write.table(node.latlong, file=node.filename, sep=";", row.names=FALSE)
67 | }


--------------------------------------------------------------------------------
/R/GKGcomentions.R:
--------------------------------------------------------------------------------
 1 | #' Given a subsetted dataframe from the Global Knowledge Graph, return a df with co-mentions.
 2 | #' 
 3 | #' This takes a GKG dataframe (or subset thereof) and returns a dataframe with all co-mentioned entities of the desired type listed on the same row.
 4 | #' This is designed for export to social network analysis software.  Run the output through \code{write.gephi} if needed.
 5 | #' New feature: uses \code{nameFixer} to standardize people names.
 6 | #'
 7 | #' @param gkg.df A subset of the Global Knowledge Graph \code{gkg.df}
 8 | #' @param type Data types to subset: "themes", "persons", "organizations", "countries", or "latlong". \code{type}
 9 | #'
10 | #' @return countries.df A data frame containing count information.
11 | #'
12 | #' @keywords GDELT, gdeltr
13 | #'
14 | #' @export
15 | #' 
16 | #' @examples
17 | #' ieds <- gkg[grep("LANDMINE", gkg$THEMES),]
18 | #' ieds.orgs <- GKGcomentions(ieds, type="organizations")
19 | 
20 | 
21 | GKGcomentions <- function(gkg.df, type) {
22 |   if (type=="organizations" | type=="orgs"){
23 |     if (!"ORGANIZATIONS" %in% names(gkg.df)) stop("No column named 'ORGANIZATIONS'")
24 |     orgs <- gkg.df$ORGANIZATIONS
25 |     if (is.factor(orgs)==TRUE){orgs <- as.character(orgs)}
26 |     orgs <- strsplit(orgs, split=";")
27 |     nMax <- max(sapply(orgs, length))
28 |     orgs <- cbind(t(sapply(orgs, function(i) i[1:nMax])))
29 |     orgs <- as.data.frame(orgs)
30 |     return(orgs)
31 |   }
32 |   if (type=="themes"){
33 |     if (!"THEMES" %in% names(gkg.df)) stop("No column named 'THEMES'")
34 |     themes <- gkg.df$THEMES
35 |     if (is.factor(themes)==TRUE){themes <- as.character(themes)}
36 |     themes <- strsplit(themes, split=";")
37 |     nMax <- max(sapply(themes, length))
38 |     themes <- cbind(t(sapply(themes, function(i) i[1:nMax])))
39 |     themes <- as.data.frame(themes)
40 |     return(themes)
41 |   }
42 |     
43 |   if (type=="persons"){
44 |     if (!"PERSONS" %in% names(gkg.df)) stop("No column named 'PERSONS'")
45 |     persons <- gkg.df$PERSONS
46 |     if (is.factor(persons)==TRUE){persons <- as.character(persons)}
47 |     persons <- strsplit(persons, split=";")
48 |     nMax <- max(sapply(persons, length))
49 |     persons <- cbind(t(sapply(persons, function(i) i[1:nMax])))
50 |     persons <- as.data.frame(persons)
51 |     for (i in 1:ncol(persons)){
52 |      persons[,i] <- nameFixer(persons[,i])
53 |     }
54 |     return(persons)
55 |   }
56 |   if (type=="countries"){
57 |     if (!"LOCATIONS" %in% names(gkg.df)) stop("No column named 'LOCATIONS'")
58 |     countries <- gkg.df$LOCATIONS
59 |     if (is.factor(countries)==TRUE){countries <- as.character(countries)}
60 |     countries <- strsplit(countries, split=";")
61 |     nMax <- max(sapply(countries, length))
62 |     countries <- cbind(t(sapply(countries, function(i) i[1:nMax])))
63 |     countries <- countries[,3]
64 |     countries.df <- data.frame(row.names=1:nrow(countries))
65 |     for (i in 1:ncol(countries)) {
66 |       tmp <- countries[,i]
67 |       tmp1 <- strsplit(tmp, split="#")
68 |       tmp2 <- sapply(tmp1, "[", 3)
69 |       countries.df <- cbind(countries.df, tmp2)
70 |     }
71 |     cc <- function(x) {countrycode(x, "fips104", "country.name")}
72 |     countries.df <- as.data.frame(lapply(countries.df[,1:ncol(countries.df)],FUN = function(x) {sapply(x,FUN=cc)}))
73 |     return(countries.df)
74 | }
75 |   
76 |     if (type=="latlong"){
77 |       if (!"LOCATIONS" %in% names(gkg.df)) stop("No column named 'LOCATIONS'")
78 |       latlong <- gkg.df$LOCATIONS
79 |       if (is.factor(latlong)==TRUE){latlong <- as.character(latlong)}
80 |       latlong <- strsplit(latlong, split=";")
81 |       nMax <- max(sapply(latlong, length))
82 |       latlong <- cbind(t(sapply(latlong, function(i) i[1:nMax])))
83 |       latlong <- latlong[,3]
84 |       latlong.df <- data.frame(row.names=1:nrow(latlong))
85 |       for (i in 1:ncol(latlong)) {
86 |         tmp <- latlong[,i]
87 |         tmp1 <- strsplit(tmp, split="#")
88 |         tmp2 <- sapply(tmp1, "[", 3)
89 |         latlong.df <- cbind(latlong.df, tmp2)
90 |       }
91 |     }
92 | 
93 |   
94 | }
95 | 


--------------------------------------------------------------------------------
/R/getEventCounts.R:
--------------------------------------------------------------------------------
  1 | #' Get event counts per country-month from GDELT
  2 | #' 
  3 | #' By default, this goes back to Jan 1 2000, but you can change it.
  4 | #' Inputs must be characters.
  5 | #' Requires the \code{countrycode} package to translate from country name to FIPS104.
  6 | #' Assumes you have GDELT in a dplyr tble in tables called "hist.db" and "daily.db".  I have them in a SQLite database, but dplyr will let you use whatever you want.  See Hadley Wickham's github page.
  7 | #'
  8 | #' Reverted to a prev. version.  No pre-build scaffolding to get all of the columns.
  9 | #'
 10 | #'
 11 | #' @param country.name A normal English country name (character) \code{country.name}
 12 | #' @param eventtype What event code resolution?  Options: "code", "base", "root", "quad". \code{eventtype}
 13 | #' @param min.date Furthest date back you want (numeric).  \code{min.date}
 14 | #'
 15 | #' @return df A data frame counts per event per month in the country 
 16 | #'
 17 | #' @keywords GDELT, gdeltr
 18 | #'
 19 | #' @export
 20 | #' 
 21 | #' @examples
 22 | #' mex.protest <- getEventCounts("Mexico", eventtype="root", min.date==20000101)
 23 | 
 24 | 
 25 | getEventCounts <- function(countryname, eventtype="root", min.date=20000101){
 26 |   require(countrycode)
 27 |   require(reshape2)
 28 |   require(dplyr)
 29 |   require(RSQLite)
 30 |   require(RSQLite.extfuns)
 31 |   
 32 |   fips.code <- countrycode(countryname, "country.name", "fips104")
 33 |   iso.code <- countrycode(countryname, "country.name", "iso3c")
 34 |   
 35 |   if (eventtype=="root") {
 36 |     df <- select(hist.db, SQLDATE, MonthYear, EventRootCode, ActionGeo_CountryCode, Actor1CountryCode, Actor2CountryCode)
 37 |     df <- filter(df, SQLDATE >= min.date, ActionGeo_CountryCode==fips.code, Actor1CountryCode==iso.code, Actor2CountryCode==iso.code)
 38 |     df <- as.data.frame(df)
 39 |     df <- tbl_df(df)
 40 |     df <- group_by(df, EventRootCode, MonthYear)
 41 |     df <- summarise(df, count=n())
 42 |     df <- as.data.frame(df)
 43 |     df <- dcast(df, MonthYear ~ EventRootCode)
 44 | 
 45 |     df2 <- select(daily.db, SQLDATE, MonthYear, EventRootCode, ActionGeo_CountryCode, Actor1CountryCode, Actor2CountryCode)
 46 |     df2 <- filter(df2, SQLDATE >= min.date, ActionGeo_CountryCode==fips.code, Actor1CountryCode==iso.code, Actor2CountryCode==iso.code)
 47 |     df2 <- as.data.frame(df2)
 48 |     df2 <- tbl_df(df2)
 49 |     df2 <- group_by(df2, EventRootCode, MonthYear)
 50 |     df2 <- summarise(df2, count=n())
 51 |     df2 <- as.data.frame(df2)
 52 |     df2 <- dcast(df2, MonthYear ~ EventRootCode)
 53 |     df <- merge(df, df2, all=TRUE)
 54 |     df[is.na(df)] <- 0
 55 |     return(df)
 56 |   }
 57 |   if (eventtype=="base") {
 58 |     df <- select(hist.db, SQLDATE, MonthYear, EventBaseCode, ActionGeo_CountryCode, Actor1CountryCode, Actor2CountryCode)
 59 |     df <- filter(df, SQLDATE >= min.date, ActionGeo_CountryCode==fips.code, Actor1CountryCode==iso.code, Actor2CountryCode==iso.code)
 60 |     df <- as.data.frame(df)
 61 |     df <- tbl_df(df)
 62 |     df <- group_by(df, EventBaseCode, MonthYear)
 63 |     df <- summarise(df, count=n())
 64 |     df <- as.data.frame(df)
 65 |     df <- dcast(df, MonthYear ~ EventBaseCode)
 66 | 
 67 |     df2 <- select(daily.db, SQLDATE, MonthYear, EventBaseCode, ActionGeo_CountryCode, Actor1CountryCode, Actor2CountryCode)
 68 |     df2 <- filter(df2, SQLDATE >= min.date, ActionGeo_CountryCode==fips.code, Actor1CountryCode==iso.code, Actor2CountryCode==iso.code)
 69 |     df2 <- as.data.frame(df2)
 70 |     df2 <- tbl_df(df2)
 71 |     df2 <- group_by(df2, EventBaseCode, MonthYear)
 72 |     df2 <- summarise(df2, count=n())
 73 |     df2 <- as.data.frame(df2)
 74 |     df2 <- dcast(df2, MonthYear ~ EventBaseCode)
 75 |     df <- merge(df, df2)
 76 |     df[is.na(df)] <- 0
 77 |     return(df)
 78 |   }  
 79 |   if (eventtype=="code") {
 80 |     df <- select(hist.db, SQLDATE, MonthYear, EventCode, ActionGeo_CountryCode, Actor1CountryCode, Actor2CountryCode)
 81 |     df <- filter(df, SQLDATE >= min.date, ActionGeo_CountryCode==fips.code, Actor1CountryCode==iso.code, Actor2CountryCode==iso.code)
 82 |     df <- as.data.frame(df)
 83 |     df <- tbl_df(df)
 84 |     df <- group_by(df, EventCode, MonthYear)
 85 |     df <- summarise(df, count=n())
 86 |     df <- as.data.frame(df)
 87 |     df <- dcast(df, MonthYear ~ EventCode)
 88 |   
 89 |     df2 <- select(daily.db, SQLDATE, MonthYear, EventCode, ActionGeo_CountryCode, Actor1CountryCode, Actor2CountryCode)
 90 |     df2 <- filter(df2, SQLDATE >= min.date, ActionGeo_CountryCode==fips.code, Actor1CountryCode==iso.code, Actor2CountryCode==iso.code)
 91 |     df2 <- as.data.frame(df2)
 92 |     df2 <- tbl_df(df2)
 93 |     df2 <- group_by(df2, EventCode, MonthYear)
 94 |     df2 <- summarise(df2, count=n())
 95 |     df2 <- as.data.frame(df2)
 96 |     df2 <- dcast(df2, MonthYear ~ EventCode)
 97 |     df <- merge(df, df2, all=TRUE)
 98 |     df[is.na(df)] <- 0
 99 |     return(df)
100 |   }  
101 |   if (eventtype=="quad") {
102 |   df <- select(hist.db, SQLDATE, MonthYear, QuadClass, ActionGeo_CountryCode, Actor1CountryCode, Actor2CountryCode)
103 |   df <- filter(df, SQLDATE >= min.date, ActionGeo_CountryCode==fips.code, Actor1CountryCode==iso.code, Actor2CountryCode==iso.code)
104 |   df <- as.data.frame(df)
105 |   df <- tbl_df(df)
106 |   df <- group_by(df, QuadClass, MonthYear)
107 |   df <- summarise(df, count=n())
108 |   df <- as.data.frame(df)
109 |   df <- dcast(df, MonthYear ~ QuadClass)
110 |   
111 |   df2 <- select(daily.db, SQLDATE, MonthYear, QuadClass, ActionGeo_CountryCode, Actor1CountryCode, Actor2CountryCode)
112 |   df2 <- filter(df2, SQLDATE >= min.date, ActionGeo_CountryCode==fips.code, Actor1CountryCode==iso.code, Actor2CountryCode==iso.code)
113 |   df2 <- as.data.frame(df2)
114 |   df2 <- tbl_df(df2)
115 |   df2 <- group_by(df2, QuadClass, MonthYear)
116 |   df2 <- summarise(df2, count=n())
117 |   df2 <- as.data.frame(df2)
118 |   df2 <- dcast(df2, MonthYear ~ QuadClass)
119 |   df <- merge(df, df2, all=TRUE)
120 |   df[is.na(df)] <- 0
121 |   return(df)
122 | }  
123 | }


--------------------------------------------------------------------------------