├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── aaa.r ├── gd-top-trending.R ├── list-chyrons.r ├── networks.r ├── newsflash-package.R ├── newsflash.r ├── third-eye.r ├── top-tending-range.r ├── word-cloud.R └── zzz.R ├── README.Rmd ├── README.md ├── README_cache └── gfm │ ├── __packages │ ├── unnamed-chunk-10_b71663ca7f74ee9c9e0993f800680bdc.RData │ ├── unnamed-chunk-10_b71663ca7f74ee9c9e0993f800680bdc.rdb │ ├── unnamed-chunk-10_b71663ca7f74ee9c9e0993f800680bdc.rdx │ ├── unnamed-chunk-4_202c6a4374c7d2d43d1df0021f5e1de3.RData │ ├── unnamed-chunk-4_202c6a4374c7d2d43d1df0021f5e1de3.rdb │ ├── unnamed-chunk-4_202c6a4374c7d2d43d1df0021f5e1de3.rdx │ ├── unnamed-chunk-6_60e162ac3d416f213d19662cf1a02510.RData │ ├── unnamed-chunk-6_60e162ac3d416f213d19662cf1a02510.rdb │ ├── unnamed-chunk-6_60e162ac3d416f213d19662cf1a02510.rdx │ ├── unnamed-chunk-7_2f3c308173042d1baf25844e64d232cb.RData │ ├── unnamed-chunk-7_2f3c308173042d1baf25844e64d232cb.rdb │ ├── unnamed-chunk-7_2f3c308173042d1baf25844e64d232cb.rdx │ ├── unnamed-chunk-8_63ed08ea6bddbf23012e183bdb415c89.RData │ ├── unnamed-chunk-8_63ed08ea6bddbf23012e183bdb415c89.rdb │ ├── unnamed-chunk-8_63ed08ea6bddbf23012e183bdb415c89.rdx │ ├── unnamed-chunk-9_8b52c64d46d2221a5b0cbdaefa9e655b.RData │ ├── unnamed-chunk-9_8b52c64d46d2221a5b0cbdaefa9e655b.rdb │ └── unnamed-chunk-9_8b52c64d46d2221a5b0cbdaefa9e655b.rdx ├── README_files ├── figure-gfm │ ├── unnamed-chunk-10-1.png │ ├── unnamed-chunk-4-1.png │ ├── unnamed-chunk-6-1.png │ └── unnamed-chunk-7-1.png ├── figure-markdown_github-ascii_identifiers │ ├── unnamed-chunk-10-1.png │ ├── unnamed-chunk-11-1.png │ ├── unnamed-chunk-4-1.png │ └── unnamed-chunk-9-1.png └── figure-markdown_github │ ├── unnamed-chunk-10-1.png │ ├── unnamed-chunk-5-1.png │ ├── unnamed-chunk-6-1.png │ ├── unnamed-chunk-7-1.png │ ├── unnamed-chunk-8-1.png │ └── unnamed-chunk-9-1.png ├── man ├── gd_top_trending.Rd ├── iatv_top_trending.Rd ├── list_chyrons.Rd ├── list_networks.Rd ├── newsflash.Rd ├── query_tv.Rd ├── read_chyrons.Rd └── word_cloud.Rd ├── newsflash.Rproj └── tests ├── test-all.R └── testthat └── test-newsflash.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.travis\.yml$ 4 | ^README\.*Rmd$ 5 | ^README\.*html$ 6 | ^NOTES\.*Rmd$ 7 | ^NOTES\.*html$ 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Rproj 5 | src/*.o 6 | src/*.so 7 | src/*.dll 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | warnings_are_errors: true 3 | sudo: required 4 | 5 | cache: packages 6 | 7 | r: 8 | - oldrel 9 | - release 10 | - devel 11 | 12 | apt_packages: 13 | - libv8-dev 14 | - xclip 15 | 16 | env: 17 | global: 18 | - CRAN: http://cran.rstudio.com 19 | 20 | notifications: 21 | email: 22 | - bob@rud.is 23 | irc: 24 | channels: 25 | - "104.236.112.222#builds" 26 | nick: travisci 27 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: newsflash 2 | Type: Package 3 | Title: Tools to Work with the Internet Archive and GDELT Television Explorer 4 | Version: 
0.6.0 5 | Date: 2017-10-01 6 | Author: Bob Rudis (bob@rud.is) 7 | Authors@R: c( 8 | person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"), 9 | comment = c(ORCID = "0000-0001-5670-2640")), 10 | person("Abe", "Neuwirth", role = c("ctb")), 11 | person("Mike", "Gruszczynski", role = c("ctb")) 12 | ) 13 | Encoding: UTF-8 14 | Maintainer: Bob Rudis <bob@rud.is> 15 | Description: The 'GDELT' Television Explorer () 16 | provides a simple and straightforward interface for searching through current and historical 17 | closed-caption records from television news sources all across the globe. Functions are 18 | provided to query and tidy this data for more in-depth analyses. 19 | URL: https://github.com/hrbrmstr/newsflash 20 | BugReports: https://github.com/hrbrmstr/newsflash/issues 21 | License: AGPL 22 | Suggests: 23 | testthat 24 | Depends: 25 | R (>= 3.2.0) 26 | Imports: 27 | httr, 28 | jsonlite, 29 | dplyr, 30 | tibble, 31 | tidyr, 32 | lubridate, 33 | anytime, 34 | rvest, 35 | xml2, 36 | stringi, 37 | DT, 38 | scales, 39 | purrr, 40 | tidytext, 41 | curl, 42 | txtplot, 43 | readr, 44 | utils 45 | RoxygenNote: 6.0.1.9000 46 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(gd_top_trending) 4 | export(iatv_top_trending) 5 | export(list_chyrons) 6 | export(list_networks) 7 | export(query_tv) 8 | export(read_chyrons) 9 | export(word_cloud) 10 | import(httr) 11 | importFrom(DT,datatable) 12 | importFrom(curl,curl_fetch_memory) 13 | importFrom(dplyr,"%>%") 14 | importFrom(dplyr,arrange) 15 | importFrom(dplyr,as_data_frame) 16 | importFrom(dplyr,count) 17 | importFrom(dplyr,data_frame) 18 | importFrom(dplyr,mutate) 19 | importFrom(dplyr,progress_estimated) 20 | importFrom(dplyr,select) 21 | importFrom(dplyr,tbl_df) 22 | importFrom(jsonlite,fromJSON) 23 | importFrom(lubridate,is.Date) 24 | importFrom(lubridate,ymd_hms) 25 | importFrom(purrr,"%||%") 26 | importFrom(purrr,discard) 27 | importFrom(purrr,keep) 28 | importFrom(purrr,map) 29 | importFrom(purrr,map_df) 30 | importFrom(purrr,safely) 31 | importFrom(readr,cols) 32 | importFrom(readr,read_tsv) 33 | importFrom(rvest,html_attr) 34 | importFrom(rvest,html_nodes) 35 | importFrom(rvest,html_text) 36 | importFrom(scales,comma) 37 | importFrom(stringi,stri_match_all_regex) 38 | importFrom(stringi,stri_read_lines) 39 | importFrom(stringi,stri_replace_all_regex) 40 | importFrom(stringi,stri_split_fixed) 41 | importFrom(tidyr,unnest) 42 | importFrom(tidytext,unnest_tokens) 43 | importFrom(txtplot,txtbarchart) 44 | importFrom(utils,browseURL) 45 | importFrom(xml2,read_html) 46 | importFrom(xml2,read_xml) 47 | importFrom(xml2,xml_attr) 48 | importFrom(xml2,xml_find_all) 49 | importFrom(xml2,xml_find_first) 50 | importFrom(xml2,xml_text) 51 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | 0.6.0 2 | * add functions to work with the Third Eye chyron scraper archive 3 | 4 | 0.5.0 5 | * add `top_trending()` 6 | * add `top_trending_ranged()` 7 | 8 | 0.4.1 9 | * add `txtplot` to `DESCRIPTION`; Fixes #2 10 | 11 | 0.4.0 12 | * had to switch to `curl` direct calls since `httr` was being silly on large JSON results 13 | * sub out `anytime` for `lubridate` to handle hour resolution in `top_matches` 14 | * Handle support for new query features 15 | 16 | 0.3.0 17 | * `top_text()`
returns a tidy data frame by default 18 | 19 | 0.2.0 20 | * Some extra helper functions 21 | 22 | 0.1.0 23 | * Initial release 24 | -------------------------------------------------------------------------------- /R/aaa.r: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("station_values", "date_start", "date_end", "keyword", "network", "date_range", 2 | "station", "show", "show_date", "word", "snippet", ".x")) 3 | 4 | sfj <- purrr::safely(jsonlite::fromJSON) 5 | 6 | s_head <- purrr::safely(httr::HEAD) -------------------------------------------------------------------------------- /R/gd-top-trending.R: -------------------------------------------------------------------------------- 1 | #' Top Trending (GDELT) 2 | #' 3 | #' Retrieve current (last 15 minute) "top topics" being discussed on stations 4 | #' @export 5 | gd_top_trending <- function() { 6 | query_tv("", mode = "TrendingTopics") 7 | } 8 | 9 | -------------------------------------------------------------------------------- /R/list-chyrons.r: -------------------------------------------------------------------------------- 1 | #' Retrieve Third Eye chyron index 2 | #' 3 | #' Returns a data frame with available chyron dates & selected metadata. 4 | #' 5 | #' @md 6 | #' @return data frame with three columns: 7 | #' - `ts` (`Date`) chyron archive date 8 | #' - `type` (`character`) `raw` or `cleaned` 9 | #' - `size` (`numeric`) size of the feed file in bytes 10 | #' @export 11 | list_chyrons <- function() { 12 | 13 | doc <- xml2::read_xml("https://archive.org/download/third-eye/third-eye_files.xml") 14 | fils <- xml_find_all(doc, ".//file[contains(@name, 'tsv') and (contains(@name, '20'))]") 15 | 16 | fname <- xml_attr(fils, "name") 17 | 18 | data_frame( 19 | ts = as.Date(substr(fname, 1, 10)), 20 | type = ifelse(grepl("twe", fname), "cleaned", "raw"), 21 | size = as.numeric(xml_text(xml_find_first(fils, ".//size"))) 22 | ) %>% arrange(desc(ts)) 23 | 24 | } 25 | -------------------------------------------------------------------------------- /R/networks.r: -------------------------------------------------------------------------------- 1 | #' Helper function to identify station/network keyword and corpus date range for said market 2 | #' 3 | #' The \code{Station:}/\code{Network:}/\code{Market:} query qualifiers of \code{query_tv()} are picky so this helps you identify the 4 | #' keyword to use for the particular network/station. 5 | #' 6 | #' The list also shows the date ranges available for the captions, so you can use that as 7 | #' a guide when picking dates. 8 | #' 9 | #' In interactive mode it uses \code{DT::datatable()}.
You can force it to just display to 10 | #' the console by passing in \code{widget=FALSE} 11 | #' 12 | #' @export 13 | #' @param widget if `TRUE` then an HTML widget will be displayed to make it easier to 14 | #' sift through stations/networks 15 | #' @return data frame 16 | #' @examples 17 | #' list_networks() # widget 18 | #' print(list_networks(FALSE)) # no widget 19 | list_networks <- function(widget = interactive()) { 20 | 21 | xdf <- jsonlite::fromJSON("https://api.gdeltproject.org/api/v2/tv/tv?mode=stationdetails&format=json") 22 | 23 | xdf$station_details %>% 24 | mutate(StartDate = as.Date(anytime::anytime(StartDate))) %>% 25 | mutate(EndDate = as.Date(anytime::anytime(EndDate))) -> xdf 26 | 27 | if (widget) print(DT::datatable(xdf)) # htmlwidgets must be print()ed to display from inside a function 28 | 29 | class(xdf) <- c("tbl_df", "tbl", "data.frame") 30 | 31 | xdf 32 | 33 | } 34 | 35 | -------------------------------------------------------------------------------- /R/newsflash-package.R: -------------------------------------------------------------------------------- 1 | #' Tools to Work with the Internet Archive and GDELT Television Explorer 2 | #' 3 | #' @name newsflash 4 | #' @docType package 5 | #' @author Bob Rudis (bob@@rud.is) 6 | #' @import httr 7 | #' @importFrom readr read_tsv cols 8 | #' @importFrom rvest html_nodes html_attr html_text 9 | #' @importFrom stringi stri_match_all_regex stri_replace_all_regex stri_split_fixed stri_read_lines 10 | #' @importFrom xml2 read_html read_xml xml_find_all xml_text xml_attr xml_find_first 11 | #' @importFrom lubridate ymd_hms is.Date 12 | #' @importFrom tidyr unnest 13 | #' @importFrom dplyr tbl_df %>% mutate data_frame count as_data_frame select progress_estimated arrange 14 | #' @importFrom purrr map_df %||% safely map discard keep 15 | #' @importFrom jsonlite fromJSON 16 | #' @importFrom DT datatable 17 | #' @importFrom scales comma 18 | #' @importFrom txtplot txtbarchart 19 | #' @importFrom tidytext unnest_tokens 20 | #' @importFrom curl curl_fetch_memory 21 | #' @importFrom utils browseURL 22 | NULL 23 | -------------------------------------------------------------------------------- /R/newsflash.r: -------------------------------------------------------------------------------- 1 | #' Issue a query to the TV Explorer 2 | #' 3 | #' NOTE: The `mode` parameter controls what is returned. See the section on `Mode` for more information on available modes. 4 | #' 5 | #' @section Mode: 6 | #' 7 | #' This specifies the specific output you would like from the API, ranging from timelines to word clouds to clip galleries. 8 | #' 9 | #' - `TimelineVol`. (Default) This tracks how many results your search generates by day/hour over the selected time period, allowing you to assess the relative attention each station is paying to the topic and how that attention has varied over time. Using the DATANORM parameter you can control whether this reports results as raw clip counts or as normalized percentages of all coverage (the most robust way of comparing stations). By default, the timeline will not display the most recent 24 hours, since those results are still being generated (it can take up to 2-12 hours for a show to be processed by the Internet Archive and ready for analysis), but you can include those if needed via the LAST24 option. You can also smooth the timeline using the TIMELINESMOOTH option and combine all selected stations into a single time series using the DATACOMB option. 10 | #' - `StationChart`.
This compares how many results your search generates from each of the selected stations over the selected time period, allowing you to assess the relative attention each is paying to the topic. Using the DATANORM parameter you can control whether this reports results as raw clip counts or as normalized percentages of all coverage (the most robust way of comparing stations). 11 | #' - `TimelineVolNorm`. This displays the total airtime (in terms of 15 second clips) monitored from each of the stations in your query. It must be combined with a valid query, since it displays the airtime for the stations queried in the search. This mode can be used to identify brief monitoring outages or for advanced normalization, since it reports the total amount of clips monitored overall from each station in each day/hour. 12 | #' 13 | #' @section Queries: 14 | #' 15 | #' The GDELT TV API supports keyword and keyphrase searches, OR statements and a variety of advanced operators. NOTE – all of the operators below must be used as part of the value of the QUERY field, separated by spaces, and cannot be used as URL parameters on their own. 16 | #' 17 | #' - `""`. Anything found inside of quote marks is treated as an exact phrase search. Thus, you can search for "Donald Trump" to find all matches of his name. (e.g. `"donald trump"`) 18 | #' - `(a OR b)`. You can specify a list of keywords to be boolean OR'd together by enclosing them in parentheses and placing the capitalized word "OR" between each keyword or phrase. Boolean OR blocks cannot be nested at this time. For example, to search for mentions of Clinton, Sanders or Trump, you would use "`(clinton OR sanders OR trump)`" 19 | #' - `-`. You can place a minus sign in front of any operator, word or phrase to exclude it. For example "-sanders" would exclude results that contained "sanders" from your results. (e.g. `-sanders`) 20 | #' - `Context`. By default all of your keywords/phrases must appear in a single 15 second clip. (Phrases are allowed to span across two clips and are counted towards the clip they started in.) The "context" operator allows you to require that a given keyword/phrase appears either in the 15 second clip or in the 15 second clips immediately before or after it. This gives you a bit of additional search fuzziness. Even when searching for a single word, it must appear in quote marks. (e.g. `context:"russia"`) 21 | #' - `Market`. This narrows your search to a particular geographic market. The list of available markets can be found via the Station Details mode (look for the city name in the description of local stations). Example markets include "San Francisco" and "Philadelphia". The market name must be enclosed in quote marks. You can also use the special reserved market "National" to search the major national networks together. (e.g. `market:"San Francisco"`) 22 | #' - `Network`. This narrows your search to a particular television network. The list of available networks can be found via the Station Details mode (look for the network name in the description of local stations). Example networks include "CBS" and "NBC". Do not use quote marks around the network name. (e.g. `network:CBS`) 23 | #' - `Show`. This narrows your search to a particular television show. This must be the complete show name as returned by the TV API. To find a particular show, search the API and use the "clipgallery" mode to display matching clips and their source show.
For example, to limit your search to the show Hardball With Chris Matthews, you'd search for "show:"Hardball With Chris Matthews"". Note that you must surround the show name with quote marks. Remember that the TV API only searches shows monitored by the Internet Archive's Television News Archive, which may not include all shows. (e.g. `show:"Hardball With Chris Matthews"`) 24 | #' - `Station`. This narrows your search to a particular television station. Remember that the TV API only searches stations monitored by the Internet Archive's Television News Archive and not all of those stations have been monitored for the entire 2009-present time period. Do not use quote marks around the name of the station. To find the Station ID of a particular station, use the Station Details mode. (e.g. `station:CNN`) 25 | #' 26 | #' @md 27 | #' @param query query string in GDELT format. See `QUERY` in https://blog.gdeltproject.org/gdelt-2-0-television-api-debuts/ 28 | #' for details; use [list_networks()] to obtain valid station/network identifiers. If 29 | #' no `Network:`, `Market:` or `Station:` qualifiers are found `Market:"National"` is automatically added. 30 | #' @param mode See `Mode` section 31 | #' @param start_date,end_date start/end dates. Leaving both `NULL` searches all archive history. 32 | #' Leaving just `start_date` `NULL` sets the start date to July 2009. Leaving just `end_date` 33 | #' `NULL` sets the end date to today. 34 | #' @param datanorm normalized ("`perc`") vs "`raw`" counts; defaults to `perc`. 35 | #' @param timelinesmooth a smoothing value applying moving averages over 15-minute increments 36 | #' @param datacomb if "`combined`", all network volume is combined into a single value. 37 | #' Defaults to "`separate`". 38 | #' @param last_24 It can take the Internet Archive up to 24 hours to process a broadcast once 39 | #' it concludes. Thus, by default the TV API does not return results from the most recent 40 | #' 24 hours to ensure that analyses are not skewed by partial results. However, when 41 | #' tracking breaking news events, it may be desirable to view partial results with the 42 | #' understanding that any time or station-based trends may not accurately reflect the 43 | #' totality of their coverage. To include results from the most recent 24 hours, 44 | #' set this URL parameter to "yes". 
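#'
#' As an illustration of how these parameters combine, a fuller call might look
#' like the following (a sketch only; the query, station, and date values here
#' are arbitrary examples, not defaults):
#'
#' ```
#' query_tv(
#'   query = 'comey Station:CNN',
#'   mode = "TimelineVol",
#'   start_date = "2018-01-01",
#'   end_date = "2018-04-01",
#'   datanorm = "raw",
#'   timelinesmooth = 5,
#'   datacomb = "combined",
#'   last_24 = "no"
#' )
#' ```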
45 | #' @return Different objects for different `mode`s: 46 | #' - `TimelineVol` : a data frame with stations & counts (raw or normalized) 47 | #' - `TimelineVolNorm` : a data frame of station & topic airtime 48 | #' - `StationChart` : a data frame of stations and search result counts (raw or normalized) 49 | #' @references <https://blog.gdeltproject.org/gdelt-2-0-television-api-debuts/> 50 | #' @export 51 | #' @examples 52 | #' query_tv("(terror OR isis)") 53 | #' query_tv("british prime minister") 54 | #' query_tv("mexican president") 55 | query_tv <- function(query, 56 | mode = c("TimelineVol", "StationChart", "TimelineVolNorm"), 57 | start_date = NULL, 58 | end_date = NULL, 59 | datanorm = c("perc", "raw"), 60 | timelinesmooth = 0, 61 | datacomb = c("separate", "combined"), 62 | last_24 = c("yes", "no")) { 63 | 64 | if (!grepl("Network:|Market:|Station:", query, ignore.case = TRUE)) { 65 | query <- sprintf('%s Market:"National"', query) 66 | } 67 | 68 | mode <- mode[1] 69 | 70 | if (!(mode %in% c("TimelineVol", "ClipGallery", "StationChart", 71 | "TimelineVolNorm", "TrendingTopics", "WordCloud"))) { 72 | stop("Invalid 'mode'", call.=FALSE) 73 | } 74 | 75 | datanorm <- match.arg(datanorm, c("perc", "raw")) 76 | 77 | datacomb <- match.arg(datacomb, c("separate", "combined")) 78 | if (datacomb == "separate") datacomb <- NULL 79 | 80 | last_24 <- match.arg(last_24, c("yes", "no")) 81 | if (last_24 == "no") last_24 <- NULL 82 | 83 | if (is.null(start_date)) start_date <- as.Date("2009-07-02") 84 | if (is.null(end_date)) end_date <- Sys.Date() 85 | 86 | start_date <- as.POSIXct(start_date) 87 | end_date <- as.POSIXct(end_date) 88 | 89 | start_date <- format(start_date, "%Y%m%d%H%M%S") 90 | end_date <- format(end_date, "%Y%m%d%H%M%S") 91 | 92 | list( 93 | query = query, 94 | mode = mode, 95 | format = "json", 96 | datanorm = datanorm, 97 | datacomb = datacomb, 98 | startdatetime = start_date, 99 | enddatetime = end_date, 100 | timelinesmooth = timelinesmooth, 101 | last24 = last_24 102 | ) -> query 103 | 104 | if (mode == "ClipGallery") query$maxresults <- 3000L 105 | 106 | httr::GET( 107 | url = "https://api.gdeltproject.org/api/v2/tv/tv", 108 | query = query 109 | ) -> res 110 | 111 | if (!(res$status_code < 300)) { 112 | stop(sprintf("[%s] Query or API error on request [%s]", res$status_code, res$url), call.=FALSE) 113 | } 114 | 115 | res <- httr::content(res) 116 | 117 | if (mode %in% c("TimelineVol", "TimelineVolNorm")) { 118 | 119 | tibble::data_frame( 120 | network = res$timeline %>% purrr::map_chr("series"), 121 | data = res$timeline %>% purrr::map("data")) %>% 122 | tidyr::unnest(data) %>% 123 | dplyr::mutate( 124 | date = data %>% 125 | purrr::map_chr("date") %>% 126 | # hourly data doesn't have times - API doesn't return time values - split off non-hms data from date-time 127 | #sapply(., function(x) strsplit(x, "T")[[1]][1]) %>% 128 | lubridate::ymd_hms(), 129 | value = data %>% purrr::map_dbl("value") 130 | ) %>% 131 | dplyr::select(-data) 132 | 133 | } else if (mode == "ClipGallery") { 134 | 135 | purrr::map_df(res$clips, ~.x) %>% 136 | dplyr::mutate(date = anytime::anydate(date)) %>% 137 | dplyr::mutate(show_date = anytime::anydate(show_date)) 138 | 139 | } else if (mode == "StationChart") { 140 | 141 | purrr::map_df(res$stationchart, ~.x) 142 | 143 | } else if (mode == "TrendingTopics") { 144 | 145 | list( 146 | overall_trending_topics = unlist(res$OverallTrendingTopics, use.names = FALSE), 147 | station_trending_topics = purrr::map_df(res$StationTrendingTopics, ~{ 148 | dplyr::data_frame( 149 | station = .x$Station, 150 | topic = unlist(.x$Topics,
use.names = FALSE) 151 | ) 152 | }), 153 | station_top_topics = purrr::map_df(res$StationTopTopics, ~{ 154 | dplyr::data_frame( 155 | station = .x$Station, 156 | topic = unlist(.x$Topics, use.names = FALSE) 157 | ) 158 | }), 159 | overall_trending_phrases = unlist(res$OverallTrendingPhrases, use.names=FALSE) 160 | ) 161 | 162 | } else if (mode == "WordCloud") { 163 | 164 | purrr::map_df(res$wordcloud, ~.x) 165 | 166 | } 167 | 168 | } 169 | -------------------------------------------------------------------------------- /R/third-eye.r: -------------------------------------------------------------------------------- 1 | readr::cols( 2 | ts = readr::col_datetime(format = ""), 3 | channel = readr::col_character(), 4 | duration = readr::col_integer(), 5 | details = readr::col_character(), 6 | text = readr::col_character() 7 | ) -> .third_eye_cols 8 | 9 | .third_eye_col_names <- c("ts", "channel", "duration", "details", "text") 10 | .third_eye_url_tmpl <- "https://archive.org/download/third-eye/%s%s.tsv" 11 | 12 | #' Retrieve TV News Archive chyrons from the Internet Archive's Third Eye project 13 | #' 14 | #' The TV News Archive's Third Eye project captures the chyrons–or narrative text–that appear on the lower third of TV news screens and turns them into downloadable data and a Twitter feed for research, journalism, online tools, and other projects. At project launch (September 2017) we are collecting chyrons from BBC News, CNN, Fox News, and MSNBC–more than four million collected over just two weeks. Chyrons have public value because: 15 | #' - Breaking news often appears on chyrons before TV newscasters begin reporting or video is available, whether it's a hurricane or a breaking political story. 16 | #' - Which chyrons a TV news network chooses to display can reveal editorial decisions that can inform public understanding of how news is filtered for different audiences. 17 | #' - Providing chyrons as data–and also on Twitter–in near real-time can serve as an alert system, showing how TV news stations are reporting the news. Often the chyrons are ahead of the general conversation on Twitter. 18 | #' 19 | #' Some notes on the data: 20 | #' 21 | #' - chyrons are derived in near real-time from the TV News Archive's collection of TV news. The constantly updating public collection contains 1.4 million TV news shows, some dating back to 2009. 22 | #' - At launch, Third Eye captures four TV cable news channels: BBC News, CNN, Fox News, and MSNBC. 23 | #' - Data can be affected by temporary collection outages, which typically can last minutes or hours, but rarely more. 24 | #' - Dates/times are in UTC (Coordinated Universal Time). 25 | #' - Because the size of the raw data is so large (about 20 megabytes per day), results are limited to seven days per request. 26 | #' - Raw data collection began on August 25, 2017; the clean feed begins on September 7, 2017. 27 | #' - "`duration`" column is in seconds–the amount of time that particular chyron appeared on the screen. 28 | #' 29 | #' @md 30 | #' @note It is _highly_ recommended that you use the "clean" feed unless you're researching 31 | #' how to correct text. This package does its best to read in the raw feed but 32 | #' it often contains embedded nulls and non-standard text encodings which 33 | #' make it difficult to process. 34 | #' @param chyron_day archive day (`Date` or `character`; if `character` should be 35 | #' in `YYYY-mm-dd` format) 36 | #' @param cleaned logical, default `TRUE`.
The "raw feed" option provides all of the 37 | #' OCR'ed text from chyrons at the rate of approximately one entry per second. 38 | #' The "clean feed" download provides the data feed that fuels the Third Eye 39 | #' Twitter bots; this has been filtered to find the most representative, 40 | #' clearest chyrons from a 60-second period, with no more than one entry/tweet per 41 | #' minute (though the duration may be shorter than 60 seconds). The clean feed 42 | #' relies on algorithms that are a work in progress. 43 | #' @return `NULL` on irrecoverable errors, otherwise a data frame with five columns: 44 | #' - `ts` (`POSIXct`) chyron timestamp 45 | #' - `channel` (`character`) news channel the chyron appeared on 46 | #' - `duration` (`integer`) see Description 47 | #' - `details` (`character`) Internet Archive details path 48 | #' - `text` (`character`) the chyron text 49 | #' @export 50 | read_chyrons <- function(chyron_day = Sys.Date()-1, cleaned = TRUE) { 51 | 52 | if (length(chyron_day) > 1) { 53 | message("Can only retrieve one day's archive at a time. Using first value.") 54 | chyron_day <- chyron_day[1] 55 | } 56 | 57 | if (inherits(chyron_day, "character")) { 58 | chyron_day <- as.Date(chyron_day) # ensure it's valid 59 | } 60 | 61 | chyron_day <- format(chyron_day, "%Y-%m-%d") 62 | 63 | archive_type <- if (cleaned) "-tweets" else "" 64 | 65 | archive_url <- sprintf(.third_eye_url_tmpl, chyron_day, archive_type) 66 | 67 | # see if it's there (s_head() is a safely() wrapper, so check its $error slot) 68 | res <- s_head(archive_url) 69 | if (!is.null(res$error)) { 70 | message(sprintf("Error reaching the Internet Archive [%s]", res$error$message)) 71 | return(NULL) 72 | } 73 | 74 | if (httr::status_code(res$result) != 200) { 75 | message(sprintf("Chyron archive request failed: [%s]", httr::http_status(res$result)$message)) 76 | return(NULL) 77 | } 78 | 79 | tf <- tempfile() 80 | utils::download.file(archive_url, tf, quiet = TRUE) 81 | if (cleaned) { 82 | third_eye <- read_tsv(tf, col_names = .third_eye_col_names, .third_eye_cols) 83 | } else { 84 | suppressWarnings(stri_read_lines(tf)) %>% 85 | stri_split_fixed("\t", simplify = TRUE) %>% 86 | as_data_frame() %>% 87 | purrr::set_names(c("ts", "channel", "duration", "details", "text")) %>% 88 | mutate(ts = lubridate::ymd_hms(ts)) -> third_eye 89 | } 90 | 91 | unlink(tf) 92 | 93 | third_eye 94 | 95 | } 96 | -------------------------------------------------------------------------------- /R/top-tending-range.r: -------------------------------------------------------------------------------- 1 | #' Top Trending Topics (Internet Archive TV Archive) 2 | #' 3 | #' Provide start & end times in current time zone and this function will generate 4 | #' the proper "every 15 minutes" snapshot timestamps, convert them to GMT values and issue the queries, 5 | #' returning a nested data frame of results. If you want more control, fetch the individual 15-minute snapshot JSON files directly. 6 | #' 7 | #' GDELT now generates a snapshot every 15 minutes that records all of the "top trending" 8 | #' tables into a single archive enabling users to look back over time at what was trending 9 | #' in 15 minute increments historically back to midnight on 2017-09-07. 10 | #' 11 | #' Note that the archives are generated every 15 minutes based on the television shows that 12 | #' have completed processing at that time.
It can take several hours for a show to be fully 13 | #' processed by the Internet Archive and available for processing, thus the presence/absence 14 | #' of a topic in these files should not be used to date it precisely to that 15 minute mark, 15 | #' but rather as a rough temporal indicator of what topics were trending up/down in that 16 | #' general time frame. For precise timelines, you should take a topic from this archive and 17 | #' run a search on it using the main Television Explorer interface, select a timeframe of 18 | #' 72 hours and use the resulting timeline to precisely date the topic's coverage (since 19 | #' the Explorer timeline is based on the broadcast timestamp of the show, even if it is 20 | #' processed hours later). 21 | #' 22 | #' @md 23 | #' @param from,to start and end date/time ranges (will auto-convert if properly formatted strings) 24 | #' @param .progress show a progress bar? Defaukts to `TRUE` if in an interactive session. 25 | #' @note The times are auto-converted to GMT 26 | #' @export 27 | #' @examples 28 | #' top_trending("2017-09-08 18:00", "2017-09-09 06:00") 29 | iatv_top_trending <- function(from, to, .progress=interactive()) { 30 | 31 | from <- anytime::anytime(from) 32 | to <- anytime::anytime(to) 33 | 34 | base_url <- "http://data.gdeltproject.org/gdeltv3/iatv_trending/%s.tvtrending.v3.15min.json" 35 | 36 | start_ymd <- format(from, "%Y-%m-%d") 37 | end_ymd <- format(to, "%Y-%m-%d") 38 | 39 | start_hr <- as.numeric(format(from, "%H")) 40 | end_hr <- as.numeric(format(to, "%H")) 41 | 42 | start_min <- as.numeric(format(from, "%M")) 43 | if (!start_min %in% c(0, 15, 30, 45)) start_min <- 0 44 | 45 | end_min <- as.numeric(format(to, "%M")) 46 | if (!end_min %in% c(0, 15, 30, 45)) end_min <- 45 47 | 48 | from <- as.POSIXct(sprintf("%s %02d:%02d:00", start_ymd, start_hr, start_min)) 49 | to <- as.POSIXct(sprintf("%s %02d:%02d:00", end_ymd, end_hr, end_min)) 50 | 51 | full_range <- seq(from, to, "15 mins") 52 | 53 | attr(full_range, "tzone") <- "GMT" 54 | 55 | url_list <- sprintf(base_url, format(full_range, "%Y%m%d%H%M00")) 56 | 57 | pb <- dplyr::progress_estimated(length(url_list)) 58 | purrr::map(url_list, ~{ 59 | if (.progress) pb$tick()$print() 60 | res <- sfj(.x, flatten=TRUE) 61 | res$result 62 | }) -> res 63 | 64 | res <- purrr::discard(res, is.null) 65 | 66 | purrr::map_df(res, ~{ 67 | 68 | date_gen <- .x[["DateGenerated:"]] 69 | suppressWarnings(date_gen <- lubridate::ymd_hms(date_gen)) 70 | suppressWarnings(attr(date_gen, "tzone") <- Sys.timezone()) 71 | 72 | dplyr::data_frame( 73 | ts = date_gen, 74 | overall_trending_topics = list(.x[["OverallTrendingTopics"]]), 75 | station_trending_topics = list(.x[["StationTrendingTopics"]]), 76 | station_top_topics = list(.x[["StationTopTopics"]]), 77 | overall_trending_phrases = list(.x[["OverallTrendingPhrases"]]) 78 | ) 79 | 80 | }) -> out 81 | 82 | out 83 | 84 | } 85 | 86 | -------------------------------------------------------------------------------- /R/word-cloud.R: -------------------------------------------------------------------------------- 1 | #' Retrieve top words that appear most frequently in clips matching your search 2 | #' 3 | #' The API takes the 200 most relevant clips that match your search and returns the 4 | #' terms for a "word cloud" of up to the top 200 most frequent words that appeared in 5 | #' those clips (common stop words are automatically removed). 
This is a powerful way 6 | #' of understanding the topics and words dominating the relevant coverage and 7 | #' suggesting additional contextual search terms to narrow or evolve your search. 8 | #' Note that if there are too few matching clips for your query, the word cloud may 9 | #' be blank. 10 | #' 11 | #' @md 12 | #' @param query query string in GDELT format. See `QUERY` in https://blog.gdeltproject.org/gdelt-2-0-television-api-debuts/ 13 | #' for details; use [list_networks()] to obtain valid station/network identifiers 14 | #' @param start_date,end_date start/end dates. Leaving both `NULL` searches all archive history. 15 | #' Leaving just `start_date` `NULL` sets the start date to July 2009. Leaving just `end_date` 16 | #' `NULL` sets the end date to today. 17 | #' @export 18 | word_cloud <- function(query, start_date = NULL, end_date = NULL) { 19 | 20 | query_tv( 21 | query = query, 22 | mode = "WordCloud", 23 | start_date = start_date, 24 | end_date = end_date 25 | ) 26 | 27 | } 28 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .onLoad <- function(libname, pkgname) { 2 | 3 | packageStartupMessage("NOTE: There are breaking changes to the package API due to GDELT's v2 API") 4 | 5 | } -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: rmarkdown::github_document 3 | editor_options: 4 | chunk_output_type: console 5 | --- 6 | 7 | *** BREAKING CHANGES *** 8 | 9 | # newsflash 10 | 11 | Tools to Work with the Internet Archive and GDELT Television Explorer 12 | 13 | ## Description 14 | 15 | Ref: 16 | 17 | - 18 | - 19 | 20 | TV Explorer: 21 | >_"In collaboration with the Internet Archive's Television News Archive, GDELT's Television Explorer allows you to keyword search the closed captioning streams of the Archive's 6 years of American television news and explore macro-level trends in how America's television news is shaping the conversation around key societal issues. Unlike the Archive's primary Television News interface, which returns results at the level of an hour or half-hour "show," the interface here reaches inside of those six years of programming and breaks the more than one million shows into individual sentences and counts how many of those sentences contain your keyword of interest. Instead of reporting that CNN had 24 hour-long shows yesterday that mentioned Donald Trump, the interface here will count how many sentences uttered on CNN yesterday mentioned his name - a vastly more accurate metric for assessing media attention."_ 22 | 23 | Third Eye: 24 | >_The TV News Archive's Third Eye project captures the chyrons–or narrative text–that appear on the lower third of TV news screens and turns them into downloadable data and a Twitter feed for research, journalism, online tools, and other projects. At project launch (September 2017) we are collecting chyrons from BBC News, CNN, Fox News, and MSNBC–more than four million collected over just two weeks."_ 25 | 26 | An advantage of using this over the TV Explorer interactive selector & downloader or Third Eye API is that you get tidy tibbles with this package, ready to use in R. 
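For example, the tidy (long) shape means a `query_tv()` result can be piped straight into a dplyr/ggplot2 workflow with no reshaping. A minimal sketch (the query and date below are arbitrary examples):

```r
library(newsflash)
library(dplyr)

# query_tv() returns one row per network/date with columns: network, date, value
query_tv("comey", start_date = "2018-04-01") %>%
  group_by(network) %>%
  summarise(total_volume = sum(value)) %>%
  arrange(desc(total_volume))
```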
27 | 28 | NOTE: While I don't claim that this alpha-package is anywhere near perfect, the IA/GDELT TV API hiccups every so often so when there are critical errors run the same query in their web interface before submitting an issue. I kept getting errors when searching all affiliate markets for the "mexican president" query that also generate errors on the web site when JSON is selected as output (it's fine on the web site if the choice is interactive browser visualizations). Submit those errors to them, not here. 29 | 30 | ## What's Inside The Tin 31 | 32 | The following functions are implemented: 33 | 34 | - `list_chyrons`: Retrieve Third Eye chyron index 35 | - `list_networks`: Helper function to identify station/network keyword and corpus date range for said market 36 | - `newsflash`: Tools to Work with the Internet Archive and GDELT Television Explorer 37 | - `query_tv`: Issue a query to the TV Explorer 38 | - `read_chyrons`: Retrieve TV News Archive chyrons from the Internet Archive's Third Eye project 39 | - `gd_top_trending`: Top Trending (GDELT) 40 | - `iatv_top_trending`: Top Trending Topics (Internet Archive TV Archive) 41 | - `word_cloud`: Retrieve top words that appear most frequently in clips matching your search 42 | 43 | ## Installation 44 | 45 | ```{r eval=FALSE} 46 | devtools::install_github("hrbrmstr/newsflash") 47 | ``` 48 | 49 | ```{r message=FALSE, warning=FALSE, error=FALSE} 50 | options(width=120) 51 | ``` 52 | 53 | ## Usage 54 | 55 | ```{r message=FALSE, warning=FALSE, error=FALSE} 56 | library(newsflash) 57 | library(ggalt) 58 | library(hrbrthemes) 59 | library(tidyverse) 60 | 61 | # current version 62 | packageVersion("newsflash") 63 | ``` 64 | 65 | ### "Third Eye" Chyrons are simpler so we'll start with them first: 66 | 67 | ```{r fig.width=8, fig.height=5, cache=TRUE} 68 | list_chyrons() 69 | 70 | ch <- read_chyrons("2018-04-13") 71 | 72 | mutate( 73 | ch, 74 | hour = lubridate::hour(ts), 75 | text = tolower(text), 76 | mention = grepl("comey", text) 77 | ) %>% 78 | filter(mention) %>% 79 | count(hour, channel) %>% 80 | ggplot(aes(hour, n)) + 81 | geom_segment(aes(xend=hour, yend=0), color = "lightslategray", size=1) + 82 | scale_x_continuous(name="Hour (GMT)", breaks=seq(0, 23, 6), 83 | labels=sprintf("%02d:00", seq(0, 23, 6))) + 84 | scale_y_continuous(name="# Chyrons", limits=c(0,20)) + 85 | facet_wrap(~channel, scales="free") + 86 | labs(title="Chyrons mentioning 'Comey' per hour per channel", 87 | caption="Source: Internet Archive Third Eye project & ") + 88 | theme_ipsum_rc(grid="Y") 89 | ``` 90 | 91 | ## Now for the TV Explorer: 92 | 93 | ### See what networks & associated corpus date ranges are available: 94 | 95 | ```{r} 96 | list_networks(widget=FALSE) 97 | ``` 98 | 99 | ### Basic search: 100 | 101 | ```{r fig.width=8, fig.height=7, cache=TRUE} 102 | comey <- query_tv('comey', start_date = "2018-04-01") 103 | 104 | comey 105 | 106 | query_tv('comey', start_date = "2018-04-01") %>% 107 | arrange(date) %>% 108 | ggplot(aes(date, value, group=network)) + 109 | ggalt::geom_xspline(aes(color=network)) + 110 | ggthemes::scale_color_tableau(name=NULL) + 111 | labs(x=NULL, y="Volume Metric", title="'Comey' Trends Across National Networks") + 112 | facet_wrap(~network) + 113 | theme_ipsum_rc(grid="XY") + 114 | theme(legend.position="none") 115 | ``` 116 | 117 | ```{r cache=TRUE} 118 | query_tv("comey Network:CNN", mode = "TimelineVol", start_date = "2018-01-01") %>% 119 | arrange(date) %>% 120 | ggplot(aes(date, value, group=network)) + 121 |
ggalt::geom_xspline(color="lightslategray") + 122 | ggthemes::scale_color_tableau(name=NULL) + 123 | labs(x=NULL, y="Volume Metric", title="'Comey' Trend on CNN") + 124 | theme_ipsum_rc(grid="XY") 125 | ``` 126 | 127 | ### Relative Network Attention To Syria since January 1, 2018 128 | 129 | ```{r cache=TRUE} 130 | query_tv('syria Market:"National"', mode = "StationChart", start_date = "2018-01-01") %>% 131 | arrange(desc(count)) %>% 132 | knitr::kable("markdown") 133 | ``` 134 | 135 | ### Video Clips 136 | 137 | ```{r cache=TRUE} 138 | clips <- query_tv('comey Market:"National"', mode = "ClipGallery", start_date = "2018-01-01") 139 | 140 | clips 141 | ``` 142 | 143 | `r clips$show_date[1]` | `r clips$station[1]` | `r clips$show[1]` 144 | 145 | 146 | 147 | `r clips$snippet[1]` 148 | 149 | ### "Word Cloud" (top associated words to the query) 150 | 151 | ```{r fig.height=8, fig.width=8, cache=TRUE} 152 | wc <- query_tv('hannity Market:"National"', mode = "WordCloud", start_date = "2018-04-13") 153 | 154 | ggplot(wc, aes(x=1, y=1)) + 155 | ggrepel::geom_label_repel(aes(label=label, size=count), segment.colour="#00000000", segment.size=0) + 156 | scale_size_continuous(trans="sqrt") + 157 | labs(x=NULL, y=NULL) + 158 | theme_ipsum_rc(grid="") + 159 | theme(axis.text=element_blank()) + 160 | theme(legend.position="none") 161 | ``` 162 | 163 | ### Last 15 Minutes Top Trending 164 | 165 | ```{r} 166 | gd_top_trending() 167 | ``` 168 | 169 | ### Top Overall Trending from the Internet Archive TV Archive (2017 and earlier) 170 | 171 | ```{r} 172 | iatv_top_trending("2017-12-01 18:00", "2017-12-02 06:00") 173 | ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | \*\*\* BREAKING CHANGES \*\*\* 3 | 4 | # newsflash 5 | 6 | Tools to Work with the Internet Archive and GDELT Television 7 | Explorer 8 | 9 | ## Description 10 | 11 | Ref: 12 | 13 | - 14 | - 15 | 16 | TV Explorer: 17 | 18 | >_“In collaboration with the Internet Archive’s 19 | Television News Archive, GDELT’s Television Explorer allows you to 20 | keyword search the closed captioning streams of the Archive’s 6 years of 21 | American television news and explore macro-level trends in how America’s 22 | television news is shaping the conversation around key societal issues. 23 | Unlike the Archive’s primary Television News interface, which returns 24 | results at the level of an hour or half-hour “show,” the interface here 25 | reaches inside of those six years of programming and breaks the more 26 | than one million shows into individual sentences and counts how many of 27 | those sentences contain your keyword of interest. Instead of reporting 28 | that CNN had 24 hour-long shows yesterday that mentioned Donald Trump, 29 | the interface here will count how many sentences uttered on CNN 30 | yesterday mentioned his name - a vastly more accurate metric for 31 | assessing media attention.”_ 32 | 33 | Third Eye: 34 | 35 | >_The TV News Archive’s Third Eye project captures the 36 | chyrons–or narrative text–that appear on the lower third of TV news 37 | screens and turns them into downloadable data and a Twitter feed for 38 | research, journalism, online tools, and other projects.
At project 39 | launch (September 2017) we are collecting chyrons from BBC News, CNN, 40 | Fox News, and MSNBC–more than four million collected over just two 41 | weeks.”_ 42 | 43 | An advantage of using this over the TV Explorer interactive selector & 44 | downloader or Third Eye API is that you get tidy tibbles with this 45 | package, ready to use in R. 46 | 47 | NOTE: While I don’t claim that this alpha-package is anywhere near 48 | perfect, the IA/GDELT TV API hiccups every so often so when there are 49 | critical errors run the same query in their web interface before 50 | submitting an issue. I kept getting errors when searching all affiliate 51 | markets for the “mexican president” query that also generate errors on 52 | the web site when JSON is selected as output (it’s fine on the web site 53 | if the choice is interactive browser visualizations). Submit those 54 | errors to them, not here. 55 | 56 | ## What’s Inside The Tin 57 | 58 | The following functions are implemented: 59 | 60 | - `list_chyrons`: Retrieve Third Eye chyron index 61 | - `list_networks`: Helper function to identify station/network keyword 62 | and corpus date range for said market 63 | - `newsflash`: Tools to Work with the Internet Archive and GDELT 64 | Television Explorer 65 | - `query_tv`: Issue a query to the TV Explorer 66 | - `read_chyrons`: Retrieve TV News Archive chyrons from the Internet 67 | Archive’s Third Eye project 68 | - `gd_top_trending`: Top Trending (GDELT) 69 | - `iatv_top_trending`: Top Trending Topics (Internet Archive TV 70 | Archive) 71 | - `word_cloud`: Retrieve top words that appear most frequently in 72 | clips matching your search 73 | 74 | ## Installation 75 | 76 | ``` r 77 | devtools::install_github("hrbrmstr/newsflash") 78 | ``` 79 | 80 | ``` r 81 | options(width=120) 82 | ``` 83 | 84 | ## Usage 85 | 86 | ``` r 87 | library(newsflash) 88 | library(ggalt) 89 | library(hrbrthemes) 90 | library(tidyverse) 91 | 92 | # current version 93 | packageVersion("newsflash") 94 | ``` 95 | 96 | ## [1] '0.6.0' 97 | 98 | ### “Third Eye” Chyrons are simpler so we’ll start with them first: 99 | 100 | ``` r 101 | list_chyrons() 102 | ``` 103 | 104 | ## # A tibble: 457 x 3 105 | ## ts type size 106 | ## 107 | ## 1 2018-04-16 cleaned 297177. 108 | ## 2 2018-04-16 raw 10436998. 109 | ## 3 2018-04-15 cleaned 347063. 110 | ## 4 2018-04-15 raw 9884284. 111 | ## 5 2018-04-14 cleaned 470448. 112 | ## 6 2018-04-14 raw 13709682. 113 | ## 7 2018-04-13 cleaned 410976. 114 | ## 8 2018-04-13 raw 12058117. 115 | ## 9 2018-04-12 cleaned 384796. 116 | ## 10 2018-04-12 raw 11750908. 117 | ## # ...
with 447 more rows 118 | 119 | ``` r 120 | ch <- read_chyrons("2018-04-13") 121 | 122 | mutate( 123 | ch, 124 | hour = lubridate::hour(ts), 125 | text = tolower(text), 126 | mention = grepl("comey", text) 127 | ) %>% 128 | filter(mention) %>% 129 | count(hour, channel) %>% 130 | ggplot(aes(hour, n)) + 131 | geom_segment(aes(xend=hour, yend=0), color = "lightslategray", size=1) + 132 | scale_x_continuous(name="Hour (GMT)", breaks=seq(0, 23, 6), 133 | labels=sprintf("%02d:00", seq(0, 23, 6))) + 134 | scale_y_continuous(name="# Chyrons", limits=c(0,20)) + 135 | facet_wrap(~channel, scales="free") + 136 | labs(title="Chyrons mentioning 'Comey' per hour per channel", 137 | caption="Source: Internet Archive Third Eye project & ") + 138 | theme_ipsum_rc(grid="Y") 139 | ``` 140 | 141 | ![](README_files/figure-gfm/unnamed-chunk-4-1.png) 142 | 143 | ## Now for the TV Explorer: 144 | 145 | ### See what networks & associated corpus date ranges are available: 146 | 147 | ``` r 148 | list_networks(widget=FALSE) 149 | ``` 150 | 151 | ## # A tibble: 159 x 6 152 | ## StationID Description Market Network StartDate EndDate 153 | ## 154 | ## 1 ALJAZ Al Jazeera International ALJAZ 2017-09-11 2017-09-11 155 | ## 2 ALJAZAM Al Jazeera America NationalDiscontinued ALJAZAM 2013-08-20 2013-08-20 156 | ## 3 BBCNEWS BBC News International BBCNEWS 2017-01-01 2017-01-01 157 | ## 4 BETW BET - San Francisco (BETW) San Francisco BET 2016-12-13 2016-12-13 158 | ## 5 BLOOMBERG Bloomberg National BLOOMBERG 2013-12-05 2013-12-05 159 | ## 6 CNBC CNBC National CNBC 2009-07-02 2009-07-02 160 | ## 7 CNN CNN National CNN 2009-07-02 2009-07-02 161 | ## 8 COM Comedy Central NationalSpecialty COM 2011-05-10 2011-05-10 162 | ## 9 CSPAN CSPAN National CSPAN 2009-06-04 2009-06-04 163 | ## 10 CSPAN2 CSPAN2 National CSPAN 2009-06-04 2009-06-04 164 | ## # ... with 149 more rows 165 | 166 | ### Basic search: 167 | 168 | ``` r 169 | comey <- query_tv('comey', start_date = "2018-04-01") 170 | 171 | comey 172 | ``` 173 | 174 | ## # A tibble: 144 x 3 175 | ## network date value 176 | ## 177 | ## 1 CSPAN3 2018-04-01 0.0273 178 | ## 2 CSPAN3 2018-04-02 0. 179 | ## 3 CSPAN3 2018-04-03 0. 180 | ## 4 CSPAN3 2018-04-04 0.0241 181 | ## 5 CSPAN3 2018-04-05 0. 182 | ## 6 CSPAN3 2018-04-06 0. 183 | ## 7 CSPAN3 2018-04-07 0. 184 | ## 8 CSPAN3 2018-04-08 0. 185 | ## 9 CSPAN3 2018-04-09 0. 186 | ## 10 CSPAN3 2018-04-10 0. 187 | ## # ... 
with 134 more rows 188 | 189 | ``` r 190 | query_tv('comey', start_date = "2018-04-01") %>% 191 | arrange(date) %>% 192 | ggplot(aes(date, value, group=network)) + 193 | ggalt::geom_xspline(aes(color=network)) + 194 | ggthemes::scale_color_tableau(name=NULL) + 195 | labs(x=NULL, y="Volume Metric", title="'Comey' Trends Across National Networks") + 196 | facet_wrap(~network) + 197 | theme_ipsum_rc(grid="XY") + 198 | theme(legend.position="none") 199 | ``` 200 | 201 | ![](README_files/figure-gfm/unnamed-chunk-6-1.png) 202 | 203 | ``` r 204 | query_tv("comey Network:CNN", mode = "TimelineVol", start_date = "2018-01-01") %>% 205 | arrange(date) %>% 206 | ggplot(aes(date, value, group=network)) + 207 | ggalt::geom_xspline(color="lightslategray") + 208 | ggthemes::scale_color_tableau(name=NULL) + 209 | labs(x=NULL, y="Volume Metric", title="'Comey' Trend on CNN") + 210 | theme_ipsum_rc(grid="XY") 211 | ``` 212 | 213 | ![](README_files/figure-gfm/unnamed-chunk-7-1.png) 214 | 215 | ### Relative Network Attention To Syria since January 1, 2018 216 | 217 | ``` r 218 | query_tv('syria Market:"National"', mode = "StationChart", start_date = "2018-01-01") %>% 219 | arrange(desc(count)) %>% 220 | knitr::kable("markdown") 221 | ``` 222 | 223 | | station | count | 224 | | :----------- | -----: | 225 | | FOX News | 1.0148 | 226 | | CNN | 0.8804 | 227 | | MSNBC | 0.7668 | 228 | | CSPAN | 0.6192 | 229 | | FOX Business | 0.5121 | 230 | | CSPAN2 | 0.3346 | 231 | | Bloomberg | 0.3208 | 232 | | CSPAN3 | 0.2392 | 233 | | CNBC | 0.2171 | 234 | 235 | ### Video Clips 236 | 237 | ``` r 238 | clips <- query_tv('comey Market:"National"', mode = "ClipGallery", start_date = "2018-01-01") 239 | 240 | clips 241 | ``` 242 | 243 | ## # A tibble: 32 x 8 244 | ## preview_url ia_show_id date station show show_date preview_thumb snippet 245 | ## 246 | ## 1 https://archive.… FOXNEWSW_201… 2018-04-13 FOX Ne… Shepa… 2018-04-13 https://archive.org/do… comey -- i mention it … 247 | ## 2 https://archive.… MSNBCW_20180… 2018-03-20 MSNBC MTP D… 2018-03-20 https://archive.org/do… donald trump ousted co… 248 | ## 3 https://archive.… CNNW_2018041… 2018-04-16 CNN CNN S… 2018-04-16 https://archive.org/do… comey versus comey or … 249 | ## 4 https://archive.… MSNBCW_20180… 2018-04-12 MSNBC The R… 2018-04-12 https://archive.org/do… and the president of c… 250 | ## 5 https://archive.… FOXNEWSW_201… 2018-04-13 FOX Ne… The I… 2018-04-13 https://archive.org/do… comey announced when h… 251 | ## 6 https://archive.… FBC_20180413… 2018-04-13 FOX Bu… After… 2018-04-13 https://archive.org/do… untethered to the trut… 252 | ## 7 https://archive.… FBC_20180415… 2018-04-15 FOX Bu… The J… 2018-04-15 https://archive.org/do… that we haven't alread… 253 | ## 8 https://archive.… CNNW_2018031… 2018-03-18 CNN New D… 2018-03-18 https://archive.org/do… media. after comey lea… 254 | ## 9 https://archive.… MSNBCW_20180… 2018-02-20 MSNBC The B… 2018-02-20 https://archive.org/do… trump caused this inve… 255 | ## 10 https://archive.… CNBC_2018041… 2018-04-13 CNBC Power… 2018-04-13 https://archive.org/do… he is ego different an… 256 | ## # ... with 22 more rows 257 | 258 | 2018-04-13 | FOX News | Shepard Smith 259 | Reporting 260 | 261 | 262 | 263 | comey – i mention it because comey is in the news. 
treats comey like a 264 | white knight and points out that director comey would have a vested 265 | interest in distancing himself from andrew mccabe because the inspector 266 | general was also looking at comey and 267 | his 268 | 269 | ### “Word Cloud” (top associated words to the query) 270 | 271 | ``` r 272 | wc <- query_tv('hannity Market:"National"', mode = "WordCloud", start_date = "2018-04-13") 273 | 274 | ggplot(wc, aes(x=1, y=1)) + 275 | ggrepel::geom_label_repel(aes(label=label, size=count), segment.colour="#00000000", segment.size=0) + 276 | scale_size_continuous(trans="sqrt") + 277 | labs(x=NULL, y=NULL) + 278 | theme_ipsum_rc(grid="") + 279 | theme(axis.text=element_blank()) + 280 | theme(legend.position="none") 281 | ``` 282 | 283 | ![](README_files/figure-gfm/unnamed-chunk-10-1.png) 284 | 285 | ### Last 15 Minutes Top Trending 286 | 287 | ``` r 288 | gd_top_trending() 289 | ``` 290 | 291 | ## $overall_trending_topics 292 | ## [1] "commonwealth" "shirley" "caribbean" "florida" 293 | ## [5] "jim comey" "boston" "sandra" "nell" 294 | ## [9] "george stephanopoulos" "vincent kompany" "pallab ghosh" "brighthouse financial" 295 | ## [13] "islamic state" "wetherspoon" "europe" "sorrell" 296 | ## [17] "north carolina" "nasa" "starbucks" "pakistan" 297 | ## [21] "whitbread" "cliff richard" "asia" "hilary clinton" 298 | ## [25] "ghouta" "kevin johnson" "west" "philadelphia" 299 | ## [29] "renee" "zimbabwe" "city" "bill chaplin" 300 | ## [33] "james" "grassley" "quetta" "myrbetriq" 301 | ## [37] "barbara" "john heilemann" "carrie underwood" "joe" 302 | ## [41] "houston" "balochistan" "ibm" "medicare" 303 | ## [45] "barclays" "fidelity" "jason aldean" "rhonda" 304 | ## [49] "michael flynn" "belfast" "kohler" 305 | ## 306 | ## $station_trending_topics 307 | ## # A tibble: 112 x 2 308 | ## station topic 309 | ## 310 | ## 1 CNN brilinta 311 | ## 2 CNN jim comey 312 | ## 3 CNN christine 313 | ## 4 CNN michael flynn 314 | ## 5 CNN tremfya 315 | ## 6 CNN tal 316 | ## 7 CNN nick paton walsh 317 | ## 8 CNN geico 318 | ## 9 CNN vladimir putin 319 | ## 10 CNN lynch 320 | ## # ... with 102 more rows 321 | ## 322 | ## $station_top_topics 323 | ## # A tibble: 112 x 2 324 | ## station topic 325 | ## 326 | ## 1 CNN fbi 327 | ## 2 CNN russia 328 | ## 3 CNN donald trump 329 | ## 4 CNN james comey 330 | ## 5 CNN mueller 331 | ## 6 CNN syria 332 | ## 7 CNN united states 333 | ## 8 CNN michael cohen 334 | ## 9 CNN clinton 335 | ## 10 CNN cnn 336 | ## # ... 
with 102 more rows 337 | ## 338 | ## $overall_trending_phrases 339 | ## [1] "morally unfit" "unfit to be president" "good morning" 340 | ## [4] "medically unfit" "president of the united" "islamic state group" 341 | ## [7] "night sky" "bank of america" "xfinity delivers gig" 342 | ## [10] "give this guy gig-" "delivers gig speed" "give this guy" 343 | ## [13] "gig speed" "speed to more homes" "xfinity delivers gig speed" 344 | ## [16] "guy gig-" "treats women" "xfinity delivers" 345 | ## [19] "donald trump" "gig-speed internet" "kennedy space centre" 346 | ## [22] "people watching" "threatens new sanctions" "donald trump unfit" 347 | ## [25] "exclusive interview" "evidence of obstruction" "sees moral equivalence" 348 | ## [28] "100 years" "air strikes" "fit to be president" 349 | ## [31] "new york" "maintaining a level" "shield annuity" 350 | ## [34] "growth opportunities" "lies constantly" "time to make" 351 | ## [37] "level of protection" "support for president assad" "removing donald trump" 352 | ## [40] "support for president" "buy the stuff" "2700 journalists" 353 | ## [43] "pallab ghosh" "brighthouse financial- established" "mission to scan" 354 | ## [46] "stars resonate" "voting booth" "star makes" 355 | ## [49] "james comey comments" "embody respect" "adhere to the values" 356 | 357 | ### Top Overall Trending from the Internet Archive TV Archive (2017 and earlier) 358 | 359 | ``` r 360 | iatv_top_trending("2017-12-01 18:00", "2017-12-02 06:00") 361 | ``` 362 | 363 | ## # A tibble: 49 x 5 364 | ## ts overall_trending_topics station_trending_topics station_top_topics overall_trending_phrases 365 | ## 366 | ## 1 2017-12-01 18:00:00 367 | ## 2 2017-12-01 18:15:00 368 | ## 3 2017-12-01 18:30:00 369 | ## 4 2017-12-01 18:45:00 370 | ## 5 2017-12-01 19:00:00 371 | ## 6 2017-12-01 19:15:00 372 | ## 7 2017-12-01 19:30:00 373 | ## 8 2017-12-01 19:45:00 374 | ## 9 2017-12-01 20:00:00 375 | ## 10 2017-12-01 20:15:00 376 | ## # ... 
with 39 more rows 377 | -------------------------------------------------------------------------------- /README_cache/gfm/__packages: -------------------------------------------------------------------------------- 1 | base 2 | newsflash 3 | ggplot2 4 | ggalt 5 | hrbrthemes 6 | tidyverse 7 | tibble 8 | tidyr 9 | readr 10 | purrr 11 | dplyr 12 | stringr 13 | forcats 14 | bindrcpp 15 | -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-10_b71663ca7f74ee9c9e0993f800680bdc.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_cache/gfm/unnamed-chunk-10_b71663ca7f74ee9c9e0993f800680bdc.RData -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-10_b71663ca7f74ee9c9e0993f800680bdc.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_cache/gfm/unnamed-chunk-10_b71663ca7f74ee9c9e0993f800680bdc.rdb -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-10_b71663ca7f74ee9c9e0993f800680bdc.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_cache/gfm/unnamed-chunk-10_b71663ca7f74ee9c9e0993f800680bdc.rdx -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-4_202c6a4374c7d2d43d1df0021f5e1de3.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_cache/gfm/unnamed-chunk-4_202c6a4374c7d2d43d1df0021f5e1de3.RData -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-4_202c6a4374c7d2d43d1df0021f5e1de3.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_cache/gfm/unnamed-chunk-4_202c6a4374c7d2d43d1df0021f5e1de3.rdb -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-4_202c6a4374c7d2d43d1df0021f5e1de3.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_cache/gfm/unnamed-chunk-4_202c6a4374c7d2d43d1df0021f5e1de3.rdx -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-6_60e162ac3d416f213d19662cf1a02510.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_cache/gfm/unnamed-chunk-6_60e162ac3d416f213d19662cf1a02510.RData -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-6_60e162ac3d416f213d19662cf1a02510.rdb: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_cache/gfm/unnamed-chunk-6_60e162ac3d416f213d19662cf1a02510.rdb -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-6_60e162ac3d416f213d19662cf1a02510.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_cache/gfm/unnamed-chunk-6_60e162ac3d416f213d19662cf1a02510.rdx -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-7_2f3c308173042d1baf25844e64d232cb.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_cache/gfm/unnamed-chunk-7_2f3c308173042d1baf25844e64d232cb.RData -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-7_2f3c308173042d1baf25844e64d232cb.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_cache/gfm/unnamed-chunk-7_2f3c308173042d1baf25844e64d232cb.rdb -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-7_2f3c308173042d1baf25844e64d232cb.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_cache/gfm/unnamed-chunk-7_2f3c308173042d1baf25844e64d232cb.rdx -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-8_63ed08ea6bddbf23012e183bdb415c89.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_cache/gfm/unnamed-chunk-8_63ed08ea6bddbf23012e183bdb415c89.RData -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-8_63ed08ea6bddbf23012e183bdb415c89.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_cache/gfm/unnamed-chunk-8_63ed08ea6bddbf23012e183bdb415c89.rdb -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-8_63ed08ea6bddbf23012e183bdb415c89.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_cache/gfm/unnamed-chunk-8_63ed08ea6bddbf23012e183bdb415c89.rdx -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-9_8b52c64d46d2221a5b0cbdaefa9e655b.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_cache/gfm/unnamed-chunk-9_8b52c64d46d2221a5b0cbdaefa9e655b.RData -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-9_8b52c64d46d2221a5b0cbdaefa9e655b.rdb: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_cache/gfm/unnamed-chunk-9_8b52c64d46d2221a5b0cbdaefa9e655b.rdb -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-9_8b52c64d46d2221a5b0cbdaefa9e655b.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_cache/gfm/unnamed-chunk-9_8b52c64d46d2221a5b0cbdaefa9e655b.rdx -------------------------------------------------------------------------------- /README_files/figure-gfm/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_files/figure-gfm/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /README_files/figure-gfm/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_files/figure-gfm/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /README_files/figure-gfm/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_files/figure-gfm/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /README_files/figure-gfm/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_files/figure-gfm/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /README_files/figure-markdown_github/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_files/figure-markdown_github/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /README_files/figure-markdown_github/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_files/figure-markdown_github/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /README_files/figure-markdown_github/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_files/figure-markdown_github/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /README_files/figure-markdown_github/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_files/figure-markdown_github/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /README_files/figure-markdown_github/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_files/figure-markdown_github/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /README_files/figure-markdown_github/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/newsflash/5075be6afdd214bdf257ebe87f525a09c9cb1b80/README_files/figure-markdown_github/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /man/gd_top_trending.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gd-top-trending.R 3 | \name{gd_top_trending} 4 | \alias{gd_top_trending} 5 | \title{Top Trending (GDELT)} 6 | \usage{ 7 | gd_top_trending() 8 | } 9 | \description{ 10 | Retrieve current (last 15 minutes) "top topics" being discussed on stations 11 | } 12 | -------------------------------------------------------------------------------- /man/iatv_top_trending.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/top-tending-range.r 3 | \name{iatv_top_trending} 4 | \alias{iatv_top_trending} 5 | \title{Top Trending Topics (Internet Archive TV Archive)} 6 | \usage{ 7 | iatv_top_trending(from, to, .progress = interactive()) 8 | } 9 | \arguments{ 10 | \item{from, to}{start and end date/time ranges (will auto-convert if properly formatted
strings)} 11 | 12 | \item{.progress}{show a progress bar? Defaults to \code{TRUE} if in an interactive session.} 13 | } 14 | \description{ 15 | Provide start & end times in the current time zone and this function will generate 16 | the proper "every 15-minute" values, convert them to GMT values and issue the queries, 17 | returning a nested data frame of results. If you want more control, use \code{\link[=top_trending]{top_trending()}}. 18 | } 19 | \details{ 20 | GDELT now generates a snapshot every 15 minutes that records all of the "top trending" 21 | tables into a single archive enabling users to look back over time at what was trending 22 | in 15 minute increments historically back to midnight on 2017-09-07. 23 | 24 | Note that the archives are generated every 15 minutes based on the television shows that 25 | have completed processing at that time. It can take several hours for a show to be fully 26 | processed by the Internet Archive and available for analysis, thus the presence/absence 27 | of a topic in these files should not be used to date it precisely to that 15 minute mark, 28 | but rather as a rough temporal indicator of what topics were trending up/down in that 29 | general time frame. For precise timelines, you should take a topic from this archive and 30 | run a search on it using the main Television Explorer interface, select a timeframe of 31 | 72 hours and use the resulting timeline to precisely date the topic's coverage (since 32 | the Explorer timeline is based on the broadcast timestamp of the show, even if it is 33 | processed hours later). 34 | } 35 | \note{ 36 | The times are auto-converted to GMT. 37 | } 38 | \examples{ 39 | iatv_top_trending("2017-09-08 18:00", "2017-09-09 06:00") 40 | } 41 | -------------------------------------------------------------------------------- /man/list_chyrons.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/list-chyrons.r 3 | \name{list_chyrons} 4 | \alias{list_chyrons} 5 | \title{Retrieve Third Eye chyron index} 6 | \usage{ 7 | list_chyrons() 8 | } 9 | \value{ 10 | data frame with three columns: 11 | \itemize{ 12 | \item \code{ts} (\code{POSIXct}) chyron timestamp 13 | \item \code{type} (\code{character}) \code{raw} or \code{cleaned} 14 | \item \code{size} (\code{numeric}) size of the feed file in bytes 15 | } 16 | } 17 | \description{ 18 | Returns a data frame with available chyron dates & selected metadata. 19 | } 20 | -------------------------------------------------------------------------------- /man/list_networks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/networks.r 3 | \name{list_networks} 4 | \alias{list_networks} 5 | \title{Helper function to identify station/network keyword and corpus date range for said market} 6 | \usage{ 7 | list_networks(widget = interactive()) 8 | } 9 | \arguments{ 10 | \item{widget}{if `TRUE` then an HTML widget will be displayed to make it easier to 11 | sift through stations/networks} 12 | } 13 | \value{ 14 | data frame 15 | } 16 | \description{ 17 | The \code{filter_network} of \code{query_tv()} is picky so this helps you identify the 18 | keyword to use for the particular network/station. 19 | } 20 | \details{ 21 | The list also shows the date ranges available for the captions, so you can use that as 22 | a guide when picking dates.
23 | 24 | In interactive mode it uses \code{DT::datatable()}. You can force it to just display to 25 | the console by passing in \code{widget=FALSE} 26 | } 27 | \examples{ 28 | list_networks() # widget 29 | print(list_networks(FALSE)) # no widget 30 | } 31 | -------------------------------------------------------------------------------- /man/newsflash.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/newsflash-package.R 3 | \docType{package} 4 | \name{newsflash} 5 | \alias{newsflash} 6 | \alias{newsflash-package} 7 | \title{Tools to Work with the Internet Archive and GDELT Television Explorer} 8 | \description{ 9 | Tools to Work with the Internet Archive and GDELT Television Explorer 10 | } 11 | \author{ 12 | Bob Rudis (bob@rud.is) 13 | } 14 | -------------------------------------------------------------------------------- /man/query_tv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/newsflash.r 3 | \name{query_tv} 4 | \alias{query_tv} 5 | \title{Issue a query to the TV Explorer} 6 | \usage{ 7 | query_tv(query, mode = c("TimelineVol", "StationChart", "TimelineVolNorm"), 8 | start_date = NULL, end_date = NULL, datanorm = c("perc", "raw"), 9 | timelinesmooth = 0, datacomb = c("separate", "combined"), 10 | last_24 = c("yes", "no")) 11 | } 12 | \arguments{ 13 | \item{query}{query string in GDELT format. See \code{QUERY} in https://blog.gdeltproject.org/gdelt-2-0-television-api-debuts/ 14 | for details; use \code{\link[=list_networks]{list_networks()}} to obtain valid station/network identifiers. If 15 | no \code{Network:}, \code{Market:} or \code{Station:} qualifiers are found \code{Market:"National"} is automatically added.} 16 | 17 | \item{mode}{See \code{Mode} section} 18 | 19 | \item{start_date, end_date}{start/end dates. Leaving both \code{NULL} searches all archive history. 20 | Leaving just \code{start_date} \code{NULL} sets the start date to July 2009. Leaving just \code{end_date} 21 | \code{NULL} sets the end date to today.} 22 | 23 | \item{datanorm}{normalized ("\code{perc}") vs "\code{raw}" counts; defaults to \code{perc}.} 24 | 25 | \item{timelinesmooth}{a smoothing value applying moving averages over 15-minute increments} 26 | 27 | \item{datacomb}{if "\code{combined}", all network volume is combined into a single value. 28 | Defaults to "\code{separate}".} 29 | 30 | \item{last_24}{It can take the Internet Archive up to 24 hours to process a broadcast once 31 | it concludes. Thus, by default the TV API does not return results from the most recent 32 | 24 hours to ensure that analyses are not skewed by partial results. However, when 33 | tracking breaking news events, it may be desirable to view partial results with the 34 | understanding that any time or station-based trends may not accurately reflect the 35 | totality of their coverage. 
To include results from the most recent 24 hours, 36 | set this URL parameter to "yes".} 37 | } 38 | \value{ 39 | Different objects for different \code{mode}s: 40 | \itemize{ 41 | \item \code{TimelineVol} : a data frame with stations & counts (raw or normalized) 42 | \item \code{TimelineVolNorm} : a data frame of total monitored airtime per station 43 | \item \code{StationChart} : a data frame of stations and search result counts (raw or normalized) 44 | } 45 | } 46 | \description{ 47 | NOTE: The \code{mode} parameter controls what is returned. See the section on \code{Mode} for more information on available modes. 48 | } 49 | \section{Mode}{ 50 | 51 | 52 | This specifies the output you would like from the API, ranging from timelines to word clouds to clip galleries. 53 | \itemize{ 54 | \item \code{TimelineVol}. (Default) This tracks how many results your search generates by day/hour over the selected time period, allowing you to assess the relative attention each station is paying to the topic and how that attention has varied over time. Using the DATANORM parameter you can control whether this reports results as raw clip counts or as normalized percentages of all coverage (the most robust way of comparing stations). By default, the timeline will not display the most recent 24 hours, since those results are still being generated (it can take up to 2-12 hours for a show to be processed by the Internet Archive and ready for analysis), but you can include those if needed via the LAST24 option. You can also smooth the timeline using the TIMELINESMOOTH option and combine all selected stations into a single time series using the DATACOMB option. 55 | \item \code{StationChart}. This compares how many results your search generates from each of the selected stations over the selected time period, allowing you to assess the relative attention each is paying to the topic. Using the DATANORM parameter you can control whether this reports results as raw clip counts or as normalized percentages of all coverage (the most robust way of comparing stations). 56 | \item \code{TimelineVolNorm}. This displays the total airtime (in terms of 15 second clips) monitored from each of the stations in your query. It must be combined with a valid query, since it displays the airtime for the stations queried in the search. This mode can be used to identify brief monitoring outages or for advanced normalization, since it reports the total amount of clips monitored overall from each station in each day/hour. 57 | } 58 | } 59 | 60 | \section{Queries}{ 61 | 62 | 63 | The GDELT TV API supports keyword and keyphrase searches, OR statements and a variety of advanced operators. NOTE – all of the operators below must be used as part of the value of the QUERY field, separated by spaces, and cannot be used as URL parameters on their own. 64 | \itemize{ 65 | \item \code{""}. Anything found inside of quote marks is treated as an exact phrase search. Thus, you can search for "Donald Trump" to find all matches of his name. (e.g. \code{"donald trump"}) 66 | \item \code{(a OR b)}. You can specify a list of keywords to be boolean OR'd together by enclosing them in parentheses and placing the capitalized word "OR" between each keyword or phrase. Boolean OR blocks cannot be nested at this time. For example, to search for mentions of Clinton, Sanders or Trump, you would use "\code{(clinton OR sanders OR trump)}" 67 | \item \code{-}. You can place a minus sign in front of any operator, word or phrase to exclude it.
For example "-sanders" would exclude results that contained "sanders" from your results. (e.g. \code{-sanders}) 68 | \item \code{Context}. By default all of your keywords/phrases must appear in a single 15 second clip. (Phrases are allowed to span across two clips and are counted towards the clip they started in). The "context" operator allows you to require that a given keyword/phrase appears either in the 15 second clip or in the 15 second clips immediately before or after it. This gives you a bit of additional search fuzziness. Even when searching for a single word, it must appear in quote marks. (e.g. \code{context:"russia"}) 69 | \item \code{Market}. This narrows your search to a particular geographic market. The list of available markets can be found via the Station Details mode (look for the city name in the description of local stations). Example markets include "San Francisco" and "Philadelphia". The market name must be enclosed in quote marks. You can also use the special reserved market "National" to search the major national networks together. (e.g. \code{market:"San Francisco"}) 70 | \item \code{Network}. This narrows your search to a particular television network. The list of available networks can be found via the Station Details mode (look for the network name in the description of local stations). Example markets include "CBS" and "NBC". Do not use quote marks around the network name. (e.g. \code{network:CBS}) 71 | \item Show. This narrows your search to a particular television show. This must be the complete show name as returned by the TV API. To find a particular show, search the API and use the "clipgallery" mode to display matching clips and their source show. For example, to limit your search to the show Hardball With Chris Matthews, you'd search for "show:"Hardball With Chris Matthews"". Note that you must surround the show name with quote marks. Remember that the TV API only searches shows monitored by the Internet Archive's Television News Archive, which may not include all shows. (e.g. \code{show:"Hardball With Chris Matthews"}) 72 | \item \code{Station}. This narrows your search to a particular television station. Remember that the TV API only searches stations monitored by the Internet Archive's Television News Archive and not all of those stations have been monitored for the entire 2009-present time period. Do not use quote marks around the name of the station. To find the Station ID of a particular station, use the Station Details mode. (e.g. \code{station:CNN}) 73 | } 74 | } 75 | 76 | \examples{ 77 | query_tv("(terror isis") 78 | query_tv("british prime minister") 79 | query_tv("mexican president") 80 | } 81 | \references{ 82 | \url{https://blog.gdeltproject.org/gdelt-2-0-television-api-debuts/} 83 | } 84 | -------------------------------------------------------------------------------- /man/read_chyrons.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/third-eye.r 3 | \name{read_chyrons} 4 | \alias{read_chyrons} 5 | \title{Retrieve TV News Archive chyrons from the Internet Archive's Third Eye project} 6 | \usage{ 7 | read_chyrons(chyron_day = Sys.Date() - 1, cleaned = TRUE) 8 | } 9 | \arguments{ 10 | \item{chyron_day}{archive day (\code{Date} or \code{character}; if \code{character} should be 11 | in \code{YYYY-mm-dd} format)} 12 | 13 | \item{cleaned}{logical, default \code{TRUE}. 
The "raw feed" option provides all of the 14 | OCR'ed text from chyrons at the rate of approximately one entry per second. 15 | The "clean feed" download provides the data feed that fuels the Third Eye 16 | Twitter bots; this has been filtered to find the most representative, 17 | clearest chyrons from a 60-second period, with no more than one entry/tweet per 18 | minute (though the duration may be shorter than 60 seconds.) The clean feed 19 | relies on algorithms that are a work in progress.} 20 | } 21 | \value{ 22 | \code{NULL} on irrecoverable errors, otherwise a data frame with five columns: 23 | \itemize{ 24 | \item \code{ts} (\code{POSIXct}) chyron timestamp 25 | \item \code{channel} (\code{character}) news channel the chyron appeared on 26 | \item \code{duration} (\code{integer}) see Description 27 | \item \code{details} (\code{character}) Internet Archive details path 28 | \item \code{text} (\code{character}) the chyron text 29 | } 30 | } 31 | \description{ 32 | The TV News Archive's Third Eye project captures the chyrons–or narrative text–that appear on the lower third of TV news screens and turns them into downloadable data and a Twitter feed for research, journalism, online tools, and other projects. At project launch (September 2017) we are collecting chyrons from BBC News, CNN, Fox News, and MSNBC–more than four million collected over just two weeks. Chyrons have public value because: 33 | \itemize{ 34 | \item Breaking news often appears on chyrons before TV newscasters begin reporting or video is available, whether it's a hurricane or a breaking political story. 35 | \item Which chyrons a TV news network chooses to display can reveal editorial decisions that can inform public understanding of how news is filtered for different audiences. 36 | \item Providing chyrons as data–and also on Twitter–in near real-time can serve as a alert system, showing how TV news stations are reporting the news. Often the chyrons are ahead of the general conversation on Twitter. 37 | } 38 | } 39 | \details{ 40 | Some notes on the data 41 | \itemize{ 42 | \item chyrons are derived in near real-time from the TV News Archive's collection of TV news. The constantly updating public collection contains 1.4 million TV news shows, some dating back to 2009. 43 | \item At launch, Third Eye captures four TV cable news channels: BBC News, CNN, Fox News, and MSNBC. 44 | \item Data can be affected by temporary collection outages, which typically can last minutes or hours, but rarely more. 45 | \item Dates/times are in UTC (Coordinated Universal Time). 46 | \item Because the size of the raw data is so large (about 20 megabytes per day), results are limited to seven days per request. 47 | \item Raw data collection began on August 25, 2017; the clean feed begins on September 7, 2017. 48 | \item "\code{duration}" column is in seconds–the amount of time that particular chyron appeared on the screen. 49 | } 50 | } 51 | \note{ 52 | It is \emph{highly} recommended that you use the "clean" feed unless you're researching 53 | how to correct text. This package does it's best to read in the raw feed but 54 | it often contains embedded nulls and non-standard text encodings which 55 | make it difficult to process. 
56 | } 57 | -------------------------------------------------------------------------------- /man/word_cloud.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/word-cloud.R 3 | \name{word_cloud} 4 | \alias{word_cloud} 5 | \title{Retrieve top words that appear most frequently in clips matching your search} 6 | \usage{ 7 | word_cloud(query, start_date = NULL, end_date = NULL) 8 | } 9 | \arguments{ 10 | \item{query}{query string in GDELT format. See \code{QUERY} in https://blog.gdeltproject.org/gdelt-2-0-television-api-debuts/ 11 | for details; use \code{\link[=list_networks]{list_networks()}} to obtain valid station/network identifiers} 12 | 13 | \item{start_date, end_date}{start/end dates. Leaving both \code{NULL} searches all archive history. 14 | Leaving just \code{start_date} \code{NULL} sets the start date to July 2009. Leaving just \code{end_date} 15 | \code{NULL} sets the end date to today.} 16 | } 17 | \description{ 18 | The API takes the 200 most relevant clips that match your search and returns the 19 | terms for a "word cloud" of up to the top 200 most frequent words that appeared in 20 | those clips (common stop words are automatically removed). This is a powerful way 21 | of understanding the topics and words dominating the relevant coverage and 22 | suggesting additional contextual search terms to narrow or evolve your search. 23 | Note that if there are too few matching clips for your query, the word cloud may 24 | be blank. 25 | } 26 | -------------------------------------------------------------------------------- /newsflash.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | StripTrailingWhitespace: Yes 16 | 17 | BuildType: Package 18 | PackageUseDevtools: Yes 19 | PackageInstallArgs: --no-multiarch --with-keep.source 20 | PackageBuildArgs: --resave-data 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /tests/test-all.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | test_check("newsflash") 3 | -------------------------------------------------------------------------------- /tests/testthat/test-newsflash.R: -------------------------------------------------------------------------------- 1 | context("API functionality") 2 | test_that("API functionality", { 3 | 4 | #expect_that(some_function(), is_a("data.frame")) 5 | 6 | }) 7 | --------------------------------------------------------------------------------
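A quick orientation to the man pages above: the sketch below strings the documented entry points together. It is a minimal, unverified sketch, not part of the package sources: it assumes the package is installed and the GDELT TV API / Internet Archive endpoints are reachable, and it assumes `query_tv()` accepts `YYYY-mm-dd` date strings (only `read_chyrons()` documents its date format explicitly). Result columns are taken from the `\value{}` sections of the man pages, not re-checked here.

``` r
library(newsflash)

# Find a usable station/network keyword without the HTML widget
networks <- list_networks(widget = FALSE)

# Compose the documented query operators: an OR block, an exclusion, and a
# market qualifier, returning a normalized, smoothed, combined timeline
res <- query_tv(
  query = '(clinton OR sanders OR trump) -debate market:"National"',
  mode = "TimelineVol",
  start_date = "2017-09-01",  # date-string format assumed, not documented
  end_date = "2017-09-30",
  datanorm = "perc",
  timelinesmooth = 5,
  datacomb = "combined"
)

# Third Eye chyrons: check what's available, then pull one day's clean feed
# (raw collection starts 2017-08-25; the clean feed starts 2017-09-07)
avail <- list_chyrons()
chy <- read_chyrons("2017-10-01", cleaned = TRUE)
head(chy[, c("ts", "channel", "duration", "text")])
```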