├── .gitignore ├── R ├── sysdata.rda ├── legislatoR.R ├── cld_content.R ├── get_office.R ├── get_portrait.R ├── get_profession.R ├── get_traffic.R ├── get_social.R ├── get_history.R ├── get_core.R ├── get_political.R └── get_ids.R ├── images ├── sticker.jpg └── data-structure.png ├── vignettes ├── dpsi_example ├── sticker.jpg ├── parlspeech_example └── legislatoR.Rmd ├── CRAN-SUBMISSION ├── .Rbuildignore ├── NAMESPACE ├── legislatoR.Rproj ├── tests ├── testthat.R └── testthat │ ├── test-cld_content.R │ ├── test-errors.R │ ├── test-get_ids.R │ ├── test-get_history.R │ ├── test-get_office.R │ ├── test-get_social.R │ ├── test-get_traffic.R │ ├── test-get_portrait.R │ ├── test-get_political.R │ ├── test-get_profession.R │ └── test-get_core.R ├── SOURCES.md ├── source ├── sticker.R ├── packages.R ├── preparation_spain.R └── integration_czech.R ├── DESCRIPTION ├── man ├── cld_content.Rd ├── legislatoR.Rd ├── get_office.Rd ├── get_portrait.Rd ├── get_profession.Rd ├── get_traffic.Rd ├── get_social.Rd ├── get_history.Rd ├── get_core.Rd ├── get_political.Rd └── get_ids.Rd ├── NEWS.md ├── README.md └── GLOSSARY.md /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | -------------------------------------------------------------------------------- /R/sysdata.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saschagobel/legislatoR/HEAD/R/sysdata.rda -------------------------------------------------------------------------------- /images/sticker.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saschagobel/legislatoR/HEAD/images/sticker.jpg -------------------------------------------------------------------------------- /vignettes/dpsi_example: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/saschagobel/legislatoR/HEAD/vignettes/dpsi_example -------------------------------------------------------------------------------- /vignettes/sticker.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saschagobel/legislatoR/HEAD/vignettes/sticker.jpg -------------------------------------------------------------------------------- /images/data-structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saschagobel/legislatoR/HEAD/images/data-structure.png -------------------------------------------------------------------------------- /CRAN-SUBMISSION: -------------------------------------------------------------------------------- 1 | Version: 1.1.0 2 | Date: 2023-03-31 20:38:53 UTC 3 | SHA: df17df975945a3ee6bb302e1be2b3a588d1d8065 4 | -------------------------------------------------------------------------------- /vignettes/parlspeech_example: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saschagobel/legislatoR/HEAD/vignettes/parlspeech_example -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^GLOSSARY\.md$ 4 | ^README\.md$ 5 | ^SOURCES\.md$ 6 | ^images$ 7 | ^source$ 8 | ^workshop$ 9 | ^\.travis\.yml$ 10 | ^CRAN-SUBMISSION$ 11 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(cld_content) 4 | export(get_core) 5 | export(get_history) 6 | export(get_ids) 7 | export(get_office) 8 | export(get_political) 9 | export(get_portrait) 10 | export(get_profession) 11 | 
export(get_social) 12 | export(get_traffic) 13 | import(dplyr) 14 | importFrom(curl,nslookup) 15 | -------------------------------------------------------------------------------- /legislatoR.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(legislatoR) 3 | 4 | test_check("legislatoR", filter = "cld_content") 5 | test_check("legislatoR", filter = "errors") 6 | test_check("legislatoR", filter = "get_core") 7 | test_check("legislatoR", filter = "get_history") 8 | test_check("legislatoR", filter = "get_ids") 9 | test_check("legislatoR", filter = "get_office") 10 | test_check("legislatoR", filter = "get_political") 11 | test_check("legislatoR", filter = "get_portrait") 12 | test_check("legislatoR", filter = "get_profession") 13 | test_check("legislatoR", filter = "get_social") 14 | test_check("legislatoR", filter = "get_traffic") 15 | 16 | -------------------------------------------------------------------------------- /SOURCES.md: -------------------------------------------------------------------------------- 1 | * Additional religious affiliations from http://www.adherents.com/adh_congress.html
2 | * [Face++ Cognitive Services API](https://www.faceplusplus.com/)
3 | * [Czech Republic Parliamentary Members Archive](http://public.psp.cz/sqw/fsnem.sqw?zvo=1)
4 | * [Germany Bundestag Open Data](https://www.bundestag.de/service/opendata)
5 | * [Spain Parliamentary Members Archive](http://www.congreso.es/portal/page/portal/Congreso/Congreso/Diputados)
6 | * Additional Twitter handles from https://github.com/oduwsdl/US-Congress
7 | * Additional Twitter handles provided by Bruno Castanho Silva and Sven-Oliver Proksch
8 | * [Wikimedia Commons](https://commons.wikimedia.org/)
9 | * [Wikimedia API](https://wikimedia.org/)
10 | * [Wikidata API](https://www.wikidata.org/)
11 | * [Wikipedia](https://de.wikipedia.org/)
12 | * [Wikipedia API](https://en.wikipedia.org/w/api.php) 13 | -------------------------------------------------------------------------------- /source/sticker.R: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------------------------- 2 | # legislatoR 3 | # Sascha Göbel and Simon Munzert 4 | # Script: logo 5 | # December 2017 6 | # --------------------------------------------------------------------------------------- 7 | 8 | library(hexSticker) 9 | library(showtext) 10 | 11 | setwd("D:/Sascha/projects/legislatoR/images") 12 | 13 | 14 | font_add_google("IM Fell French Canon SC", "political") 15 | 16 | 17 | 18 | sticker(subplot = "new_logo.png", 19 | package = "legislatoR", 20 | p_x = 1, 21 | p_y = 1.45, 22 | p_color = "black", 23 | p_size = 70, 24 | p_family = "political", 25 | h_size = 1.5, 26 | h_fill = "white", 27 | h_color = "black", 28 | asp = 8, 29 | s_x = 1, 30 | s_y = 0.7, 31 | spotlight = FALSE, 32 | filename = "sticker.jpg", 33 | white_around_sticker = TRUE, 34 | dpi = 1000 35 | ) 36 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: legislatoR 2 | Title: Interface to the Comparative Legislators Database 3 | Description: Facilitates access to the Comparative Legislators Database (CLD). The CLD includes political, sociodemographic, career, online presence, public attention, and visual information for over 67,000 contemporary and historical politicians from 16 countries. 
4 | Version: 1.1.0 5 | Authors@R: c( 6 | person("Sascha", "Goebel", email = "sascha.goebel@soz.uni-frankfurt.de", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9032-5874")), 7 | person("Simon", "Munzert", role = "aut")) 8 | URL: https://github.com/saschagobel/legislatoR 9 | BugReports: https://github.com/saschagobel/legislatoR/issues 10 | License: GPL-3 11 | Depends: 12 | R (>= 3.5.0) 13 | Imports: 14 | curl (>= 3.0), 15 | dplyr (>= 0.7.4) 16 | Suggests: 17 | testthat, 18 | rmarkdown, 19 | knitr, 20 | magrittr, 21 | purrr, 22 | stringr, 23 | tibble 24 | VignetteBuilder: 25 | knitr 26 | Encoding: UTF-8 27 | RoxygenNote: 7.1.2 28 | -------------------------------------------------------------------------------- /source/packages.R: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------------------------- 2 | # legislatoR 3 | # Sascha Göbel, Simon Munzert 4 | # Script: Packages 5 | # December 2017 6 | # --------------------------------------------------------------------------------------- 7 | 8 | 9 | #### INSTALL AND LOAD PACKAGES ========================================================== 10 | 11 | # install pacman package if not installed ----------------------------------------------- 12 | suppressWarnings(if (!require("pacman")) install.packages("pacman")) 13 | 14 | # load packages and install if not installed -------------------------------------------- 15 | pacman::p_load(stringr, lubridate, magrittr, plyr, dplyr, eeptools, httr, 16 | rvest, toOrdinal, mpoly, data.table, zoo, jsonlite, R.utils, 17 | WikidataR, tibble, pageviews, wikipediatrend, padr, gtools,readxl, haven, 18 | tidyselect, reshape2, ggplot2, extrafont, finalfit, purrr, vroom, 19 | install = TRUE, 20 | update = FALSE) 21 | 22 | # show loaded packages ------------------------------------------------------------------ 23 | cat("loaded packages\n") 24 | print(pacman::p_loaded()) 25 | 
-------------------------------------------------------------------------------- /R/legislatoR.R: -------------------------------------------------------------------------------- 1 | #' legislatoR 2 | #' 3 | #' Facilitates access to the Comparative Legislators Database (CLD). The CLD includes political, sociodemographic, career, online presence, public attention, and visual information for over 45,000 contemporary and historical politicians from ten countries. 4 | #' 5 | #' @section legislatoR functions: 6 | #' 7 | #' \code{\link{cld_content}}: returns a named list of legislatures and sessions available in the CLD.\cr 8 | #' 9 | #' \code{\link{get_core}}: fetches sociodemographic data of legislators.\cr 10 | #' 11 | #' \code{\link{get_political}}: fetches political data of legislators.\cr 12 | #' 13 | #' \code{\link{get_history}}: fetches full revision histories of legislators' Wikipedia biographies.\cr 14 | #' 15 | #' \code{\link{get_traffic}}: fetches daily user traffic on legislators' Wikipedia biographies.\cr 16 | #' 17 | #' \code{\link{get_social}}: fetches social media handles and website URLs of legislators.\cr 18 | #' 19 | #' \code{\link{get_portrait}}: fetches portrait urls of legislators.\cr 20 | #' 21 | #' \code{\link{get_office}}: fetches political and other offices of legislators.\cr 22 | #' 23 | #' \code{\link{get_profession}} fetches occupational data of legislators.\cr 24 | #' 25 | #' \code{\link{get_ids}}: fetches a range of IDs of legislators.\cr 26 | #' 27 | #' @docType package 28 | #' @name legislatoR 29 | NULL 30 | -------------------------------------------------------------------------------- /man/cld_content.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cld_content.R 3 | \name{cld_content} 4 | \alias{cld_content} 5 | \title{List content of the CLD} 6 | \usage{ 7 | cld_content(legislature = NULL) 8 | } 9 | \arguments{ 
10 | \item{legislature}{An optional character string specifying one or more legislatures. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}. If NULL (the default), a list with all legislatures and sessions available in the CLD is returned.} 11 | } 12 | \value{ 13 | A list with names being three-letter country codes and with each element containing a vector that shows the sessions available for a legislature. 14 | } 15 | \description{ 16 | Returns a named list of legislatures and sessions available in the CLD. This provides a quick overview of the CLD's scope and valid three-letter country codes, and helps to conveniently loop/map over legislatures and sessions. 17 | } 18 | \examples{ 19 | # Get a list of three-letter country codes and available sessions for all countries 20 | overview <- cld_content() 21 | tibble::glimpse(overview) 22 | 23 | # Get a list of available sessions for the French Assemblée and the Irish Dail 24 | sessions <- cld_content(legislature = c("fra", "irl")) 25 | tibble::glimpse(sessions) 26 | } 27 | -------------------------------------------------------------------------------- /man/legislatoR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/legislatoR.R 3 | \docType{package} 4 | \name{legislatoR} 5 | \alias{legislatoR} 6 | \title{legislatoR} 7 | \description{ 8 | Facilitates access to the Comparative Legislators Database (CLD). The CLD includes political, sociodemographic, career, online presence, public attention, and visual information for over 45,000 contemporary and historical politicians from ten countries. 
9 | } 10 | \section{legislatoR functions}{ 11 | 12 | 13 | \code{\link{cld_content}}: returns a named list of legislatures and sessions available in the CLD.\cr 14 | 15 | \code{\link{get_core}}: fetches sociodemographic data of legislators.\cr 16 | 17 | \code{\link{get_political}}: fetches political data of legislators.\cr 18 | 19 | \code{\link{get_history}}: fetches full revision histories of legislators' Wikipedia biographies.\cr 20 | 21 | \code{\link{get_traffic}}: fetches daily user traffic on legislators' Wikipedia biographies.\cr 22 | 23 | \code{\link{get_social}}: fetches social media handles and website URLs of legislators.\cr 24 | 25 | \code{\link{get_portrait}}: fetches portrait urls of legislators.\cr 26 | 27 | \code{\link{get_office}}: fetches political and other offices of legislators.\cr 28 | 29 | \code{\link{get_profession}} fetches occupational data of legislators.\cr 30 | 31 | \code{\link{get_ids}}: fetches a range of IDs of legislators.\cr 32 | } 33 | 34 | -------------------------------------------------------------------------------- /tests/testthat/test-cld_content.R: -------------------------------------------------------------------------------- 1 | test_that("cld_content() returns a named list of integer vectors", { 2 | skip_on_cran() 3 | expect_identical(class(cld_content()), "list") 4 | expect_identical(class(sample(cld_content(), 1)[[1]]), "integer") 5 | }) 6 | 7 | test_that("cld_content() works with valid country codes", { 8 | skip_on_cran() 9 | expect_identical(length(cld_content("aut")[[1]]), 27L) 10 | expect_identical(length(cld_content("can")[[1]]), 44L) 11 | expect_identical(length(cld_content("cze")[[1]]), 9L) 12 | expect_identical(length(cld_content("esp")[[1]]), 14L) 13 | expect_identical(length(cld_content("fra")[[1]]), 16L) 14 | expect_identical(length(cld_content("deu")[[1]]), 20L) 15 | expect_identical(length(cld_content("irl")[[1]]), 33L) 16 | expect_identical(length(cld_content("sco")[[1]]), 6L) 17 | 
expect_identical(length(cld_content("gbr")[[1]]), 58L) 18 | expect_identical(length(cld_content("usa_house")[[1]]), 117L) 19 | expect_identical(length(cld_content("usa_senate")[[1]]), 117L) 20 | }) 21 | 22 | test_that("cld_content() works with multiple country codes", { 23 | skip_on_cran() 24 | expect_silent(cld_content(c("aut", "deu"))) 25 | expect_silent(cld_content(c("cze", "sco", "gbr"))) 26 | expect_silent(cld_content(c("usa_house", "fra", "esp", "can"))) 27 | }) 28 | 29 | test_that("Error is returned when legislature argument is not a valid country code", { 30 | skip_on_cran() 31 | expect_error(cld_content(NA)) 32 | expect_error(cld_content("bla")) 33 | expect_error(cld_content(c("deu", "bla"))) 34 | expect_error(cld_content(2)) 35 | expect_error(cld_content(TRUE)) 36 | }) 37 | -------------------------------------------------------------------------------- /man/get_office.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_office.R 3 | \name{get_office} 4 | \alias{get_office} 5 | \title{Fetch 'Offices' table} 6 | \format{ 7 | Data frame in wide format with columns (varies by legislature): 8 | \itemize{ 9 | \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).} 10 | \item{office_1: political or other office held by a legislator (of class \sQuote{logical}).} 11 | \item{office_2: ... (of class \sQuote{logical}).} 12 | \item{...} 13 | } 14 | } 15 | \source{ 16 | Wikidata API, \url{https://www.wikidata.org/wiki/Wikidata:Main_Page} 17 | } 18 | \usage{ 19 | get_office(legislature) 20 | } 21 | \arguments{ 22 | \item{legislature}{A character string specifying the three-letter country code of the legislature for which data shall be fetched. 
Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}.} 23 | } 24 | \value{ 25 | A data frame with columns as specified above. 26 | } 27 | \description{ 28 | Fetches political and other offices of legislators for the specified legislature. Requires a working Internet connection. 29 | } 30 | \examples{ 31 | \donttest{# Get entire 'Offices' table for the United States Senate 32 | usa_offices <- get_office(legislature = "usa_senate") 33 | tibble::glimpse(usa_offices) 34 | 35 | # Get 'Offices' table for female members of the United States Senate 36 | usa_offices_subset <- dplyr::semi_join(x = usa_offices, 37 | y = dplyr::filter(get_core(legislature = "usa_senate"), 38 | sex == "female"), 39 | by = "wikidataid") 40 | tibble::glimpse(usa_offices_subset) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /man/get_portrait.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_portrait.R 3 | \name{get_portrait} 4 | \alias{get_portrait} 5 | \title{Fetch 'Portrait' table} 6 | \format{ 7 | Data frame with columns (varies by legislature): 8 | \itemize{ 9 | \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer}).} 10 | \item{image_url: URL linking to a legislator's portrait on Wikimedia Commons (of class \sQuote{character}).} 11 | } 12 | } 13 | \source{ 14 | Wikipedia API, \url{https://en.wikipedia.org/w/api.php} \cr 15 | Wikimedia Commons, \url{https://commons.wikimedia.org/wiki/Main_Page} 16 | } 17 | \usage{ 18 | get_portrait(legislature) 19 | } 20 | \arguments{ 21 | \item{legislature}{A character string specifying the three-letter country code of the legislature for which data shall be fetched. 
Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}.} 22 | } 23 | \value{ 24 | A data frame with columns as specified above. 25 | } 26 | \description{ 27 | Fetches portrait urls of legislators for the specified legislature. Requires a working Internet connection. 28 | } 29 | \examples{ 30 | \donttest{# Get entire 'Portraits' table for the United States Senate 31 | usa_portraits <- get_portrait(legislature = "usa_senate") 32 | tibble::glimpse(usa_portraits) 33 | 34 | # Get 'Portraits' table for Democratic members of the United States Senate 35 | usa_port_subset <- dplyr::semi_join(x = usa_portraits, 36 | y = dplyr::filter(get_political(legislature = "usa_senate"), 37 | party == "D"), 38 | by = "pageid") 39 | tibble::glimpse(usa_port_subset) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /man/get_profession.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_profession.R 3 | \name{get_profession} 4 | \alias{get_profession} 5 | \title{Fetch 'Professions' table} 6 | \format{ 7 | Data frame in wide format with columns (varies by legislature): 8 | \itemize{ 9 | \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).} 10 | \item{occupation_1: occupation a legislator practiced or was trained in (of class \sQuote{logical}).} 11 | \item{occupation_2: ... (of class \sQuote{logical}).} 12 | \item{...} 13 | } 14 | } 15 | \source{ 16 | Wikidata API, \url{https://www.wikidata.org/wiki/Wikidata:Main_Page} 17 | } 18 | \usage{ 19 | get_profession(legislature) 20 | } 21 | \arguments{ 22 | \item{legislature}{A character string specifying the three-letter country code of the legislature for which data shall be fetched. 
Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}.} 23 | } 24 | \value{ 25 | A data frame with columns as specified above. 26 | } 27 | \description{ 28 | Fetches occupational data of legislators for the specified legislature. Requires a working Internet connection. 29 | } 30 | \examples{ 31 | \donttest{# Get entire 'Professions' table for the United States House 32 | usa_professions <- get_profession(legislature = "usa_house") 33 | tibble::glimpse(usa_professions) 34 | 35 | # Get 'Professions' table for female members of the United States House 36 | usa_professions_subset <- dplyr::semi_join(x = usa_professions, 37 | y = dplyr::filter(get_core(legislature = "usa_house"), 38 | sex == "female"), 39 | by = "wikidataid") 40 | tibble::glimpse(usa_professions_subset) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /man/get_traffic.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_traffic.R 3 | \name{get_traffic} 4 | \alias{get_traffic} 5 | \title{Fetch Wikipedia 'Traffic' table} 6 | \format{ 7 | Data frame with columns: 8 | \itemize{ 9 | \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer}).} 10 | \item{date: Date for which user traffic is recorded, from 2015-07-01 to 2018-12-31 UTC (of class \sQuote{POSIXct}).} 11 | \item{traffic: Daily non-unique user visits (of class \sQuote{numeric}).} 12 | } 13 | } 14 | \source{ 15 | Wikimedia API, \url{https://wikimedia.org/api/rest_v1/} \cr 16 | \url{http://petermeissner.de:8880/} 17 | } 18 | \usage{ 19 | get_traffic(legislature) 20 | } 21 | \arguments{ 22 | \item{legislature}{A character string specifying the three-letter country code of the legislature for which data shall be fetched. 
Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}.} 23 | } 24 | \value{ 25 | A data frame with columns as specified above. 26 | } 27 | \description{ 28 | Fetches daily user traffic on legislators' Wikipedia biographies for the specified legislature. Requires a working Internet connection. 29 | } 30 | \examples{ 31 | \donttest{# Get entire 'Traffic' table for the Scottish Parliament 32 | sco_traffic <- get_traffic(legislature = "sco") 33 | tibble::glimpse(sco_traffic) 34 | 35 | # Add Wikidataid to 'Traffic' table for the Scottish Parliament 36 | sco_traffic_subset <- dplyr::inner_join(x = dplyr::select(get_core(legislature = "sco"), 37 | pageid, wikidataid), 38 | y = sco_traffic, 39 | by = "pageid") 40 | tibble::glimpse(sco_traffic_subset) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /man/get_social.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_social.R 3 | \name{get_social} 4 | \alias{get_social} 5 | \title{Fetch 'Social' table} 6 | \format{ 7 | Data frame with columns (varies by legislature): 8 | \itemize{ 9 | \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).} 10 | \item{twitter: Twitter handle (of class \sQuote{character}).} 11 | \item{facebook: Facebook handle (of class \sQuote{character}).} 12 | \item{youtube: Youtube ID (of class \sQuote{character}).} 13 | \item{googlep: Google Plus ID (of class \sQuote{character}).} 14 | \item{instagram: Instagram handle (of class \sQuote{character}).} 15 | \item{linkedin: LinkedIn ID (of class \sQuote{character}).} 16 | \item{website: Personal website URL (of class \sQuote{character}).} 17 | } 18 | } 19 | \source{ 20 | Wikidata API, 
\url{https://www.wikidata.org/wiki/Wikidata:Main_Page} 21 | } 22 | \usage{ 23 | get_social(legislature) 24 | } 25 | \arguments{ 26 | \item{legislature}{A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}.} 27 | } 28 | \value{ 29 | A data frame with columns as specified above. 30 | } 31 | \description{ 32 | Fetches social media handles and website URLs of legislators for the specified legislature. Requires a working Internet connection. 33 | } 34 | \examples{ 35 | \donttest{# Get entire 'Social' table for the UK House of Commons 36 | gbr_social <- get_social(legislature = "gbr") 37 | tibble::glimpse(gbr_social) 38 | 39 | # Get 'Social' table for members of the UK House of Commons with available TheyWorkForYou ID 40 | gbr_social_subset <- dplyr::semi_join(x = gbr_social, 41 | y = dplyr::filter(get_ids(legislature = "gbr"), 42 | !is.na(theyworkforyou)), 43 | by = "wikidataid") 44 | tibble::glimpse(gbr_social_subset) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /man/get_history.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_history.R 3 | \name{get_history} 4 | \alias{get_history} 5 | \title{Fetch Wikipedia 'History' table} 6 | \format{ 7 | Data frame with columns: 8 | \itemize{ 9 | \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer}).} 10 | \item{revid: Wikipedia edit ID (of class \sQuote{integer}).} 11 | \item{parentid: Wikipedia edit ID of the previous revision (of class \sQuote{integer}).} 12 | \item{user: Username of registered user responsible for the revision, IP address in case of 
anonymous revision (of class \sQuote{character}).} 13 | \item{userid: ID of registered user responsible for the revision, 0 in case of anonymous revision (of class \sQuote{integer}).} 14 | \item{timestamp: Date and time of the revision (of class \sQuote{POSIXct}).} 15 | \item{size: Revision size in bytes (of class \sQuote{integer}).} 16 | \item{comment: Revision comment (of class \sQuote{character}).} 17 | } 18 | } 19 | \source{ 20 | Wikipedia API, \url{https://en.wikipedia.org/w/api.php} 21 | } 22 | \usage{ 23 | get_history(legislature) 24 | } 25 | \arguments{ 26 | \item{legislature}{A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}.} 27 | } 28 | \value{ 29 | A data frame with columns as specified above. 30 | } 31 | \description{ 32 | Fetches full revision histories of legislators' Wikipedia biographies for the specified legislature. Requires a working Internet connection. 
33 | } 34 | \examples{ 35 | \donttest{# Get entire 'History' table for the Austrian Nationalrat 36 | aut_history <- get_history(legislature = "aut") 37 | tibble::glimpse(aut_history) 38 | 39 | # Get 'History' table for NEOS party members of the Austrian Nationalrat 40 | aut_history_subset <- dplyr::semi_join(x = aut_history, 41 | y = dplyr::filter(get_political(legislature = "aut"), 42 | party == "NEOS"), 43 | by = "pageid") 44 | tibble::glimpse(aut_history_subset) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /tests/testthat/test-errors.R: -------------------------------------------------------------------------------- 1 | test_that("Error is returned when legislature argument is not specified", { 2 | skip_on_cran() 3 | expect_error(get_core()) 4 | expect_error(get_political()) 5 | expect_error(get_history()) 6 | expect_error(get_traffic()) 7 | expect_error(get_social()) 8 | expect_error(get_portrait()) 9 | expect_error(get_office()) 10 | expect_error(get_profession()) 11 | expect_error(get_ids()) 12 | }) 13 | 14 | test_that("Error is returned when legislature argument is not a valid country code", { 15 | skip_on_cran() 16 | expect_error(get_core(NA)) 17 | expect_error(get_political(NA)) 18 | expect_error(get_history(NA)) 19 | expect_error(get_traffic(NA)) 20 | expect_error(get_social(NA)) 21 | expect_error(get_portrait(NA)) 22 | expect_error(get_office(NA)) 23 | expect_error(get_profession(NA)) 24 | expect_error(get_ids(NA)) 25 | 26 | expect_error(get_core("bla")) 27 | expect_error(get_political("bla")) 28 | expect_error(get_history("bla")) 29 | expect_error(get_traffic("bla")) 30 | expect_error(get_social("bla")) 31 | expect_error(get_portrait("bla")) 32 | expect_error(get_office("bla")) 33 | expect_error(get_profession("bla")) 34 | expect_error(get_ids("bla")) 35 | 36 | expect_error(get_core(2)) 37 | expect_error(get_political(2)) 38 | expect_error(get_history(2)) 39 | expect_error(get_traffic(2)) 40 | 
expect_error(get_social(2)) 41 | expect_error(get_portrait(2)) 42 | expect_error(get_office(2)) 43 | expect_error(get_profession(2)) 44 | expect_error(get_ids(2)) 45 | 46 | expect_error(get_core(TRUE)) 47 | expect_error(get_political(TRUE)) 48 | expect_error(get_history(TRUE)) 49 | expect_error(get_traffic(TRUE)) 50 | expect_error(get_social(TRUE)) 51 | expect_error(get_portrait(TRUE)) 52 | expect_error(get_office(TRUE)) 53 | expect_error(get_profession(TRUE)) 54 | expect_error(get_ids(TRUE)) 55 | }) 56 | 57 | test_that("Error is returned when more than one country code is specified in legislature argument ", { 58 | skip_on_cran() 59 | expect_error(get_core(c("aut","deu"))) 60 | expect_error(get_political(c("aut","deu"))) 61 | expect_error(get_history(c("aut","deu"))) 62 | expect_error(get_traffic(c("aut","deu"))) 63 | expect_error(get_social(c("aut","deu"))) 64 | expect_error(get_portrait(c("aut","deu"))) 65 | expect_error(get_office(c("aut","deu"))) 66 | expect_error(get_profession(c("aut","deu"))) 67 | expect_error(get_ids(c("aut","deu"))) 68 | }) 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /tests/testthat/test-get_ids.R: -------------------------------------------------------------------------------- 1 | test_that("IDs table is returned appropriately for each legislature", { 2 | skip_on_cran() 3 | expect_identical(class(get_ids("aut")), "data.frame") 4 | Sys.sleep(1) 5 | expect_identical(class(get_ids("can")), "data.frame") 6 | Sys.sleep(1) 7 | expect_identical(class(get_ids("cze")), "data.frame") 8 | Sys.sleep(1) 9 | expect_identical(class(get_ids("esp")), "data.frame") 10 | Sys.sleep(1) 11 | expect_identical(class(get_ids("fra")), "data.frame") 12 | Sys.sleep(1) 13 | expect_identical(class(get_ids("deu")), "data.frame") 14 | Sys.sleep(1) 15 | expect_identical(class(get_ids("irl")), "data.frame") 16 | Sys.sleep(1) 17 | expect_identical(class(get_ids("sco")), "data.frame") 18 | Sys.sleep(1) 19 | 
expect_identical(class(get_ids("gbr")), "data.frame") 20 | Sys.sleep(1) 21 | expect_identical(class(get_ids("usa_house")), "data.frame") 22 | Sys.sleep(1) 23 | expect_identical(class(get_ids("usa_senate")), "data.frame") 24 | Sys.sleep(100) 25 | 26 | expect_true(all(dim(get_ids("aut")) > 0)) 27 | Sys.sleep(1) 28 | expect_true(all(dim(get_ids("can")) > 0)) 29 | Sys.sleep(1) 30 | expect_true(all(dim(get_ids("cze")) > 0)) 31 | Sys.sleep(1) 32 | expect_true(all(dim(get_ids("esp")) > 0)) 33 | Sys.sleep(1) 34 | expect_true(all(dim(get_ids("fra")) > 0)) 35 | Sys.sleep(1) 36 | expect_true(all(dim(get_ids("deu")) > 0)) 37 | Sys.sleep(1) 38 | expect_true(all(dim(get_ids("irl")) > 0)) 39 | Sys.sleep(1) 40 | expect_true(all(dim(get_ids("sco")) > 0)) 41 | Sys.sleep(1) 42 | expect_true(all(dim(get_ids("gbr")) > 0)) 43 | Sys.sleep(1) 44 | expect_true(all(dim(get_ids("usa_house")) > 0)) 45 | Sys.sleep(1) 46 | expect_true(all(dim(get_ids("usa_senate")) > 0)) 47 | Sys.sleep(100) 48 | 49 | expect_identical(colnames(get_ids("aut"))[1], c("wikidataid")) 50 | Sys.sleep(1) 51 | expect_identical(colnames(get_ids("can"))[1], c("wikidataid")) 52 | Sys.sleep(1) 53 | expect_identical(colnames(get_ids("cze"))[1], c("wikidataid")) 54 | Sys.sleep(1) 55 | expect_identical(colnames(get_ids("esp"))[1], c("wikidataid")) 56 | Sys.sleep(1) 57 | expect_identical(colnames(get_ids("fra"))[1], c("wikidataid")) 58 | Sys.sleep(1) 59 | expect_identical(colnames(get_ids("deu"))[1], c("wikidataid")) 60 | Sys.sleep(1) 61 | expect_identical(colnames(get_ids("irl"))[1], c("wikidataid")) 62 | Sys.sleep(1) 63 | expect_identical(colnames(get_ids("sco"))[1], c("wikidataid")) 64 | Sys.sleep(1) 65 | expect_identical(colnames(get_ids("gbr"))[1], c("wikidataid")) 66 | Sys.sleep(1) 67 | expect_identical(colnames(get_ids("usa_house"))[1], c("wikidataid")) 68 | Sys.sleep(1) 69 | expect_identical(colnames(get_ids("usa_senate"))[1], c("wikidataid")) 70 | Sys.sleep(100) 71 | }) 72 | 
-------------------------------------------------------------------------------- /tests/testthat/test-get_history.R: -------------------------------------------------------------------------------- 1 | test_that("Wikipedia History table is returned appropriately for each legislature", { 2 | skip_on_cran() 3 | expect_identical(class(get_history("aut")), "data.frame") 4 | Sys.sleep(1) 5 | expect_identical(class(get_history("can")), "data.frame") 6 | Sys.sleep(1) 7 | expect_identical(class(get_history("cze")), "data.frame") 8 | Sys.sleep(1) 9 | expect_identical(class(get_history("esp")), "data.frame") 10 | Sys.sleep(1) 11 | expect_identical(class(get_history("fra")), "data.frame") 12 | Sys.sleep(1) 13 | expect_identical(class(get_history("deu")), "data.frame") 14 | Sys.sleep(1) 15 | expect_identical(class(get_history("irl")), "data.frame") 16 | Sys.sleep(1) 17 | expect_identical(class(get_history("sco")), "data.frame") 18 | Sys.sleep(1) 19 | expect_identical(class(get_history("gbr")), "data.frame") 20 | Sys.sleep(1) 21 | expect_identical(class(get_history("usa_house")), "data.frame") 22 | Sys.sleep(1) 23 | expect_identical(class(get_history("usa_senate")), "data.frame") 24 | Sys.sleep(100) 25 | 26 | expect_true(all(dim(get_history("aut")) > 0)) 27 | Sys.sleep(1) 28 | expect_true(all(dim(get_history("can")) > 0)) 29 | Sys.sleep(1) 30 | expect_true(all(dim(get_history("cze")) > 0)) 31 | Sys.sleep(1) 32 | expect_true(all(dim(get_history("esp")) > 0)) 33 | Sys.sleep(1) 34 | expect_true(all(dim(get_history("fra")) > 0)) 35 | Sys.sleep(1) 36 | expect_true(all(dim(get_history("deu")) > 0)) 37 | Sys.sleep(1) 38 | expect_true(all(dim(get_history("irl")) > 0)) 39 | Sys.sleep(1) 40 | expect_true(all(dim(get_history("sco")) > 0)) 41 | Sys.sleep(1) 42 | expect_true(all(dim(get_history("gbr")) > 0)) 43 | Sys.sleep(1) 44 | expect_true(all(dim(get_history("usa_house")) > 0)) 45 | Sys.sleep(1) 46 | expect_true(all(dim(get_history("usa_senate")) > 0)) 47 | Sys.sleep(100) 48 | 49 | 
expect_identical(colnames(get_history("aut"))[1], c("pageid")) 50 | Sys.sleep(1) 51 | expect_identical(colnames(get_history("can"))[1], c("pageid")) 52 | Sys.sleep(1) 53 | expect_identical(colnames(get_history("cze"))[1], c("pageid")) 54 | Sys.sleep(1) 55 | expect_identical(colnames(get_history("esp"))[1], c("pageid")) 56 | Sys.sleep(1) 57 | expect_identical(colnames(get_history("fra"))[1], c("pageid")) 58 | Sys.sleep(1) 59 | expect_identical(colnames(get_history("deu"))[1], c("pageid")) 60 | Sys.sleep(1) 61 | expect_identical(colnames(get_history("irl"))[1], c("pageid")) 62 | Sys.sleep(1) 63 | expect_identical(colnames(get_history("sco"))[1], c("pageid")) 64 | Sys.sleep(1) 65 | expect_identical(colnames(get_history("gbr"))[1], c("pageid")) 66 | Sys.sleep(1) 67 | expect_identical(colnames(get_history("usa_house"))[1], c("pageid")) 68 | Sys.sleep(1) 69 | expect_identical(colnames(get_history("usa_senate"))[1], c("pageid")) 70 | Sys.sleep(100) 71 | }) 72 | -------------------------------------------------------------------------------- /tests/testthat/test-get_office.R: -------------------------------------------------------------------------------- 1 | test_that("Offices table is returned appropriately for each legislature", { 2 | skip_on_cran() 3 | expect_identical(class(get_office("aut")), "data.frame") 4 | Sys.sleep(1) 5 | expect_identical(class(get_office("can")), "data.frame") 6 | Sys.sleep(1) 7 | expect_identical(class(get_office("cze")), "data.frame") 8 | Sys.sleep(1) 9 | expect_identical(class(get_office("esp")), "data.frame") 10 | Sys.sleep(1) 11 | expect_identical(class(get_office("fra")), "data.frame") 12 | Sys.sleep(1) 13 | expect_identical(class(get_office("deu")), "data.frame") 14 | Sys.sleep(1) 15 | expect_identical(class(get_office("irl")), "data.frame") 16 | Sys.sleep(1) 17 | expect_identical(class(get_office("sco")), "data.frame") 18 | Sys.sleep(1) 19 | expect_identical(class(get_office("gbr")), "data.frame") 20 | Sys.sleep(1) 21 | 
expect_identical(class(get_office("usa_house")), "data.frame") 22 | Sys.sleep(1) 23 | expect_identical(class(get_office("usa_senate")), "data.frame") 24 | Sys.sleep(100) 25 | 26 | expect_true(all(dim(get_office("aut")) > 0)) 27 | Sys.sleep(1) 28 | expect_true(all(dim(get_office("can")) > 0)) 29 | Sys.sleep(1) 30 | expect_true(all(dim(get_office("cze")) > 0)) 31 | Sys.sleep(1) 32 | expect_true(all(dim(get_office("esp")) > 0)) 33 | Sys.sleep(1) 34 | expect_true(all(dim(get_office("fra")) > 0)) 35 | Sys.sleep(1) 36 | expect_true(all(dim(get_office("deu")) > 0)) 37 | Sys.sleep(1) 38 | expect_true(all(dim(get_office("irl")) > 0)) 39 | Sys.sleep(1) 40 | expect_true(all(dim(get_office("sco")) > 0)) 41 | Sys.sleep(1) 42 | expect_true(all(dim(get_office("gbr")) > 0)) 43 | Sys.sleep(1) 44 | expect_true(all(dim(get_office("usa_house")) > 0)) 45 | Sys.sleep(1) 46 | expect_true(all(dim(get_office("usa_senate")) > 0)) 47 | Sys.sleep(100) 48 | 49 | expect_identical(colnames(get_office("aut"))[1], c("wikidataid")) 50 | Sys.sleep(1) 51 | expect_identical(colnames(get_office("can"))[1], c("wikidataid")) 52 | Sys.sleep(1) 53 | expect_identical(colnames(get_office("cze"))[1], c("wikidataid")) 54 | Sys.sleep(1) 55 | expect_identical(colnames(get_office("esp"))[1], c("wikidataid")) 56 | Sys.sleep(1) 57 | expect_identical(colnames(get_office("fra"))[1], c("wikidataid")) 58 | Sys.sleep(1) 59 | expect_identical(colnames(get_office("deu"))[1], c("wikidataid")) 60 | Sys.sleep(1) 61 | expect_identical(colnames(get_office("irl"))[1], c("wikidataid")) 62 | Sys.sleep(1) 63 | expect_identical(colnames(get_office("sco"))[1], c("wikidataid")) 64 | Sys.sleep(1) 65 | expect_identical(colnames(get_office("gbr"))[1], c("wikidataid")) 66 | Sys.sleep(1) 67 | expect_identical(colnames(get_office("usa_house"))[1], c("wikidataid")) 68 | Sys.sleep(1) 69 | expect_identical(colnames(get_office("usa_senate"))[1], c("wikidataid")) 70 | Sys.sleep(100) 71 | }) 72 | 
-------------------------------------------------------------------------------- /tests/testthat/test-get_social.R: -------------------------------------------------------------------------------- 1 | test_that("Social table is returned appropriately for each legislature", { 2 | skip_on_cran() 3 | expect_identical(class(get_social("aut")), "data.frame") 4 | Sys.sleep(1) 5 | expect_identical(class(get_social("can")), "data.frame") 6 | Sys.sleep(1) 7 | expect_identical(class(get_social("cze")), "data.frame") 8 | Sys.sleep(1) 9 | expect_identical(class(get_social("esp")), "data.frame") 10 | Sys.sleep(1) 11 | expect_identical(class(get_social("fra")), "data.frame") 12 | Sys.sleep(1) 13 | expect_identical(class(get_social("deu")), "data.frame") 14 | Sys.sleep(1) 15 | expect_identical(class(get_social("irl")), "data.frame") 16 | Sys.sleep(1) 17 | expect_identical(class(get_social("sco")), "data.frame") 18 | Sys.sleep(1) 19 | expect_identical(class(get_social("gbr")), "data.frame") 20 | Sys.sleep(1) 21 | expect_identical(class(get_social("usa_house")), "data.frame") 22 | Sys.sleep(1) 23 | expect_identical(class(get_social("usa_senate")), "data.frame") 24 | Sys.sleep(100) 25 | 26 | expect_true(all(dim(get_social("aut")) > 0)) 27 | Sys.sleep(1) 28 | expect_true(all(dim(get_social("can")) > 0)) 29 | Sys.sleep(1) 30 | expect_true(all(dim(get_social("cze")) > 0)) 31 | Sys.sleep(1) 32 | expect_true(all(dim(get_social("esp")) > 0)) 33 | Sys.sleep(1) 34 | expect_true(all(dim(get_social("fra")) > 0)) 35 | Sys.sleep(1) 36 | expect_true(all(dim(get_social("deu")) > 0)) 37 | Sys.sleep(1) 38 | expect_true(all(dim(get_social("irl")) > 0)) 39 | Sys.sleep(1) 40 | expect_true(all(dim(get_social("sco")) > 0)) 41 | Sys.sleep(1) 42 | expect_true(all(dim(get_social("gbr")) > 0)) 43 | Sys.sleep(1) 44 | expect_true(all(dim(get_social("usa_house")) > 0)) 45 | Sys.sleep(1) 46 | expect_true(all(dim(get_social("usa_senate")) > 0)) 47 | Sys.sleep(100) 48 | 49 | 
expect_identical(colnames(get_social("aut"))[1], c("wikidataid")) 50 | Sys.sleep(1) 51 | expect_identical(colnames(get_social("can"))[1], c("wikidataid")) 52 | Sys.sleep(1) 53 | expect_identical(colnames(get_social("cze"))[1], c("wikidataid")) 54 | Sys.sleep(1) 55 | expect_identical(colnames(get_social("esp"))[1], c("wikidataid")) 56 | Sys.sleep(1) 57 | expect_identical(colnames(get_social("fra"))[1], c("wikidataid")) 58 | Sys.sleep(1) 59 | expect_identical(colnames(get_social("deu"))[1], c("wikidataid")) 60 | Sys.sleep(1) 61 | expect_identical(colnames(get_social("irl"))[1], c("wikidataid")) 62 | Sys.sleep(1) 63 | expect_identical(colnames(get_social("sco"))[1], c("wikidataid")) 64 | Sys.sleep(1) 65 | expect_identical(colnames(get_social("gbr"))[1], c("wikidataid")) 66 | Sys.sleep(1) 67 | expect_identical(colnames(get_social("usa_house"))[1], c("wikidataid")) 68 | Sys.sleep(1) 69 | expect_identical(colnames(get_social("usa_senate"))[1], c("wikidataid")) 70 | Sys.sleep(100) 71 | }) 72 | -------------------------------------------------------------------------------- /tests/testthat/test-get_traffic.R: -------------------------------------------------------------------------------- 1 | test_that("Wikipedia Traffic table is returned appropriately for each legislature", { 2 | skip_on_cran() 3 | expect_identical(class(get_traffic("aut")), "data.frame") 4 | Sys.sleep(1) 5 | expect_identical(class(get_traffic("can")), "data.frame") 6 | Sys.sleep(1) 7 | expect_identical(class(get_traffic("cze")), "data.frame") 8 | Sys.sleep(1) 9 | expect_identical(class(get_traffic("esp")), "data.frame") 10 | Sys.sleep(1) 11 | expect_identical(class(get_traffic("fra")), "data.frame") 12 | Sys.sleep(1) 13 | expect_identical(class(get_traffic("deu")), "data.frame") 14 | Sys.sleep(1) 15 | expect_identical(class(get_traffic("irl")), "data.frame") 16 | Sys.sleep(1) 17 | expect_identical(class(get_traffic("sco")), "data.frame") 18 | Sys.sleep(1) 19 | expect_identical(class(get_traffic("gbr")), 
"data.frame") 20 | Sys.sleep(1) 21 | expect_identical(class(get_traffic("usa_house")), "data.frame") 22 | Sys.sleep(1) 23 | expect_identical(class(get_traffic("usa_senate")), "data.frame") 24 | Sys.sleep(100) 25 | 26 | expect_true(all(dim(get_traffic("aut")) > 0)) 27 | Sys.sleep(1) 28 | expect_true(all(dim(get_traffic("can")) > 0)) 29 | Sys.sleep(1) 30 | expect_true(all(dim(get_traffic("cze")) > 0)) 31 | Sys.sleep(1) 32 | expect_true(all(dim(get_traffic("esp")) > 0)) 33 | Sys.sleep(1) 34 | expect_true(all(dim(get_traffic("fra")) > 0)) 35 | Sys.sleep(1) 36 | expect_true(all(dim(get_traffic("deu")) > 0)) 37 | Sys.sleep(1) 38 | expect_true(all(dim(get_traffic("irl")) > 0)) 39 | Sys.sleep(1) 40 | expect_true(all(dim(get_traffic("sco")) > 0)) 41 | Sys.sleep(1) 42 | expect_true(all(dim(get_traffic("gbr")) > 0)) 43 | Sys.sleep(1) 44 | expect_true(all(dim(get_traffic("usa_house")) > 0)) 45 | Sys.sleep(1) 46 | expect_true(all(dim(get_traffic("usa_senate")) > 0)) 47 | Sys.sleep(100) 48 | 49 | expect_identical(colnames(get_traffic("aut"))[1], c("pageid")) 50 | Sys.sleep(1) 51 | expect_identical(colnames(get_traffic("can"))[1], c("pageid")) 52 | Sys.sleep(1) 53 | expect_identical(colnames(get_traffic("cze"))[1], c("pageid")) 54 | Sys.sleep(1) 55 | expect_identical(colnames(get_traffic("esp"))[1], c("pageid")) 56 | Sys.sleep(1) 57 | expect_identical(colnames(get_traffic("fra"))[1], c("pageid")) 58 | Sys.sleep(1) 59 | expect_identical(colnames(get_traffic("deu"))[1], c("pageid")) 60 | Sys.sleep(1) 61 | expect_identical(colnames(get_traffic("irl"))[1], c("pageid")) 62 | Sys.sleep(1) 63 | expect_identical(colnames(get_traffic("sco"))[1], c("pageid")) 64 | Sys.sleep(1) 65 | expect_identical(colnames(get_traffic("gbr"))[1], c("pageid")) 66 | Sys.sleep(1) 67 | expect_identical(colnames(get_traffic("usa_house"))[1], c("pageid")) 68 | Sys.sleep(1) 69 | expect_identical(colnames(get_traffic("usa_senate"))[1], c("pageid")) 70 | Sys.sleep(100) 71 | }) 72 | 
-------------------------------------------------------------------------------- /tests/testthat/test-get_portrait.R: -------------------------------------------------------------------------------- 1 | test_that("Portraits table is returned appropriately for each legislature", { 2 | skip_on_cran() 3 | expect_identical(class(get_portrait("aut")), "data.frame") 4 | Sys.sleep(1) 5 | expect_identical(class(get_portrait("can")), "data.frame") 6 | Sys.sleep(1) 7 | expect_identical(class(get_portrait("cze")), "data.frame") 8 | Sys.sleep(1) 9 | expect_identical(class(get_portrait("esp")), "data.frame") 10 | Sys.sleep(1) 11 | expect_identical(class(get_portrait("fra")), "data.frame") 12 | Sys.sleep(1) 13 | expect_identical(class(get_portrait("deu")), "data.frame") 14 | Sys.sleep(1) 15 | expect_identical(class(get_portrait("irl")), "data.frame") 16 | Sys.sleep(1) 17 | expect_identical(class(get_portrait("sco")), "data.frame") 18 | Sys.sleep(1) 19 | expect_identical(class(get_portrait("gbr")), "data.frame") 20 | Sys.sleep(1) 21 | expect_identical(class(get_portrait("usa_house")), "data.frame") 22 | Sys.sleep(1) 23 | expect_identical(class(get_portrait("usa_senate")), "data.frame") 24 | Sys.sleep(100) 25 | 26 | expect_true(all(dim(get_portrait("aut")) > 0)) 27 | Sys.sleep(1) 28 | expect_true(all(dim(get_portrait("can")) > 0)) 29 | Sys.sleep(1) 30 | expect_true(all(dim(get_portrait("cze")) > 0)) 31 | Sys.sleep(1) 32 | expect_true(all(dim(get_portrait("esp")) > 0)) 33 | Sys.sleep(1) 34 | expect_true(all(dim(get_portrait("fra")) > 0)) 35 | Sys.sleep(1) 36 | expect_true(all(dim(get_portrait("deu")) > 0)) 37 | Sys.sleep(1) 38 | expect_true(all(dim(get_portrait("irl")) > 0)) 39 | Sys.sleep(1) 40 | expect_true(all(dim(get_portrait("sco")) > 0)) 41 | Sys.sleep(1) 42 | expect_true(all(dim(get_portrait("gbr")) > 0)) 43 | Sys.sleep(1) 44 | expect_true(all(dim(get_portrait("usa_house")) > 0)) 45 | Sys.sleep(1) 46 | expect_true(all(dim(get_portrait("usa_senate")) > 0)) 47 | Sys.sleep(100) 
48 | 49 | expect_identical(colnames(get_portrait("aut"))[1], c("pageid")) 50 | Sys.sleep(1) 51 | expect_identical(colnames(get_portrait("can"))[1], c("pageid")) 52 | Sys.sleep(1) 53 | expect_identical(colnames(get_portrait("cze"))[1], c("pageid")) 54 | Sys.sleep(1) 55 | expect_identical(colnames(get_portrait("esp"))[1], c("pageid")) 56 | Sys.sleep(1) 57 | expect_identical(colnames(get_portrait("fra"))[1], c("pageid")) 58 | Sys.sleep(1) 59 | expect_identical(colnames(get_portrait("deu"))[1], c("pageid")) 60 | Sys.sleep(1) 61 | expect_identical(colnames(get_portrait("irl"))[1], c("pageid")) 62 | Sys.sleep(1) 63 | expect_identical(colnames(get_portrait("sco"))[1], c("pageid")) 64 | Sys.sleep(1) 65 | expect_identical(colnames(get_portrait("gbr"))[1], c("pageid")) 66 | Sys.sleep(1) 67 | expect_identical(colnames(get_portrait("usa_house"))[1], c("pageid")) 68 | Sys.sleep(1) 69 | expect_identical(colnames(get_portrait("usa_senate"))[1], c("pageid")) 70 | Sys.sleep(100) 71 | }) 72 | -------------------------------------------------------------------------------- /tests/testthat/test-get_political.R: -------------------------------------------------------------------------------- 1 | test_that("Political table is returned appropriately for each legislature", { 2 | skip_on_cran() 3 | expect_identical(class(get_political("aut")), "data.frame") 4 | Sys.sleep(1) 5 | expect_identical(class(get_political("can")), "data.frame") 6 | Sys.sleep(1) 7 | expect_identical(class(get_political("cze")), "data.frame") 8 | Sys.sleep(1) 9 | expect_identical(class(get_political("esp")), "data.frame") 10 | Sys.sleep(1) 11 | expect_identical(class(get_political("fra")), "data.frame") 12 | Sys.sleep(1) 13 | expect_identical(class(get_political("deu")), "data.frame") 14 | Sys.sleep(1) 15 | expect_identical(class(get_political("irl")), "data.frame") 16 | Sys.sleep(1) 17 | expect_identical(class(get_political("sco")), "data.frame") 18 | Sys.sleep(1) 19 | expect_identical(class(get_political("gbr")), 
"data.frame") 20 | Sys.sleep(1) 21 | expect_identical(class(get_political("usa_house")), "data.frame") 22 | Sys.sleep(1) 23 | expect_identical(class(get_political("usa_senate")), "data.frame") 24 | Sys.sleep(100) 25 | 26 | expect_true(all(dim(get_political("aut")) > 0)) 27 | Sys.sleep(1) 28 | expect_true(all(dim(get_political("can")) > 0)) 29 | Sys.sleep(1) 30 | expect_true(all(dim(get_political("cze")) > 0)) 31 | Sys.sleep(1) 32 | expect_true(all(dim(get_political("esp")) > 0)) 33 | Sys.sleep(1) 34 | expect_true(all(dim(get_political("fra")) > 0)) 35 | Sys.sleep(1) 36 | expect_true(all(dim(get_political("deu")) > 0)) 37 | Sys.sleep(1) 38 | expect_true(all(dim(get_political("irl")) > 0)) 39 | Sys.sleep(1) 40 | expect_true(all(dim(get_political("sco")) > 0)) 41 | Sys.sleep(1) 42 | expect_true(all(dim(get_political("gbr")) > 0)) 43 | Sys.sleep(1) 44 | expect_true(all(dim(get_political("usa_house")) > 0)) 45 | Sys.sleep(1) 46 | expect_true(all(dim(get_political("usa_senate")) > 0)) 47 | Sys.sleep(100) 48 | 49 | expect_identical(colnames(get_political("aut"))[1], c("pageid")) 50 | Sys.sleep(1) 51 | expect_identical(colnames(get_political("can"))[1], c("pageid")) 52 | Sys.sleep(1) 53 | expect_identical(colnames(get_political("cze"))[1], c("pageid")) 54 | Sys.sleep(1) 55 | expect_identical(colnames(get_political("esp"))[1], c("pageid")) 56 | Sys.sleep(1) 57 | expect_identical(colnames(get_political("fra"))[1], c("pageid")) 58 | Sys.sleep(1) 59 | expect_identical(colnames(get_political("deu"))[1], c("pageid")) 60 | Sys.sleep(1) 61 | expect_identical(colnames(get_political("irl"))[1], c("pageid")) 62 | Sys.sleep(1) 63 | expect_identical(colnames(get_political("sco"))[1], c("pageid")) 64 | Sys.sleep(1) 65 | expect_identical(colnames(get_political("gbr"))[1], c("pageid")) 66 | Sys.sleep(1) 67 | expect_identical(colnames(get_political("usa_house"))[1], c("pageid")) 68 | Sys.sleep(1) 69 | expect_identical(colnames(get_political("usa_senate"))[1], c("pageid")) 70 | 
Sys.sleep(100) 71 | }) 72 | -------------------------------------------------------------------------------- /R/cld_content.R: -------------------------------------------------------------------------------- 1 | #' List content of the CLD 2 | #' 3 | #' Returns a named list of legislatures and sessions available in the CLD. This provides a quick overview of the CLD's scope and valid three-letter country codes, and helps to conveniently loop/map over legislatures and sessions. 4 | #' 5 | #' @param legislature An optional character string specifying one or more legislatures. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}. If NULL (the default), a list with all legislatures and sessions available in the CLD is returned. 6 | #' @return A list with names being three-letter country codes and with each element containing a vector that shows the sessions available for a legislature. 
7 | #' @examples 8 | #' # Get a list of three-letter country codes and available sessions for all countries 9 | #' overview <- cld_content() 10 | #' tibble::glimpse(overview) 11 | #' 12 | #' # Get a list of available sessions for the French Assemblée and the Irish Dail 13 | #' sessions <- cld_content(legislature = c("fra", "irl")) 14 | #' tibble::glimpse(sessions) 15 | #' @export 16 | cld_content <- function(legislature = NULL) { 17 | output <- list(aut = 1:27, bra = 38:57, can = 1:44, cze = 1:9, 18 | deu = 1:20, esp = 1:14, fra = 1:16, gbr = 1:58, 19 | irl = 1:33, isr = 1:25, ita_house = 1:19, ita_senate = 1:19, 20 | jpn = 1:49, nld = 1:65, sco = 1:6, tur = 1:27, 21 | usa_house = 1:117, usa_senate = 1:117) 22 | if (is.null(legislature)) { 23 | return(output) 24 | } else { 25 | if (any(!(legislature %in% c("aut", "bra", "can", "cze", 26 | "deu", "esp", "fra", "gbr", 27 | "irl", "isr", "ita_house", "ita_senate", 28 | "jpn", "nld", "sco", "tur", 29 | "usa_house", "usa_senate")))) { 30 | legislature <- legislature[which(!(legislature %in% c("aut", "bra", "can", "cze", 31 | "deu", "esp", "fra", "gbr", 32 | "irl", "isr", "ita_house", "ita_senate", 33 | "jpn", "nld", "sco", "tur", 34 | "usa_house", "usa_senate")))] 35 | stop (paste0("\n\nPlease provide valid three-letter country codes. legislatoR does not recognize the country code or does not contain data for ", 36 | paste0( 37 | paste0("\"", legislature, "\""), 38 | collapse = ", "), 39 | ". 
Use `legislatoR::cld_content()` to see country codes of available legislatures.")) 40 | } 41 | output <- output[legislature] 42 | } 43 | return(output) 44 | } 45 | -------------------------------------------------------------------------------- /man/get_core.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_core.R 3 | \name{get_core} 4 | \alias{get_core} 5 | \title{Fetch 'Core' table} 6 | \format{ 7 | Data frame with columns (varies by legislature): 8 | \itemize{ 9 | \item{country: ISO 3166-1 alpha-3 three-letter country code (of class \sQuote{character}).} 10 | \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer} or \sQuote{character}).} 11 | \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).} 12 | \item{wikititle: A legislator's undirected Wikipedia title (of class \sQuote{character}).} 13 | \item{name: A legislator's full name (of class \sQuote{character}).} 14 | \item{sex: A legislator's sex (of class \sQuote{character}).} 15 | \item{ethnicity: A legislator's ethnicity (of class \sQuote{character}).} 16 | \item{religion: A legislator's religious denomination (of class \sQuote{character}).} 17 | \item{birth: A legislator's date of birth (of class \sQuote{POSIXct}).} 18 | \item{death: A legislator's date of death (of class \sQuote{POSIXct}).} 19 | \item{birthplace: Comma separated latitude and longitude of a legislator's place of birth (of class \sQuote{character}).} 20 | \item{deathplace: Comma separated latitude and longitude of a legislator's place of death (of class \sQuote{character}).} 21 | } 22 | } 23 | \source{ 24 | Wikipedia, \url{https://www.wikipedia.org/} \cr 25 | Wikipedia API, \url{https://en.wikipedia.org/w/api.php} \cr 26 | Wikidata API, \url{https://www.wikidata.org/wiki/Wikidata:Main_Page} \cr 27 | Wikimedia Commons, 
\url{https://commons.wikimedia.org/wiki/Main_Page} \cr 28 | Face++ Cognitive Services API, \url{https://www.faceplusplus.com/} \cr 29 | Germany Bundestag Open Data, \url{https://www.bundestag.de/services/opendata} 30 | } 31 | \usage{ 32 | get_core(legislature) 33 | } 34 | \arguments{ 35 | \item{legislature}{A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}.} 36 | } 37 | \value{ 38 | A data frame with columns as specified above. 39 | } 40 | \description{ 41 | Fetches sociodemographic data of legislators for the specified legislature. Requires a working Internet connection. 42 | } 43 | \examples{ 44 | \donttest{# Get entire 'Core' table for the German Bundestag 45 | deu_core <- get_core(legislature = "deu") 46 | tibble::glimpse(deu_core) 47 | 48 | # Get 'Core' table for 16th session of the German Bundestag 49 | deu_core_subset <- dplyr::semi_join(x = deu_core, 50 | y = dplyr::filter(get_political(legislature = "deu"), 51 | session == 16), 52 | by = "pageid") 53 | tibble::glimpse(deu_core_subset) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /tests/testthat/test-get_profession.R: -------------------------------------------------------------------------------- 1 | test_that("Professions table is returned appropriately for each legislature", { 2 | skip_on_cran() 3 | expect_identical(class(get_profession("aut")), "data.frame") 4 | Sys.sleep(1) 5 | expect_identical(class(get_profession("can")), "data.frame") 6 | Sys.sleep(1) 7 | expect_identical(class(get_profession("cze")), "data.frame") 8 | Sys.sleep(1) 9 | expect_identical(class(get_profession("esp")), "data.frame") 10 | Sys.sleep(1) 11 | expect_identical(class(get_profession("fra")), "data.frame") 12 | Sys.sleep(1) 13 | 
expect_identical(class(get_profession("deu")), "data.frame") 14 | Sys.sleep(1) 15 | expect_identical(class(get_profession("irl")), "data.frame") 16 | Sys.sleep(1) 17 | expect_identical(class(get_profession("sco")), "data.frame") 18 | Sys.sleep(1) 19 | expect_identical(class(get_profession("gbr")), "data.frame") 20 | Sys.sleep(1) 21 | expect_identical(class(get_profession("usa_house")), "data.frame") 22 | Sys.sleep(1) 23 | expect_identical(class(get_profession("usa_senate")), "data.frame") 24 | Sys.sleep(100) 25 | 26 | expect_true(all(dim(get_profession("aut")) > 0)) 27 | Sys.sleep(1) 28 | expect_true(all(dim(get_profession("can")) > 0)) 29 | Sys.sleep(1) 30 | expect_true(all(dim(get_profession("cze")) > 0)) 31 | Sys.sleep(1) 32 | expect_true(all(dim(get_profession("esp")) > 0)) 33 | Sys.sleep(1) 34 | expect_true(all(dim(get_profession("fra")) > 0)) 35 | Sys.sleep(1) 36 | expect_true(all(dim(get_profession("deu")) > 0)) 37 | Sys.sleep(1) 38 | expect_true(all(dim(get_profession("irl")) > 0)) 39 | Sys.sleep(1) 40 | expect_true(all(dim(get_profession("sco")) > 0)) 41 | Sys.sleep(1) 42 | expect_true(all(dim(get_profession("gbr")) > 0)) 43 | Sys.sleep(1) 44 | expect_true(all(dim(get_profession("usa_house")) > 0)) 45 | Sys.sleep(1) 46 | expect_true(all(dim(get_profession("usa_senate")) > 0)) 47 | Sys.sleep(100) 48 | 49 | expect_identical(colnames(get_profession("aut"))[1], c("wikidataid")) 50 | Sys.sleep(1) 51 | expect_identical(colnames(get_profession("can"))[1], c("wikidataid")) 52 | Sys.sleep(1) 53 | expect_identical(colnames(get_profession("cze"))[1], c("wikidataid")) 54 | Sys.sleep(1) 55 | expect_identical(colnames(get_profession("esp"))[1], c("wikidataid")) 56 | Sys.sleep(1) 57 | expect_identical(colnames(get_profession("fra"))[1], c("wikidataid")) 58 | Sys.sleep(1) 59 | expect_identical(colnames(get_profession("deu"))[1], c("wikidataid")) 60 | Sys.sleep(1) 61 | expect_identical(colnames(get_profession("irl"))[1], c("wikidataid")) 62 | Sys.sleep(1) 63 | 
expect_identical(colnames(get_profession("sco"))[1], c("wikidataid")) 64 | Sys.sleep(1) 65 | expect_identical(colnames(get_profession("gbr"))[1], c("wikidataid")) 66 | Sys.sleep(1) 67 | expect_identical(colnames(get_profession("usa_house"))[1], c("wikidataid")) 68 | Sys.sleep(1) 69 | expect_identical(colnames(get_profession("usa_senate"))[1], c("wikidataid")) 70 | Sys.sleep(100) 71 | }) 72 | -------------------------------------------------------------------------------- /man/get_political.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_political.R 3 | \name{get_political} 4 | \alias{get_political} 5 | \title{Fetch 'Political' table} 6 | \format{ 7 | Data frame in long format with columns (varies by legislature): 8 | \itemize{ 9 | \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer}).} 10 | \item{session: Legislative period (of class \sQuote{integer}).} 11 | \item{party: A legislator's party affiliation (of class \sQuote{character}). See \url{https://github.com/saschagobel/legislatoR} for the full form of abbreviated party names and english translations of non-english party names} 12 | \item{constituency: A legislator's constituency (of class \sQuote{character}).} 13 | \item{constituency2: A legislator's constituency (upper level, if applicable, of class \sQuote{character}).} 14 | \item{constituency_id: ID of a legislator's constituency (of class \sQuote{character}).} 15 | \item{session_start: Date the legislative period started (of class \sQuote{Date}).} 16 | \item{session_end: Date the legislative period ended (of class \sQuote{Date}).} 17 | \item{service: A legislator's period of service in days during the respective session (of class \sQuote{integer})} 18 | \item{government (or similar): Indicator of a legislator's majority status in parliament (of class \sQuote{logical}). 
Further columns with extensions of this might exist.} 19 | \item{leader (or similar): Indicator of a legislator's leader status in parliament (of class \sQuote{logical}). Further columns with extensions of this might exist.} 20 | } 21 | } 22 | \source{ 23 | Wikipedia, \url{https://www.wikipedia.org/} \cr 24 | Czech Republic Parliamentary Members Archive \url{https://public.psp.cz/sqw/fsnem.sqw?zvo=1} \cr 25 | Spain Parliamentary Members Archive \url{https://www.congreso.es/es/busqueda-de-diputados} 26 | } 27 | \usage{ 28 | get_political(legislature) 29 | } 30 | \arguments{ 31 | \item{legislature}{A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}.} 32 | } 33 | \value{ 34 | A data frame with columns as specified above. 35 | } 36 | \description{ 37 | Fetches political data of legislators for the specified legislature. Requires a working Internet connection. 38 | } 39 | \examples{ 40 | \donttest{# Get entire 'Political' table for the Czech Poslanecka Snemovna 41 | cze_political <- get_political(legislature = "cze") 42 | tibble::glimpse(cze_political) 43 | 44 | # Get 'Political' table for female ODS party members of the Czech Poslanecka Snemovna 45 | cze_political_subset <- dplyr::semi_join(x = dplyr::filter(cze_political, 46 | party == "ODS"), 47 | y = dplyr::filter(get_core(legislature = "cze"), 48 | sex == "female"), 49 | by = "pageid") 50 | tibble::glimpse(cze_political_subset) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /R/get_office.R: -------------------------------------------------------------------------------- 1 | #' Fetch 'Offices' table 2 | #' 3 | #' Fetches political and other offices of legislators for the specified legislature. 
Requires a working Internet connection. 4 | #' 5 | #' @param legislature A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}. 6 | #' @return A data frame with columns as specified above. 7 | #' @format Data frame in wide format with columns (varies by legislature): 8 | #' \itemize{ 9 | #' \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).} 10 | #' \item{office_1: political or other office held by a legislator (of class \sQuote{logical}).} 11 | #' \item{office_2: ... (of class \sQuote{logical}).} 12 | #' \item{...} 13 | #' } 14 | #' @examples 15 | #' \donttest{# Get entire 'Offices' table for the United States Senate 16 | #' usa_offices <- get_office(legislature = "usa_senate") 17 | #' tibble::glimpse(usa_offices) 18 | #' 19 | #' # Get 'Offices' table for female members of the United States Senate 20 | #' usa_offices_subset <- dplyr::semi_join(x = usa_offices, 21 | #' y = dplyr::filter(get_core(legislature = "usa_senate"), 22 | #' sex == "female"), 23 | #' by = "wikidataid") 24 | #' tibble::glimpse(usa_offices_subset) 25 | #' } 26 | #' @source 27 | #' Wikidata API, \url{https://www.wikidata.org/wiki/Wikidata:Main_Page} 28 | #' @export 29 | #' @importFrom curl nslookup 30 | #' @import dplyr 31 | get_office <- function(legislature) { 32 | if (length(legislature) > 1) { 33 | stop ("\n\nNo more than one legislature can be called at once. Please provide only one valid three-letter country code.") 34 | } 35 | if (!(legislature %in% c("aut", "bra", "can", "cze", 36 | "deu", "esp", "fra", "gbr", 37 | "irl", "isr", "ita_house", "ita_senate", 38 | "jpn", "nld", "sco", "tur", 39 | "usa_house", "usa_senate"))) { 40 | stop (paste0("\n\nPlease provide a valid three-letter country code.
legislatoR does not recognize the country code or does not contain data for ", 41 | paste0( 42 | paste0("\"", legislature, "\""), 43 | collapse = ", "), 44 | ". Use `legislatoR::cld_content()` to see country codes of available legislatures.")) 45 | } 46 | if (is.null(curl::nslookup("www.harvard.edu", error = FALSE))) { 47 | stop ("\n\nlegislatoR cannot establish a connection to Harvard Dataverse. Please check your Internet connection and whether Harvard Dataverse is online.") 48 | } 49 | endpoint <- "https://dataverse.harvard.edu/api/access/datafile/" 50 | file_id <- sysdata %>% filter(.data$table == "office" & .data$country == legislature) 51 | dvurl <- paste0(endpoint, file_id$id) 52 | connect <- url(dvurl) 53 | on.exit(close(connect)) 54 | dataset <- readRDS(connect) 55 | return(dataset) 56 | } 57 | -------------------------------------------------------------------------------- /R/get_portrait.R: -------------------------------------------------------------------------------- 1 | #' Fetch 'Portrait' table 2 | #' 3 | #' Fetches portrait urls of legislators for the specified legislature. Requires a working Internet connection. 4 | #' 5 | #' @param legislature A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}. 6 | #' @return A data frame with columns as specified above. 
7 | #' @format Data frame with columns (varies by legislature): 8 | #' \itemize{ 9 | #' \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer}).} 10 | #' \item{image_url: URL linking to a legislator's portrait on Wikimedia Commons (of class \sQuote{character}).} 11 | #' } 12 | #' @examples 13 | #' \donttest{# Get entire 'Portraits' table for the United States Senate 14 | #' usa_portraits <- get_portrait(legislature = "usa_senate") 15 | #' tibble::glimpse(usa_portraits) 16 | #' 17 | #' # Get 'Portraits' table for Democratic members of the United States Senate 18 | #' usa_port_subset <- dplyr::semi_join(x = usa_portraits, 19 | #' y = dplyr::filter(get_political(legislature = "usa_senate"), 20 | #' party == "D"), 21 | #' by = "pageid") 22 | #' tibble::glimpse(usa_port_subset) 23 | #' } 24 | #' @source 25 | #' Wikipedia API, \url{https://en.wikipedia.org/w/api.php} \cr 26 | #' Wikimedia Commons, \url{https://commons.wikimedia.org/wiki/Main_Page} 27 | #' @export 28 | #' @importFrom curl nslookup 29 | #' @import dplyr 30 | get_portrait <- function(legislature) { 31 | if (length(legislature) > 1) { 32 | stop ("\n\nNo more than one legislature can be called at once. Please provide only one valid three-letter country code.") 33 | } 34 | if (!(legislature %in% c("aut", "bra", "can", "cze", 35 | "deu", "esp", "fra", "gbr", 36 | "irl", "isr", "ita_house", "ita_senate", 37 | "jpn", "nld", "sco", "tur", 38 | "usa_house", "usa_senate"))) { 39 | stop (paste0("\n\nPlease provide a valid three-letter country code. legislatoR does not recognize the country code or does not contain data for ", 40 | paste0( 41 | paste0("\"", legislature, "\""), 42 | collapse = ", "), 43 | ". Use `legislatoR::cld_content()` to see country codes of available legislatures.")) 44 | } 45 | if (is.null(curl::nslookup("www.harvard.edu", error = FALSE))) { 46 | stop ("\n\nlegislatoR cannot establish a connection to Harvard Dataverse. 
Please check your Internet connection and whether Harvard Dataverse is online.") 47 | } 48 | endpoint <- "https://dataverse.harvard.edu/api/access/datafile/" 49 | file_id <- sysdata %>% filter(.data$table == "portrait" & .data$country == legislature) 50 | dvurl <- paste0(endpoint, file_id$id) 51 | connect <- url(dvurl) 52 | on.exit(close(connect)) 53 | dataset <- readRDS(connect) 54 | return(dataset) 55 | } 56 | -------------------------------------------------------------------------------- /R/get_profession.R: -------------------------------------------------------------------------------- 1 | #' Fetch 'Professions' table 2 | #' 3 | #' Fetches occupational data of legislators for the specified legislature. Requires a working Internet connection. 4 | #' 5 | #' @param legislature A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}. 6 | #' @return A data frame with columns as specified above. 7 | #' @format Data frame in wide format with columns (varies by legislature): 8 | #' \itemize{ 9 | #' \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).} 10 | #' \item{occupation_1: occupation a legislator practiced or was trained in (of class \sQuote{logical}).} 11 | #' \item{occupation_2: ... 
(of class \sQuote{logical}).} 12 | #' \item{...} 13 | #' } 14 | #' @examples 15 | #' \donttest{# Get entire 'Professions' table for the United States House 16 | #' usa_professions <- get_profession(legislature = "usa_house") 17 | #' tibble::glimpse(usa_professions) 18 | #' 19 | #' # Get 'Professions' table for female members of the United States House 20 | #' usa_professions_subset <- dplyr::semi_join(x = usa_professions, 21 | #' y = dplyr::filter(get_core(legislature = "usa_house"), 22 | #' sex == "female"), 23 | #' by = "wikidataid") 24 | #' tibble::glimpse(usa_professions_subset) 25 | #' } 26 | #' @source 27 | #' Wikidata API, \url{https://www.wikidata.org/wiki/Wikidata:Main_Page} 28 | #' @export 29 | #' @importFrom curl nslookup 30 | #' @import dplyr 31 | get_profession <- function(legislature) { 32 | if (length(legislature) > 1) { 33 | stop ("\n\nNo more than one legislature can be called at once. Please provide only one valid three-letter country code.") 34 | } 35 | if (!(legislature %in% c("aut", "bra", "can", "cze", 36 | "deu", "esp", "fra", "gbr", 37 | "irl", "isr", "ita_house", "ita_senate", 38 | "jpn", "nld", "sco", "tur", 39 | "usa_house", "usa_senate"))) { 40 | stop (paste0("\n\nPlease provide a valid three-letter country code. legislatoR does not recognize the country code or does not contain data for ", 41 | paste0( 42 | paste0("\"", legislature, "\""), 43 | collapse = ", "), 44 | ". Use `legislatoR::cld_content()` to see country codes of available legislatures.")) 45 | } 46 | if (is.null(curl::nslookup("www.harvard.edu", error = FALSE))) { 47 | stop ("\n\nlegislatoR cannot establish a connection to Harvard Dataverse.
Please check your Internet connection and whether Harvard Dataverse is online.") 48 | } 49 | endpoint <- "https://dataverse.harvard.edu/api/access/datafile/" 50 | file_id <- sysdata %>% filter(.data$table == "profession" & .data$country == legislature) 51 | dvurl <- paste0(endpoint, file_id$id) 52 | connect <- url(dvurl) 53 | on.exit(close(connect)) 54 | dataset <- readRDS(connect) 55 | return(dataset) 56 | } 57 | -------------------------------------------------------------------------------- /R/get_traffic.R: -------------------------------------------------------------------------------- 1 | #' Fetch Wikipedia 'Traffic' table 2 | #' 3 | #' Fetches daily user traffic on legislators' Wikipedia biographies for the specified legislature. Requires a working Internet connection. 4 | #' 5 | #' @param legislature A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}. 6 | #' @return A data frame with columns as specified above. 
7 | #' @format Data frame with columns: 8 | #' \itemize{ 9 | #' \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer}).} 10 | #' \item{date: Date for which user traffic is recorded, from 2015-07-01 to 2018-12-31 UTC (of class \sQuote{POSIXct}).} 11 | #' \item{traffic: Daily non-unique user visits (of class \sQuote{numeric}).} 12 | #' } 13 | #' @examples 14 | #' \donttest{# Get entire 'Traffic' table for the Scottish Parliament 15 | #' sco_traffic <- get_traffic(legislature = "sco") 16 | #' tibble::glimpse(sco_traffic) 17 | #' 18 | #' # Add Wikidataid to 'Traffic' table for the Scottish Parliament 19 | #' sco_traffic_subset <- dplyr::inner_join(x = dplyr::select(get_core(legislature = "sco"), 20 | #' pageid, wikidataid), 21 | #' y = sco_traffic, 22 | #' by = "pageid") 23 | #' tibble::glimpse(sco_traffic_subset) 24 | #' } 25 | #' @source 26 | #' Wikimedia API, \url{https://wikimedia.org/api/rest_v1/} \cr 27 | #' \url{http://petermeissner.de:8880/} 28 | #' @export 29 | #' @importFrom curl nslookup 30 | #' @import dplyr 31 | get_traffic <- function(legislature) { 32 | if (length(legislature) > 1) { 33 | stop ("\n\nNo more than one legislature can be called at once. Please provide only one valid three-letter country code.") 34 | } 35 | if (!(legislature %in% c("aut", "bra", "can", "cze", 36 | "deu", "esp", "fra", "gbr", 37 | "irl", "isr", "ita_house", "ita_senate", 38 | "jpn", "nld", "sco", "tur", 39 | "usa_house", "usa_senate"))) { 40 | stop (paste0("\n\nPlease provide a valid three-letter country code. legislatoR does not recognize the country code or does not contain data for ", 41 | paste0( 42 | paste0("\"", legislature, "\""), 43 | collapse = ", "), 44 | ". Use `legislatoR::cld_content()` to see country codes of available legislatures.")) 45 | } 46 | if (is.null(curl::nslookup("www.harvard.edu", error = FALSE))) { 47 | stop ("\n\nlegislatoR cannot establish a connection to Harvard Dataverse. 
Please check your Internet connection and whether Harvard Dataverse is online.") 48 | } 49 | endpoint <- "https://dataverse.harvard.edu/api/access/datafile/" 50 | file_id <- sysdata %>% filter(.data$table == "traffic" & .data$country == legislature) 51 | dvurl <- paste0(endpoint, file_id$id) 52 | connect <- url(dvurl) 53 | on.exit(close(connect)) 54 | dataset <- readRDS(connect) 55 | return(dataset) 56 | } 57 | -------------------------------------------------------------------------------- /R/get_social.R: -------------------------------------------------------------------------------- 1 | #' Fetch 'Social' table 2 | #' 3 | #' Fetches social media handles and website URLs of legislators for the specified legislature. Requires a working Internet connection. 4 | #' 5 | #' @param legislature A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}. 6 | #' @return A data frame with columns as specified above. 
7 | #' @format Data frame with columns (varies by legislature): 8 | #' \itemize{ 9 | #' \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).} 10 | #' \item{twitter: Twitter handle (of class \sQuote{character}).} 11 | #' \item{facebook: Facebook handle (of class \sQuote{character}).} 12 | #' \item{youtube: Youtube ID (of class \sQuote{character}).} 13 | #' \item{googlep: Google Plus ID (of class \sQuote{character}).} 14 | #' \item{instagram: Instagram handle (of class \sQuote{character}).} 15 | #' \item{linkedin: LinkedIn ID (of class \sQuote{character}).} 16 | #' \item{website: Personal website URL (of class \sQuote{character}).} 17 | #' } 18 | #' @examples 19 | #' \donttest{# Get entire 'Social' table for the UK House of Commons 20 | #' gbr_social <- get_social(legislature = "gbr") 21 | #' tibble::glimpse(gbr_social) 22 | #' 23 | #' # Get 'Social' table for members of the UK House of Commons with available TheyWorkForYou ID 24 | #' gbr_social_subset <- dplyr::semi_join(x = gbr_social, 25 | #' y = dplyr::filter(get_ids(legislature = "gbr"), 26 | #' !is.na(theyworkforyou)), 27 | #' by = "wikidataid") 28 | #' tibble::glimpse(gbr_social_subset) 29 | #' } 30 | #' @source 31 | #' Wikidata API, \url{https://www.wikidata.org/wiki/Wikidata:Main_Page} 32 | #' @export 33 | #' @importFrom curl nslookup 34 | #' @import dplyr 35 | get_social <- function(legislature) { 36 | if (length(legislature) > 1) { 37 | stop ("\n\nNo more than one legislature can be called at once. Please provide only one valid three-letter country code.") 38 | } 39 | if (!(legislature %in% c("aut", "bra", "can", "cze", 40 | "deu", "esp", "fra", "gbr", 41 | "irl", "isr", "ita_house", "ita_senate", 42 | "jpn", "nld", "sco", "tur", 43 | "usa_house", "usa_senate"))) { 44 | stop (paste0("\n\nPlease provide a valid three-letter country code.
legislatoR does not recognize the country code or does not contain data for ", 45 | paste0( 46 | paste0("\"", legislature, "\""), 47 | collapse = ", "), 48 | ". Use `legislatoR::cld_content()` to see country codes of available legislatures.")) 49 | } 50 | if (is.null(curl::nslookup("www.harvard.edu", error = FALSE))) { 51 | stop ("\n\nlegislatoR cannot establish a connection to Harvard Dataverse. Please check your Internet connection and whether Harvard Dataverse is online.") 52 | } 53 | endpoint <- "https://dataverse.harvard.edu/api/access/datafile/" 54 | file_id <- sysdata %>% filter(.data$table == "social" & .data$country == legislature) 55 | dvurl <- paste0(endpoint, file_id$id) 56 | connect <- url(dvurl) 57 | on.exit(close(connect)) 58 | dataset <- readRDS(connect) 59 | return(dataset) 60 | } 61 | -------------------------------------------------------------------------------- /R/get_history.R: -------------------------------------------------------------------------------- 1 | #' Fetch Wikipedia 'History' table 2 | #' 3 | #' Fetches full revision histories of legislators' Wikipedia biographies for the specified legislature. Requires a working Internet connection. 4 | #' 5 | #' @param legislature A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}. 6 | #' @return A data frame with columns as specified above. 
7 | #' @format Data frame with columns: 8 | #' \itemize{ 9 | #' \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer}).} 10 | #' \item{revid: Wikipedia edit ID (of class \sQuote{integer}).} 11 | #' \item{parentid: Wikipedia edit ID of the previous revision (of class \sQuote{integer}).} 12 | #' \item{user: Username of registered user responsible for the revision, IP address in case of anonymous revision (of class \sQuote{character}).} 13 | #' \item{userid: ID of registered user responsible for the revision, 0 in case of anonymous revision (of class \sQuote{integer}).} 14 | #' \item{timestamp: Date and time of the revision (of class \sQuote{POSIXct}).} 15 | #' \item{size: Revision size in bytes (of class \sQuote{integer}).} 16 | #' \item{comment: Revision comment (of class \sQuote{character}).} 17 | #' } 18 | #' @examples 19 | #' \donttest{# Get entire 'History' table for the Austrian Nationalrat 20 | #' aut_history <- get_history(legislature = "aut") 21 | #' tibble::glimpse(aut_history) 22 | #' 23 | #' # Get 'History' table for NEOS party members of the Austrian Nationalrat 24 | #' aut_history_subset <- dplyr::semi_join(x = aut_history, 25 | #' y = dplyr::filter(get_political(legislature = "aut"), 26 | #' party == "NEOS"), 27 | #' by = "pageid") 28 | #' tibble::glimpse(aut_history_subset) 29 | #' } 30 | #' @source 31 | #' Wikipedia API, \url{https://en.wikipedia.org/w/api.php} 32 | #' @export 33 | #' @importFrom curl nslookup 34 | #' @import dplyr 35 | get_history <- function(legislature) { 36 | if (length(legislature) > 1) { 37 | stop ("\n\nNo more than one legislature can be called at once. 
Please provide only one valid three-letter country code.") 38 | } 39 | if (!(legislature %in% c("aut", "bra", "can", "cze", 40 | "deu", "esp", "fra", "gbr", 41 | "irl", "isr", "ita_house", "ita_senate", 42 | "jpn", "nld", "sco", "tur", 43 | "usa_house", "usa_senate"))) { 44 | stop (paste0("\n\nPlease provide a valid three-letter country code. legislatoR does not recognize the country code or does not contain data for ", 45 | paste0( 46 | paste0("\"", legislature, "\""), 47 | collapse = ", "), 48 | ". Use `legislatoR::cld_content()` to see country codes of available legislatures.")) 49 | } 50 | if (is.null(curl::nslookup("www.harvard.edu", error = FALSE))) { 51 | stop ("\n\nlegislatoR cannot establish a connection to Harvard Dataverse. Please check your Internet connection and whether Harvard Dataverse is online.") 52 | } 53 | endpoint <- "https://dataverse.harvard.edu/api/access/datafile/" 54 | file_id <- sysdata %>% filter(.data$table == "history" & .data$country == legislature) 55 | dvurl <- paste0(endpoint, file_id$id) 56 | connect <- url(dvurl) 57 | on.exit(close(connect)) 58 | dataset <- readRDS(connect) 59 | return(dataset) 60 | } 61 | -------------------------------------------------------------------------------- /tests/testthat/test-get_core.R: -------------------------------------------------------------------------------- 1 | test_that("Core table is returned appropriately for each legislature", { 2 | skip_on_cran() 3 | expect_identical(class(get_core("aut")), "data.frame") 4 | Sys.sleep(1) 5 | expect_identical(class(get_core("can")), "data.frame") 6 | Sys.sleep(1) 7 | expect_identical(class(get_core("cze")), "data.frame") 8 | Sys.sleep(1) 9 | expect_identical(class(get_core("esp")), "data.frame") 10 | Sys.sleep(1) 11 | expect_identical(class(get_core("fra")), "data.frame") 12 | Sys.sleep(1) 13 | expect_identical(class(get_core("deu")), "data.frame") 14 | Sys.sleep(1) 15 | expect_identical(class(get_core("irl")), "data.frame") 16 | Sys.sleep(1) 17 | 
expect_identical(class(get_core("sco")), "data.frame") 18 | Sys.sleep(1) 19 | expect_identical(class(get_core("gbr")), "data.frame") 20 | Sys.sleep(1) 21 | expect_identical(class(get_core("usa_house")), "data.frame") 22 | Sys.sleep(1) 23 | expect_identical(class(get_core("usa_senate")), "data.frame") 24 | Sys.sleep(100) 25 | 26 | expect_true(all(dim(get_core("aut")) > 0)) 27 | Sys.sleep(1) 28 | expect_true(all(dim(get_core("can")) > 0)) 29 | Sys.sleep(1) 30 | expect_true(all(dim(get_core("cze")) > 0)) 31 | Sys.sleep(1) 32 | expect_true(all(dim(get_core("esp")) > 0)) 33 | Sys.sleep(1) 34 | expect_true(all(dim(get_core("fra")) > 0)) 35 | Sys.sleep(1) 36 | expect_true(all(dim(get_core("deu")) > 0)) 37 | Sys.sleep(1) 38 | expect_true(all(dim(get_core("irl")) > 0)) 39 | Sys.sleep(1) 40 | expect_true(all(dim(get_core("sco")) > 0)) 41 | Sys.sleep(1) 42 | expect_true(all(dim(get_core("gbr")) > 0)) 43 | Sys.sleep(1) 44 | expect_true(all(dim(get_core("usa_house")) > 0)) 45 | Sys.sleep(1) 46 | expect_true(all(dim(get_core("usa_senate")) > 0)) 47 | Sys.sleep(100) 48 | 49 | expect_identical(colnames(get_core("aut"))[1:3], c("country", "pageid", "wikidataid")) 50 | Sys.sleep(1) 51 | expect_identical(colnames(get_core("can"))[1:3], c("country", "pageid", "wikidataid")) 52 | Sys.sleep(1) 53 | expect_identical(colnames(get_core("cze"))[1:3], c("country", "pageid", "wikidataid")) 54 | Sys.sleep(1) 55 | expect_identical(colnames(get_core("esp"))[1:3], c("country", "pageid", "wikidataid")) 56 | Sys.sleep(1) 57 | expect_identical(colnames(get_core("fra"))[1:3], c("country", "pageid", "wikidataid")) 58 | Sys.sleep(1) 59 | expect_identical(colnames(get_core("deu"))[1:3], c("country", "pageid", "wikidataid")) 60 | Sys.sleep(1) 61 | expect_identical(colnames(get_core("irl"))[1:3], c("country", "pageid", "wikidataid")) 62 | Sys.sleep(1) 63 | expect_identical(colnames(get_core("sco"))[1:3], c("country", "pageid", "wikidataid")) 64 | Sys.sleep(1) 65 | 
expect_identical(colnames(get_core("gbr"))[1:3], c("country", "pageid", "wikidataid")) 66 | Sys.sleep(1) 67 | expect_identical(colnames(get_core("usa_house"))[1:3], c("country", "pageid", "wikidataid")) 68 | Sys.sleep(1) 69 | expect_identical(colnames(get_core("usa_senate"))[1:3], c("country", "pageid", "wikidataid")) 70 | Sys.sleep(100) 71 | 72 | expect_identical(sample(get_core("aut")$country, 1), "AUT") 73 | Sys.sleep(1) 74 | expect_identical(sample(get_core("can")$country, 1), "CAN") 75 | Sys.sleep(1) 76 | expect_identical(sample(get_core("cze")$country, 1), "CZE") 77 | Sys.sleep(1) 78 | expect_identical(sample(get_core("esp")$country, 1), "ESP") 79 | Sys.sleep(1) 80 | expect_identical(sample(get_core("fra")$country, 1), "FRA") 81 | Sys.sleep(1) 82 | expect_identical(sample(get_core("deu")$country, 1), "DEU") 83 | Sys.sleep(1) 84 | expect_identical(sample(get_core("irl")$country, 1), "IRL") 85 | Sys.sleep(1) 86 | expect_identical(sample(get_core("sco")$country, 1), "SCO") 87 | Sys.sleep(1) 88 | expect_identical(sample(get_core("gbr")$country, 1), "GBR") 89 | Sys.sleep(1) 90 | expect_identical(sample(get_core("usa_house")$country, 1), "USA-H") 91 | Sys.sleep(1) 92 | expect_identical(sample(get_core("usa_senate")$country, 1), "USA-S") 93 | Sys.sleep(100) 94 | }) 95 | -------------------------------------------------------------------------------- /R/get_core.R: -------------------------------------------------------------------------------- 1 | #' Fetch 'Core' table 2 | #' 3 | #' Fetches sociodemographic data of legislators for the specified legislature. Requires a working Internet connection. 4 | #' 5 | #' @param legislature A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}. 
6 | #' @return A data frame with columns as specified above. 7 | #' @format Data frame with columns (varies by legislature): 8 | #' \itemize{ 9 | #' \item{country: ISO 3166-1 alpha-3 three-letter country code (of class \sQuote{character}).} 10 | #' \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer} or \sQuote{character}).} 11 | #' \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).} 12 | #' \item{wikititle: A legislator's undirected Wikipedia title (of class \sQuote{character}).} 13 | #' \item{name: A legislator's full name (of class \sQuote{character}).} 14 | #' \item{sex: A legislator's sex (of class \sQuote{character}).} 15 | #' \item{ethnicity: A legislator's ethnicity (of class \sQuote{character}).} 16 | #' \item{religion: A legislator's religious denomination (of class \sQuote{character}).} 17 | #' \item{birth: A legislator's date of birth (of class \sQuote{POSIXct}).} 18 | #' \item{death: A legislator's date of death (of class \sQuote{POSIXct}).} 19 | #' \item{birthplace: Comma separated latitude and longitude of a legislator's place of birth (of class \sQuote{character}).} 20 | #' \item{deathplace: Comma separated latitude and longitude of a legislator's place of death (of class \sQuote{character}).} 21 | #' } 22 | #' @examples 23 | #' \donttest{# Get entire 'Core' table for the German Bundestag 24 | #' deu_core <- get_core(legislature = "deu") 25 | #' tibble::glimpse(deu_core) 26 | #' 27 | #' # Get 'Core' table for 16th session of the German Bundestag 28 | #' deu_core_subset <- dplyr::semi_join(x = deu_core, 29 | #' y = dplyr::filter(get_political(legislature = "deu"), 30 | #' session == 16), 31 | #' by = "pageid") 32 | #' tibble::glimpse(deu_core_subset) 33 | #' } 34 | #' @source 35 | #' Wikipedia, \url{https://www.wikipedia.org/} \cr 36 | #' Wikipedia API, \url{https://en.wikipedia.org/w/api.php} \cr 37 | #' Wikidata API, 
\url{https://www.wikidata.org/wiki/Wikidata:Main_Page} \cr 38 | #' Wikimedia Commons, \url{https://commons.wikimedia.org/wiki/Main_Page} \cr 39 | #' Face++ Cognitive Services API, \url{https://www.faceplusplus.com/} \cr 40 | #' Germany Bundestag Open Data, \url{https://www.bundestag.de/services/opendata} 41 | #' @export 42 | #' @importFrom curl nslookup 43 | #' @import dplyr 44 | get_core <- function(legislature) { 45 | if (length(legislature) > 1) { 46 | stop ("\n\nNo more than one legislature can be called at once. Please provide only one valid three-letter country code.") 47 | } 48 | if (!(legislature %in% c("aut", "bra", "can", "cze", 49 | "deu", "esp", "fra", "gbr", 50 | "irl", "isr", "ita_house", "ita_senate", 51 | "jpn", "nld", "sco", "tur", 52 | "usa_house", "usa_senate"))) { 53 | stop (paste0("\n\nPlease provide a valid three-letter country code. legislatoR does not recognize the country code or does not contain data for ", 54 | paste0( 55 | paste0("\"", legislature, "\""), 56 | collapse = ", "), 57 | ". Use `legislatoR::cld_content()` to see country codes of available legislatures.")) 58 | } 59 | if (is.null(curl::nslookup("www.harvard.edu", error = FALSE))) { 60 | stop ("\n\nlegislatoR cannot establish a connection to Harvard Dataverse. 
Please check your Internet connection and whether Harvard Dataverse is online.") 61 | } 62 | endpoint <- "https://dataverse.harvard.edu/api/access/datafile/" 63 | file_id <- sysdata %>% filter(.data$table == "core" & .data$country == legislature) 64 | dvurl <- paste0(endpoint, file_id$id) 65 | connect <- url(dvurl) 66 | on.exit(close(connect)) 67 | dataset <- readRDS(connect) 68 | return(dataset) 69 | } 70 | -------------------------------------------------------------------------------- /man/get_ids.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_ids.R 3 | \name{get_ids} 4 | \alias{get_ids} 5 | \title{Fetch 'IDs' table} 6 | \format{ 7 | Data frame with columns (varies by legislature): 8 | \itemize{ 9 | \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).} 10 | \item{parlid: Parliamentary website or website ID (of class \sQuote{character}).} 11 | \item{btvote: ID for BTVote data on all roll call votes taken in the German Bundestag from 1949 to 2013 and for Reelection Prospects data. The respective ID in BTVote and Reelection Prospects data is 'mp_id' (of class \sQuote{character}).} 12 | \item{parlspeech: ID for ParlSpeech datasets containing full-text vectors of plenary speeches. The respective ID in ParlSpeech data is 'speaker' (of class \sQuote{character}).} 13 | \item{dpsi: ID for Database of Parliamentary Speeches in Ireland. The respective ID in the Database of Parliamentary Speeches in Ireland is 'memberID' (of class \sQuote{character}).} 14 | \item{eggersspirling: ID for Eggers and Spirling British political development database. The respective ID in Eggers and Spirling data is 'member.id' (of class \sQuote{character}).} 15 | \item{bioguide: ID for the Voteview congressional roll-call votes database. 
The respective ID in Voteview data is 'bioguide_id' (of class \sQuote{character}).} 16 | \item{icpsr: ID for Congressional Bills Project database. The respective ID in Congressional Bills Project data is 'icpsr' (of class \sQuote{character}).} 17 | \item{sycomore: Sycomore database of French MPs (of class \sQuote{character}).} 18 | \item{libcon: Library of Congress ID (of class \sQuote{character}).} 19 | \item{gnd: German National Library ID (of class \sQuote{character}).} 20 | \item{bnf: French National Library ID (of class \sQuote{character}).} 21 | \item{freebase: Freebase ID (of class \sQuote{character}).} 22 | \item{munzinger: Munzinger archive ID (of class \sQuote{character}).} 23 | \item{nndb: Notable Names Database ID (of class \sQuote{character}).} 24 | \item{imdb: Internet Movie Database ID (of class \sQuote{character}).} 25 | \item{brittanica: Encyclopedia Brittanica ID (of class \sQuote{character}).} 26 | \item{quora: Quora ID (of class \sQuote{character}).} 27 | \item{votesmart: Project Votesmart ID (of class \sQuote{character}).} 28 | \item{fec: Federal Election Commission ID (of class \sQuote{character}).} 29 | \item{ballotpedia: Ballotpedia ID (of class \sQuote{character}).} 30 | \item{opensecrets: Opensecrets ID (of class \sQuote{character}).} 31 | \item{genealogists: Genealogists ID (of class \sQuote{character}).} 32 | \item{politfacts: Politfacts ID (of class \sQuote{character}).} 33 | \item{nkcr: Czech National Library ID (of class \sQuote{character}).} 34 | \item{parlbio: parliament.uk biography ID (of class \sQuote{character}).} 35 | \item{parlthesaurus: UK Parliament thesaurus ID (of class \sQuote{character}).} 36 | \item{national: UK National Archives ID (of class \sQuote{character}).} 37 | \item{hansard: Hansard (1803-2005) ID (of class \sQuote{character}).} 38 | \item{publicwhip: PublicWhip ID (of class \sQuote{character}).} 39 | \item{theyworkforyou: TheyWorkForYou ID (of class \sQuote{character}).} 40 | } 41 | } 42 | \source{ 43 | 
Wikidata API, \url{https://www.wikidata.org/wiki/Wikidata:Main_Page} 44 | } 45 | \usage{ 46 | get_ids(legislature) 47 | } 48 | \arguments{ 49 | \item{legislature}{A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}.} 50 | } 51 | \value{ 52 | A data frame with columns as specified above. 53 | } 54 | \description{ 55 | Fetches a range of IDs of legislators for the specified legislature. Requires a working Internet connection. 56 | } 57 | \examples{ 58 | \donttest{# Get entire 'IDs' table for the United States Senate 59 | usa_ids <- get_ids(legislature = "usa_senate") 60 | tibble::glimpse(usa_ids) 61 | 62 | # Get ICPSR IDs and add 'Offices' table for the United States Senate 63 | usa_ids_subset <- dplyr::inner_join(x = dplyr::filter(usa_ids, 64 | !is.na(icpsr)), 65 | y = get_office(legislature = "usa_senate"), 66 | by = "wikidataid") 67 | tibble::glimpse(usa_ids_subset) 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /R/get_political.R: -------------------------------------------------------------------------------- 1 | #' Fetch 'Political' table 2 | #' 3 | #' Fetches political data of legislators for the specified legislature. Requires a working Internet connection. 4 | #' 5 | #' @param legislature A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}. 6 | #' @return A data frame with columns as specified above.
7 | #' @format Data frame in long format with columns (varies by legislature): 8 | #' \itemize{ 9 | #' \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer}).} 10 | #' \item{session: Legislative period (of class \sQuote{integer}).} 11 | #' \item{party: A legislator's party affiliation (of class \sQuote{character}). See \url{https://github.com/saschagobel/legislatoR} for the full form of abbreviated party names and english translations of non-english party names} 12 | #' \item{constituency: A legislator's constituency (of class \sQuote{character}).} 13 | #' \item{constituency2: A legislator's constituency (upper level, if applicable, of class \sQuote{character}).} 14 | #' \item{constituency_id: ID of a legislator's constituency (of class \sQuote{character}).} 15 | #' \item{session_start: Date the legislative period started (of class \sQuote{Date}).} 16 | #' \item{session_end: Date the legislative period ended (of class \sQuote{Date}).} 17 | #' \item{service: A legislator's period of service in days during the respective session (of class \sQuote{integer})} 18 | #' \item{government (or similar): Indicator of a legislator's majority status in parliament (of class \sQuote{logical}). Further columns with extensions of this might exist.} 19 | #' \item{leader (or similar): Indicator of a legislator's leader status in parliament (of class \sQuote{logical}). 
Further columns with extensions of this might exist.} 20 | #' } 21 | #' @examples 22 | #' \donttest{# Get entire 'Political' table for the Czech Poslanecka Snemovna 23 | #' cze_political <- get_political(legislature = "cze") 24 | #' tibble::glimpse(cze_political) 25 | #' 26 | #' # Get 'Political' table for female ODS party members of the Czech Poslanecka Snemovna 27 | #' cze_political_subset <- dplyr::semi_join(x = dplyr::filter(cze_political, 28 | #' party == "ODS"), 29 | #' y = dplyr::filter(get_core(legislature = "cze"), 30 | #' sex == "female"), 31 | #' by = "pageid") 32 | #' tibble::glimpse(cze_political_subset) 33 | #' } 34 | #' @source 35 | #' Wikipedia, \url{https://www.wikipedia.org/} \cr 36 | #' Czech Republic Parliamentary Members Archive \url{https://public.psp.cz/sqw/fsnem.sqw?zvo=1} \cr 37 | #' Spain Parliamentary Members Archive \url{https://www.congreso.es/es/busqueda-de-diputados} 38 | #' @export 39 | #' @importFrom curl nslookup 40 | #' @import dplyr 41 | get_political <- function(legislature) { 42 | if (length(legislature) > 1) { 43 | stop ("\n\nNo more than one legislature can be called at once. Please provide only one valid three-letter country code.") 44 | } 45 | if (!(legislature %in% c("aut", "bra", "can", "cze", 46 | "deu", "esp", "fra", "gbr", 47 | "irl", "isr", "ita_house", "ita_senate", 48 | "jpn", "nld", "sco", "tur", 49 | "usa_house", "usa_senate"))) { 50 | stop (paste0("\n\nPlease provide a valid three-letter country code. legislatoR does not recognize the country code or does not contain data for ", 51 | paste0( 52 | paste0("\"", legislature, "\""), 53 | collapse = ", "), 54 | ". Use `legislatoR::cld_content()` to see country codes of available legislatures.")) 55 | } 56 | if (is.null(curl::nslookup("www.harvard.edu", error = FALSE))) { 57 | stop ("\n\nlegislatoR cannot establish a connection to Harvard Dataverse. 
Please check your Internet connection and whether Harvard Dataverse is online.") 58 | } 59 | endpoint <- "https://dataverse.harvard.edu/api/access/datafile/" 60 | file_id <- sysdata %>% filter(.data$table == "political" & .data$country == legislature) 61 | dvurl <- paste0(endpoint, file_id$id) 62 | connect <- url(dvurl) 63 | on.exit(close(connect)) 64 | dataset <- readRDS(connect) 65 | return(dataset) 66 | } 67 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # legislatoR 1.1 2 | 3 | * Data for Canada, Czech Republic, France, Germany, Scotland, and USA updated 4 | * Data for Brazil, Israel, Italy, Japan, Netherlands, and Turkey newly added 5 | * All data moved to Harvard Dataverse 6 | 7 | 8 | # legislatoR 1.0 9 | 10 | * All legislative periods of Spain's Congreso de los Diputados added to the database (14 in total). 11 | * Ongoing session of the Austrian Nationalrat added to the database (27th) and concluded session updated (26th). 12 | * Ongoing session of the Canadian House of Commons added to the database (43rd) and concluded session updated (42nd). 13 | * Ongoing session of the Irish Dáil added to the database (33rd) and concluded session updated (32nd). 14 | * Ongoing session of the UK House of Commons added to the database (58th) and concluded session updated (57th). 15 | * Irish Dáil data integrated with Database of Parliamentary Speeches in Ireland. 16 | * Czech Poslanecka Snemovna data integrated with ParlSpeech V1. 17 | * UK House of Commons data integrated with ParlSpeech V1. 18 | * Spanish Congreso de los Diputados data integrated with ParlSpeech V2. 19 | * Austrian Nationalrat data integrated with ParlSpeech V2. 20 | * Daily Wikipedia page traffic now goes back to December 2007 (except for the US House, where file storage restrictions currently limit traffic to range back to July 2009 only). 
21 | * New `cld_content()` function that offers a quick overview of the CLD's scope and valid three-letter country codes, and helps to conveniently loop/map over legislatures and sessions. 22 | * "Introducing legislatoR" Vignette added. 23 | * Now available via CRAN 24 | * New Logo/Hexsticker. 25 | 26 | 27 | # legislatoR 0.2.0 28 | 29 | * 1st to 37th legislative periods of UK's House of Commons added to the database (37 in total). 30 | * Missing legislators and services variable added to all sessions of the French Assemblée. 31 | * German Bundestag data integrated with BTVote and ParlSpeech data. 32 | * UK House of Commons data integrated with Eggers and Spirling British political development database (starting from 38th session). 33 | * United States House and Senate data integrated with Voteview and Congressional Bills Project data. 34 | * Missing Twitter handles added for the Austrian Nationalrat, Czech Poslanecka Snemovna, French Assemblée, German Bundestag, Irish Dáil, UK House of Commons, and United States House and Senate (841 in total). 35 | * Missing religious affiliation added for United States House and Senate and Canada's House of Commons. 36 | * Missing ethnicity added for Canada and UK's House of Commons. 37 | 38 | # legislatoR 0.1.0 39 | 40 | * Changed some function calls: `get_facial` is now `get_portrait` and `get_occupation` is now `get_profession`. 41 | * Changed valid legislature arguments for `get_` functions. Full legislature names are no longer accepted as legislature codes. Instead, three-letter country codes such as `aut`, `can`, `cze`, `fra`, `deu`, `irl`, `sco`, `gbr`, `usa_house`, `usa_senate` must be used. 42 | * All legislative periods of Canada's House of Commons added to the database (42 in total). 43 | * All legislative periods of Czech Republic's Poslanecka Snemovna added to the database (8 in total). 44 | * All legislative periods of Scotland's Parliament added to the database (5 in total). 
45 | * 38th - 57th legislative periods of UK's House of Commons added to the database (20 in total). 46 | * Current session of the Austrian Nationalrat added to the database (26th). 47 | * Current session of the German Bundestag added to the database (19th). 48 | * Current session of the French Assemblée added to the database (15th). 49 | * Current session of the United States House of representatives added to the database (116th). 50 | * Current session of the United States Senate added to the database (116th). 51 | * ISO 3166-1 alpha-3 three-letter country codes added to all core datasets. 52 | * Start- and end date of sessions added to all political datasets. 53 | * Missing legislators and services variable added to all sessions of the Irish Dáil. 54 | * Error in United States Senate Wikipedia revision records corrected. 55 | * Missing religious affiliation added from 1st to 18th German Bundestag using official data. 56 | * Erroneous information on United States Senate and House members' period of service corrected. 57 | * Additional variables on government/opposition status and leader positions added to all sessions of the Canadian House of Commons and United States Congress. 58 | * Correction and alignment of party names across legislative periods for all parliaments. 59 | * Emotion estimates removed from the portraits dataset. 60 | -------------------------------------------------------------------------------- /R/get_ids.R: -------------------------------------------------------------------------------- 1 | #' Fetch 'IDs' table 2 | #' 3 | #' Fetches a range of IDs of legislators for the specified legislature. Requires a working Internet connection. 4 | #' 5 | #' @param legislature A character string specifying the three-letter country code of the legislature for which data shall be fetched. 
Currently one of \sQuote{aut}, \sQuote{bra}, \sQuote{can}, \sQuote{cze}, \sQuote{deu}, \sQuote{esp}, \sQuote{fra}, \sQuote{gbr}, \sQuote{irl}, \sQuote{isr}, \sQuote{ita_house}, \sQuote{ita_senate}, \sQuote{jpn}, \sQuote{nld}, \sQuote{sco}, \sQuote{tur}, \sQuote{usa_house}, or \sQuote{usa_senate}. 6 | #' @return A data frame with columns as specified above. 7 | #' @format Data frame with columns (varies by legislature): 8 | #' \itemize{ 9 | #' \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).} 10 | #' \item{parlid: Parliamentary website or website ID (of class \sQuote{character}).} 11 | #' \item{btvote: ID for BTVote data on all roll call votes taken in the German Bundestag from 1949 to 2013 and for Reelection Prospects data. The respective ID in BTVote and Reelection Prospects data is 'mp_id' (of class \sQuote{character}).} 12 | #' \item{parlspeech: ID for ParlSpeech datasets containing full-text vectors of plenary speeches. The respective ID in ParlSpeech data is 'speaker' (of class \sQuote{character}).} 13 | #' \item{dpsi: ID for Database of Parliamentary Speeches in Ireland. The respective ID in the Database of Parliamentary Speeches in Ireland is 'memberID' (of class \sQuote{character}).} 14 | #' \item{eggersspirling: ID for Eggers and Spirling British political development database. The respective ID in Eggers and Spirling data is 'member.id' (of class \sQuote{character}).} 15 | #' \item{bioguide: ID for the Voteview congressional roll-call votes database. The respective ID in Voteview data is 'bioguide_id' (of class \sQuote{character}).} 16 | #' \item{icpsr: ID for Congressional Bills Project database. 
The respective ID in Congressional Bills Project data is 'icpsr' (of class \sQuote{character}).} 17 | #' \item{sycomore: Sycomore database of French MPs (of class \sQuote{character}).} 18 | #' \item{libcon: Library of Congress ID (of class \sQuote{character}).} 19 | #' \item{gnd: German National Library ID (of class \sQuote{character}).} 20 | #' \item{bnf: French National Library ID (of class \sQuote{character}).} 21 | #' \item{freebase: Freebase ID (of class \sQuote{character}).} 22 | #' \item{munzinger: Munzinger archive ID (of class \sQuote{character}).} 23 | #' \item{nndb: Notable Names Database ID (of class \sQuote{character}).} 24 | #' \item{imdb: Internet Movie Database ID (of class \sQuote{character}).} 25 | #' \item{brittanica: Encyclopaedia Britannica ID (of class \sQuote{character}).} 26 | #' \item{quora: Quora ID (of class \sQuote{character}).} 27 | #' \item{votesmart: Project Vote Smart ID (of class \sQuote{character}).} 28 | #' \item{fec: Federal Election Commission ID (of class \sQuote{character}).} 29 | #' \item{ballotpedia: Ballotpedia ID (of class \sQuote{character}).} 30 | #' \item{opensecrets: Opensecrets ID (of class \sQuote{character}).} 31 | #' \item{genealogists: Genealogists ID (of class \sQuote{character}).} 32 | #' \item{politfacts: Politfacts ID (of class \sQuote{character}).} 33 | #' \item{nkcr: Czech National Library ID (of class \sQuote{character}).} 34 | #' \item{parlbio: parliament.uk biography ID (of class \sQuote{character}).} 35 | #' \item{parlthesaurus: UK Parliament thesaurus ID (of class \sQuote{character}).} 36 | #' \item{national: UK National Archives ID (of class \sQuote{character}).} 37 | #' \item{hansard: Hansard (1803-2005) ID (of class \sQuote{character}).} 38 | #' \item{publicwhip: PublicWhip ID (of class \sQuote{character}).} 39 | #' \item{theyworkforyou: TheyWorkForYou ID (of class \sQuote{character}).} 40 | #' } 41 | #' @examples 42 | #' \donttest{# Get entire 'IDs' table for the United States Senate 43 | #' 
usa_ids <- get_ids(legislature = "usa_senate") 44 | #' tibble::glimpse(usa_ids) 45 | #' 46 | #' # Get ICPSR IDs and add 'Offices' table for the United States Senate 47 | #' usa_ids_subset <- dplyr::inner_join(x = dplyr::filter(usa_ids, 48 | #' !is.na(icpsr)), 49 | #' y = get_office(legislature = "usa_senate"), 50 | #' by = "wikidataid") 51 | #' tibble::glimpse(usa_ids_subset) 52 | #' } 53 | #' @source 54 | #' Wikidata API, \url{https://www.wikidata.org/wiki/Wikidata:Main_Page} 55 | #' @export 56 | #' @importFrom curl nslookup 57 | #' @import dplyr 58 | get_ids <- function(legislature) { 59 | if (length(legislature) > 1) { 60 | stop ("\n\nNo more than one legislature can be called at once. Please provide only one valid three-letter country code.") 61 | } 62 | if (!(legislature %in% c("aut", "bra", "can", "cze", 63 | "deu", "esp", "fra", "gbr", 64 | "irl", "isr", "ita_house", "ita_senate", 65 | "jpn", "nld", "sco", "tur", 66 | "usa_house", "usa_senate"))) { 67 | stop (paste0("\n\nPlease provide a valid three-letter country code. legislatoR does not recognize the country code or does not contain data for ", 68 | paste0( 69 | paste0("\"", legislature, "\""), 70 | collapse = ", "), 71 | ". Use `legislatoR::cld_content()` to see country codes of available legislatures.")) 72 | } 73 | if (is.null(curl::nslookup("www.harvard.edu", error = FALSE))) { 74 | stop ("\n\nlegislatoR cannot establish a connection to Harvard Dataverse. 
Please check your Internet connection and whether Harvard Dataverse is online.") 75 | } 76 | endpoint <- "https://dataverse.harvard.edu/api/access/datafile/" 77 | file_id <- sysdata %>% filter(.data$table == "ids" & .data$country == legislature) 78 | dvurl <- paste0(endpoint, file_id$id) 79 | connect <- url(dvurl) 80 | on.exit(close(connect)) 81 | dataset <- readRDS(connect) 82 | return(dataset) 83 | } 84 | -------------------------------------------------------------------------------- /source/preparation_spain.R: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------------------------- 2 | # legislatoR 3 | # Sascha Göbel and Simon Munzert 4 | # Script: preparations for spain 5 | # October 2017 6 | # --------------------------------------------------------------------------------------- 7 | 8 | 9 | #### PREPARATIONS ======================================================================= 10 | 11 | # clear workspace ----------------------------------------------------------------------- 12 | rm(list = ls(all = TRUE)) 13 | 14 | # set working directory ----------------------------------------------------------------- 15 | setwd("D:/Sascha/Projects/legislatoR") 16 | 17 | # install and load packages and functions ----------------------------------------------- 18 | source("./code/packages.R") 19 | source("./code/functions.R") 20 | 21 | 22 | #### DATA PREPARATION =================================================================== 23 | 24 | # join core data, adjust names, and split into core and political ----------------------- 25 | spain <- readRDS("./data/spain") 26 | spain_title <- readRDS("./data/spain_title") 27 | #spain_faces <- readRDS("./data/spain_faces") 28 | spain_sex <- readRDS("./data/spain_sex") 29 | spain_religion <- readRDS("./data/spain_religion") 30 | spain_birth <- readRDS("./data/spain_birth") 31 | spain_death <- readRDS("./data/spain_death") 32 | 
spain_birthplace <- readRDS("./data/spain_birthplace") 33 | spain_deathplace <-readRDS("./data/spain_deathplace") 34 | spain <- left_join(x = spain, y = spain_title, by = "pageid_unique") %>% 35 | #left_join(x = ., y = spain_faces[,c(1,2)], by = "pageid") %>% # no ethnicity for Spain 36 | left_join(x = ., y = spain_sex, by = "wikidataid") %>% 37 | left_join(x = ., y = spain_religion, by = "wikidataid") %>% 38 | left_join(x = ., y = spain_birth, by = "wikidataid") %>% 39 | left_join(x = ., y = spain_death, by = "wikidataid") %>% 40 | left_join(x = ., y = spain_birthplace, by = "wikidataid") %>% 41 | left_join(x = ., y = spain_deathplace, by = "wikidataid") 42 | colnames(spain)[c(27,31,32)] <- c("wikititle", "birth", "death") 43 | spain <- spain %>% dplyr::select(country, pageid_unique, wikidataid, wikititle, name, sex, 44 | religion, birth, death, birthplace, deathplace, 45 | session, party, group, constituency, session_start, session_end, 46 | service) 47 | spain_core <- spain[!duplicated(spain$pageid_unique), 1:11] 48 | colnames(spain_core)[2] <- "pageid" 49 | spain_core$wikidataid <- ifelse(is.na(spain_core$wikidataid), 50 | paste0(spain_core$pageid, "-wd"), 51 | spain_core$wikidataid) 52 | spain_political <- spain[c(2, 12:18)] 53 | colnames(spain_political)[1] <- "pageid" 54 | rm(spain, spain_title, spain_sex, spain_religion, spain_birth, 55 | spain_death, spain_birthplace, spain_deathplace) 56 | 57 | # correct some name mismatches; pageids and wikidataids are correct 58 | spain_core[match(c("Q15256012", "Q2748756","Q66663492","Q44409046", 59 | "Q44630197", "Q64166732", "Q66663512", "Q11955329", 60 | "Q12399503", "Q5997107", "Q66663449", "Q44630268", 61 | "Q44409223", "Q44519245", "Q66663490", "Q448547", 62 | "Q3189830"),spain_core$wikidataid),]$name <- 63 | c("Francisco Ramos Fernández-Torrecilla", "Celestino Corbacho Chaves", 64 | "Germán Renau Martínez", "José Miguel González Moraga", 65 | "María Pía Sánchez Fernández", "Héctor Illueca Ballester", 66 | 
"Yolanda Seva Ruiz", "Xavier Tárrega Bernal", 67 | "Salvador Fernández Moreda", "María Margarita Robles Fernández", 68 | "Andrés Lorite Lorite", "Mercedes Toledo Silvestre", 69 | "Pío Pérez Laserna", "María Soledad Sánchez Jódar", 70 | "Margarita Prohens Rigo", "Pedro Duque", 71 | "Julio de España Moya") 72 | spain_core <- spain_core %>% filter(!(wikidataid %in% c("1-miss-wd", "20-miss-wd", "21-miss-wd", "4-miss-wd"))) 73 | 74 | # format traffic data ------------------------------------------------------------------- 75 | spain_traffic <- readRDS("./data/spain_traffic") 76 | spain_traffic$date <- spain_traffic$date %>% as.POSIXct(tz = "UTC") 77 | 78 | # format history data ------------------------------------------------------------------- 79 | spain_history <- readRDS("./data/spain_history") 80 | spain_history <- spain_history %>% select(pageid = pageid_unique, revid, parentid, user, 81 | userid, timestamp, size, comment) 82 | spain_history$timestamp <- spain_history$timestamp %>% str_replace("T", " ") %>% 83 | as.POSIXct(tz = "UTC") 84 | 85 | # format facial data -------------------------------------------------------------------- 86 | spain_faces <- readRDS("./data/spain_faces") 87 | spain_faces <- spain_faces[,-1] 88 | 89 | # save data ----------------------------------------------------------------------------- 90 | spain_social <- readRDS("./data/spain_social") 91 | spain_positions <- readRDS("./data/spain_positions") 92 | spain_occupation <- readRDS("./data/spain_occupation") 93 | spain_id <- readRDS("./data/spain_id") 94 | saveRDS(spain_core, "./package/legislatoR-data-v1.0.0/esp_core") 95 | saveRDS(spain_political, "./package/legislatoR-data-v1.0.0/esp_political") 96 | saveRDS(spain_history, "./package/legislatoR-data-v1.0.0/esp_history") 97 | saveRDS(spain_traffic, "./package/legislatoR-data-v1.0.0/esp_traffic") 98 | saveRDS(spain_social, "./package/legislatoR-data-v1.0.0/esp_social") 99 | saveRDS(spain_faces, 
"./package/legislatoR-data-v1.0.0/esp_portrait") 100 | saveRDS(spain_positions, "./package/legislatoR-data-v1.0.0/esp_office") 101 | saveRDS(spain_occupation, "./package/legislatoR-data-v1.0.0/esp_profession") 102 | saveRDS(spain_id, "./package/legislatoR-data-v1.0.0/esp_ids") 103 | 104 | 105 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # legislatoR: Interface to the Comparative
Legislators Database 2 | 3 | [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) 4 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/legislatoR)](https://cran.r-project.org/package=legislatoR) 5 | [![GitHub release version](https://img.shields.io/github/release/saschagobel/legislatoR.svg?style=flat)](https://github.com/saschagobel/legislatoR/releases) 6 | [![CRAN_Download_Badge](https://cranlogs.r-pkg.org/badges/grand-total/legislatoR)](https://cran.rstudio.com/web/packages/legislatoR/index.html) 7 | 8 | legislatoR is a package for the software environment R that facilitates access to the [Comparative Legislators Database (CLD)](https://complegdatabase.com/). The CLD includes political, sociodemographic, career, online presence, public attention, and visual information for over 67,000 contemporary and historical politicians from 16 countries. Data are also available for download in .csv and .sqlite formats at the [CLD's Dataverse](https://dataverse.harvard.edu/dataverse/cld). 9 | 10 | ## Content and data structure 11 | The CLD covers the following countries and time periods: 12 | 13 | | Country | Legislative sessions | Politicians (unique*) | Integrated with | 14 | | ------------------------------------ | --------------------------- | -------------------- | ------------------ | 15 | | Austria (Nationalrat) | all 27
(1920-2019) | 1,923 | [ParlSpeech V2](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/L4OAKN) (Rauh/Schwalbach 2020) | 16 | | Brazil (Câmara dos Deputados) | 38-57
(1947-2022) | 3,474 | | 17 | | Canada (House of Commons) | all 44
(1867-2021) | 4,567 | | 18 | | Czech Republic (Poslanecka Snemovna) | all 9
(1992-2021) | 1,124 | [ParlSpeech V1](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/E4RSP9) (Rauh et al. 2017) | 19 | | France (Assemblée) | all 16
(1958-2022) | 4,263 | | 20 | | Germany (Bundestag) | all 20
(1949-2021) | 4,371 | [BTVote data](https://dataverse.harvard.edu/dataverse/btvote) (Bergmann et al. 2018),
[ParlSpeech V1](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/E4RSP9) (Rauh et al. 2017),
[Reelection Prospects data](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/EBEDPI) (Stoffel/Sieberer 2017) | 21 | | Ireland (Dail) | all 33
(1918-2020) | 1,408 | [Database of Parliamentary Speeches in Ireland](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/6MZN76) (Herzog/Mikhaylov 2017) | 22 | | Israel (Knesset) | all 25
(1949-2022) | 1,022 | | 23 | | Italy (Camera dei deputati and Senato della Repubblica) | all 19
(1948-2022) | 5,149 | | 24 | | Japan (Shūgiin) | all 49
(1890-2021) | 6,581 | | 25 | | Netherlands (Tweede Kamer) | all 65
(1815-2021) | 1,887 | | 26 | | Scotland (Parliament) | all 6
(1999-2021) | 348 | [ParlScot](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/EQ9WBE) (Braby/Fraser 2021) | 27 | | Spain (Congreso de los Diputados) | all 14
(1979-2019) | 2,616 | [ParlSpeech V2](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/L4OAKN) (Rauh/Schwalbach 2020) | 28 | | Turkey (Büyük Millet Meclisi) | all 27
(1920-2018) | 5,298 | | 29 | | United Kingdom (House of Commons) | all 58
(1801-2019) | 11,321 | [EggersSpirling data](https://github.com/ArthurSpirling/EggersSpirlingDatabase) (starting from
38th session, Eggers/Spirling 2014),
[ParlSpeech V1](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/E4RSP9) (Rauh et al. 2017) | 30 | | United States (House and Senate) | all 117
(1789-2021) | 12,593 | [Voteview data](https://voteview.com/data) (Lewis et al. 2019),
[Congressional Bills Project data](http://www.congressionalbills.org/) (Adler/Wilkerson 2018) | 31 | | **16** | **529** | **67,945** | **12** | 32 | 33 | \* We only count legislators with a unique Wikipedia page or Wikidata ID. Sometimes legislators do not have either. Such cases are indicated by the string "miss" in the wikidataid or pageid. 34 | 35 | For each legislature, the CLD holds nine tables: 36 | 37 | 1. *Core* (sociodemographic data) 38 | 2. *Political* (political data) 39 | 3. *History* (full revision records of individual Wikipedia biographies) 40 | 4. *Traffic* (daily user traffic on individual Wikipedia biographies starting from July 2007) 41 | 5. *Social* (social media handles and personal website URLs) 42 | 6. *Portraits* (URLs to portraits) 43 | 7. *Offices* (public offices) 44 | 8. *Professions* (professions) 45 | 9. *IDs* (identifiers linking politicians to other files, databases, or websites) 46 | 47 | The tables contain the following variables (see respective R help files for further details): 48 | 49 | - *Core*: Country, Wikipedia page ID, Wikidata ID, Wikipedia Title, full name, sex, ethnicity, religion, date of birth and death, place of birth and death. 50 | - *Political*: Wikipedia page ID, legislative session, party affiliation, lower constituency, upper constituency, constituency ID, start and end date of legislative session, period of service, majority status, leader positions. 51 | - *History*: Wikipedia page ID, Wikipedia revision and previous revision ID, editor name/IP and ID, revision date and time, revision size, revision comment. 52 | - *Traffic*: Wikipedia page ID, date, user traffic. 53 | - *Social*: Wikidata ID, Twitter handle, Facebook handle, Youtube ID, Google Plus ID, Instagram handle, LinkedIn ID, personal website URL. 54 | - *Portraits*: Wikipedia page ID, Wikipedia portrait URL. 55 | - *Offices*: Wikidata ID, a range of offices such as attorney general, chief justice, mayor, party chair, secretary of state, etc. 
56 | - *Professions*: Wikidata ID, a range of professions such as accountant, farmer, historian, judge, mechanic, police officer, salesperson, teacher, etc. 57 | - *IDs*: Wikidata ID, IDs for integration with various political science datasets as well as a range of other IDs such as parliamentary website IDs, Library of Congress or German National Library IDs, Notable Names Database or Project Vote Smart IDs, etc. 58 | 59 | Note that for some legislatures or legislative periods, tables may only hold information for a subset of politicians or variables. 60 | 61 | The CLD comes as a relational database. This means that all tables can be joined with the *Core* table via one of two keys - the Wikipedia page ID or the Wikidata ID. These keys uniquely identify individual politicians. The figure below illustrates this structure and the CLD's content. 62 | 63 | 

64 | 65 |

66 | 67 | ## Installation 68 | legislatoR is available through CRAN and GitHub. To install the package from CRAN, type: 69 | 70 | ```r 71 | install.packages("legislatoR") 72 | ``` 73 | 74 | To install the package from Github, type: 75 | 76 | ```r 77 | devtools::install_github("saschagobel/legislatoR") 78 | ``` 79 | 80 | ## Usage 81 | 82 | A working Internet connection is required to access the CLD in R. This is because the data are stored online and not installed together with the package. The package provides table-specific function calls. These functions are named after the respective table (see [above](#content-and-data-structure)) and preceded by `get_`. To fetch the *Core* table, use the `get_core()` function, for the *Political* table, use the `get_political()` function. Call the package help file via `?legislatoR()` to get an overview of all function calls. Tables are legislature-specific, so a three-letter country code must be passed as an argument to the function. Here is a breakdown of all country codes. You can also call the `cld_content()` function to get an overview of the CLD's scope and valid country codes. 83 | 84 | | Country | Code | Country | Code | Country | Code | 85 | | ------------------------- |:-----------:| ---------------- |:----------:| ---------------------- |:------------------------:| 86 | | Austria | `aut` | Ireland | `irl` | Spain | `esp` | 87 | | Brazil | `bra` | Israel | `isr` | Turkey | `tur` | 88 | | Canada | `can` | Italy | `ita_house`/`ita_senate` | United Kingdom | `gbr` | 89 | | Czech Republic | `cze` | Japan | `jpn` | United States | `usa_house`/`usa_senate` | | 90 | | France | `fra` | Netherlands | `nld` | | | 91 | | Germany | `deu` | Scotland | `sco` | | | 92 | 93 | Tables can be joined and subsetted while being fetched and memory is only allocated by the parts of a table assigned into the environment. Basic fetching, joining, and subsetting of data are illustrated below. 
See the Vignette [Introducing legislatoR](http://htmlpreview.github.com/?https://github.com/saschagobel/legislatoR/blob/master/vignettes/legislatoR.html) for a detailed tutorial. 94 | 95 | ```r 96 | # load and attach legislatoR and dplyr 97 | library(legislatoR) 98 | library(dplyr) 99 | 100 | # assign entire Core table for the German Bundestag into the environment 101 | deu_politicians <- get_core(legislature = "deu") 102 | 103 | # assign data for the 8th legislative session into the environment 104 | deu_politicians_subset <- semi_join(x = get_core(legislature = "deu"), 105 | y = filter(get_political(legislature = "deu"), session == 8), 106 | by = "pageid") 107 | 108 | # join deu_politicians_subset with respective traffic on Wikipedia biographies 109 | deu_traffic <- left_join(x = deu_politicians_subset, 110 | y = get_traffic(legislature = "deu"), 111 | by = "pageid") 112 | 113 | # assign birthdate for members of the political party 'SPD' into the environment 114 | deu_birthdates_SPD <- semi_join(x = select(get_core(legislature = "deu"), pageid, birth), 115 | y = filter(get_political(legislature = "deu"), party == "SPD"), 116 | by = "pageid")$birth 117 | ``` 118 | 119 | ## News 120 | See [here](NEWS.md) for details on updates. 121 | 122 | ## Glossary 123 | See [here](GLOSSARY.md) for the full form of abbreviated country codes and party names and English translations of non-English party names. 124 | 125 | ## Sources 126 | The CLD was predominantly built using automated data extraction techniques. See the [source code](source) and [this list](SOURCES.md) of Web sources for more details. 127 | 128 | ## Citation 129 | Thank you for using the CLD and legislatoR! Please consider citing: 130 | 131 | Göbel, Sascha and Simon Munzert. 2022. "[The Comparative Legislators Database](https://www.cambridge.org/core/journals/british-journal-of-political-science/article/abs/comparative-legislators-database/D28BB58A8B2C08C8593DB741F42C18B2)". 
*British Journal of Political Science*, 52(3), 1398-1408. 132 | 133 | ## Support 134 | The work on this package was in part funded by the Daimler and Benz Foundation (Funding period 2017/18; project "Citizen and Elite Activity on the Wikipedia Market Place of Political Information"). 135 | 136 | Many thanks to Anna Wunderling for designing legislatoR's logo. 137 | 138 | ## Author information 139 | 140 | **Sascha Göbel** (corresponding author and repository maintainer)
141 | Goethe University Frankfurt
142 | Faculty of Social Sciences
143 | Theodor-W.-Adorno-Platz 6
144 | 60323 Frankfurt am Main, Germany
145 | Email: sascha.goebel@soz.uni-frankfurt.de 146 | 147 | **Simon Munzert**
148 | Hertie School 149 | -------------------------------------------------------------------------------- /vignettes/legislatoR.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Introducing legislatoR" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Introducing legislatoR} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\usepackage[utf8]{inputenc} 8 | --- 9 | ```{r include = FALSE} 10 | LOCAL <- identical(Sys.getenv("LOCAL"), "TRUE") 11 | knitr::opts_chunk$set(comment = "#>", collapse = TRUE, message = FALSE) 12 | ``` 13 | 14 | 15 | 16 | ###### Sascha Göbel and Simon Munzert -- April, 2020 17 | 18 | **legislatoR** facilitates access to the Comparative Legislators Database (CLD). The CLD includes political, sociodemographic, career, online presence, public attention, and visual information for over 45,000 contemporary and historical politicians from ten countries. Information is stored in nine topically distinguished tables for each country and arranged in a relational fashion. 19 | 20 | This vignette provides an introduction on how to use **legislatoR** to access and make the most of the information stored in the CLD. 21 | 22 | ## General access to the CLD 23 | Basic access to the CLD works through table-specific functions. Functions are named after the table they fetch and preceded by "get_". The table below lists data tables and corresponding function calls. Alternatively, you can call `?legislatoR()` to get an overview of all the functions in **legislatoR**. 
24 | 25 | | Table | Function | Description | Key | 26 | | :---------- | :------------------ | :-------------------------------------------------------------------- | :----------------- | 27 | | Core | `get_core()` | Fetches sociodemographic data of legislators | pageid, wikidataid | 28 | | Political | `get_political()` | Fetches political data of legislators | pageid | 29 | | History | `get_history()` | Fetches full revision histories of legislators' Wikipedia biographies | pageid | 30 | | Traffic | `get_traffic()` | Fetches daily user traffic on legislators' Wikipedia biographies | pageid | 31 | | Social | `get_social()` | Fetches social media handles and website URLs of legislators | wikidataid | 32 | | Portraits | `get_portrait()` | Fetches portrait urls of legislators | pageid | 33 | | Offices | `get_office()` | Fetches political and other offices of legislators | wikidataid | 34 | | Professions | `get_profession()` | Fetches occupational data of legislators | wikidataid | 35 | | IDs | `get_ids()` | Fetches a range of IDs of legislators | wikidataid | 36 | 37 | Every "get_" function has a "legislature" argument that takes a character string specifying the three-letter country code of the legislature for which a table shall be fetched. The table below lists all legislatures available in the CLD together with their three-letter country code. Alternatively, you can call `?cld_content()` to get an overview of the CLD's scope and valid three-letter country codes. This will also show you the sessions available for each legislature. 
38 | 39 | | Legislature | Code | 40 | | :----------------------------------- | :----------------------- | 41 | | Austria (Nationalrat) | `aut` | 42 | | Canada (House of Commons) | `can` | 43 | | Czech Republic (Poslanecka Snemovna) | `cze` | 44 | | France (Assemblée) | `fra` | 45 | | Germany (Bundestag) | `deu` | 46 | | Ireland (Dail) | `irl` | 47 | | Scotland (Parliament) | `sco` | 48 | | Spain (Congreso de los Diputados) | `esp` | 49 | | United Kingdom (House of Commons) | `gbr` | 50 | | United States (House and Senate) | `usa_house`/`usa_senate` | 51 | 52 | Here are some examples for fetching full tables for different countries. All tables come in a tidy (long) format. Every row represents a politician and every column a variable. 53 | 54 | ```{r eval = LOCAL} 55 | library(legislatoR) 56 | library(tibble) 57 | 58 | # get "Core" table for the United States House ------------------------------------------ 59 | usa_house_core <- get_core(legislature = "usa_house") 60 | glimpse(usa_house_core) 61 | 62 | # get "Political" table for the German Bundestag ---------------------------------------- 63 | deu_political <- get_political(legislature = "deu") 64 | glimpse(deu_political) 65 | 66 | # get "IDs" table for the Spanish Congreso ---------------------------------------------- 67 | esp_ids <- get_ids(legislature = "esp") 68 | glimpse(esp_ids) 69 | ``` 70 | 71 | ## Targeted access to the CLD 72 | **legislatoR** also facilitates more targeted access to the CLD than by simply downloading whole tables. Two legislator-specific keys, the Wikipedia page and the Wikidata ID, link all tables to the "Core" table. This allows for mutating and filtering joins using a popular grammar of data manipulation implemented in the 'dplyr' package. The table above lists the relevant key for each data table in the CLD. Here are some examples for combining and subsetting data from different tables. 
We always start from the "Core" table since it identifies legislators by name and country and never holds a legislator twice. 73 | 74 | ```{r eval = LOCAL} 75 | library(dplyr) 76 | 77 | # combine "Core" and "Political" tables for the Irish Dail ------------------------------ 78 | irl_join <- left_join(x = get_core(legislature = "irl"), 79 | y = get_political(legislature = "irl"), 80 | by = "pageid") 81 | glimpse(irl_join) 82 | 83 | # then add the "Social" table ----------------------------------------------------------- 84 | irl_join <- left_join(x = irl_join, 85 | y = get_social(legislature = "irl"), 86 | by = "wikidataid") 87 | glimpse(irl_join) 88 | 89 | # get "Core" table for Scottish Liberal Democrats 90 | sco_subset <- semi_join(x = get_core(legislature = "sco"), 91 | y = filter(get_political(legislature = "sco"), 92 | party == "Scottish Liberal Democrats"), 93 | by = "pageid") 94 | glimpse(sco_subset) 95 | 96 | # combine "Core" and "Political" tables for German Bundestag CDU/CSU and AfD members ---- 97 | deu_subset <- inner_join(x = get_core(legislature = "deu"), 98 | y = filter(get_political(legislature = "deu"), 99 | party %in% c("CDU", "CSU", "AfD")), 100 | by = "pageid") 101 | glimpse(deu_subset) 102 | 103 | # combine "Core" and "Political" tables for female legislators from the 37th Canadian 104 | # House of Commons ---------------------------------------------------------------------- 105 | can_subset <- inner_join(x = filter(get_core(legislature = "can"), sex == "female"), 106 | y = filter(get_political(legislature = "can"), session == 37), 107 | by = "pageid") 108 | glimpse(can_subset) 109 | 110 | # combine "Core", "Traffic", and "Social" tables for UK House Commons members with 111 | # Twitter handles ----------------------------------------------------------------------- 112 | uk_subset <- left_join(x = inner_join(x = get_core(legislature = "gbr"), 113 | y = filter(get_social(legislature = "gbr"), !is.na(twitter)), 114 | by = "wikidataid"), 115 | y 
= get_traffic(legislature = "gbr"), 116 | by = "pageid") 117 | glimpse(uk_subset) 118 | ``` 119 | 120 | Of course, you can also use the pipe operator `%>%` from the 'magrittr' package to improve code readability and reach your goal in fewer steps. 121 | 122 | ```{r eval = LOCAL} 123 | library(magrittr) 124 | 125 | # combine "Core", "IDs", and "Portraits" tables for the Austrian Nationalrat ------------ 126 | aut_join <- get_core(legislature = "aut") %>% 127 | left_join(get_ids(legislature = "aut"), 128 | by = "wikidataid") %>% 129 | left_join(get_portrait(legislature = "aut"), 130 | by = "pageid") 131 | glimpse(aut_join) 132 | 133 | # get "Core" table for high-profile politicians (top 1% of Wikipedia page views) of 134 | # French Assemblée ---------------------------------------------------------------------- 135 | fra_subset <- get_traffic(legislature = "fra") %>% 136 | group_by(pageid) %>% 137 | summarise(total_traffic = sum(traffic)) %>% 138 | filter(total_traffic >= quantile(total_traffic, probs = 0.99)) %>% 139 | semi_join(x = get_core(legislature = "fra"), 140 | y = ., 141 | by = "pageid") 142 | glimpse(fra_subset) 143 | ``` 144 | 145 | ## Integrating with other sources 146 | The CLD is integrated with several other data projects. You can call `?get_ids()` to get an overview of all projects the CLD is integrated with and how respective IDs are named. Here are two examples that show how to use the IDs to join the CLD with other projects. The first example integrates the "Core" table for the Spanish Congreso with a small one-month-extract of the ParlSpeech V2 data (Rauh and Schwalbach 2020). The second example integrates the "Core" and "Political" tables for the Irish Dail with a small one-month-extract of the Database of Parliamentary Speeches in Ireland (Herzog and Mikhaylov 2017).
147 | 148 | ```{r eval = LOCAL} 149 | library(stringr) 150 | 151 | # import ParlSpeech example and rename ID to match CLD ---------------------------------- 152 | parlspeech_example <- readRDS("parlspeech_example") %>% 153 | rename(parlspeech = speaker) 154 | 155 | # remove whitespace from start and end of the ID in ParlSpeech -------------------------- 156 | parlspeech_example$parlspeech <- str_trim(parlspeech_example$parlspeech) 157 | 158 | # integrate CLD with ParlSpeech example ------------------------------------------------- 159 | esp_speeches <- get_core(legislature = "esp") %>% 160 | left_join(get_ids(legislature = "esp"), 161 | by = "wikidataid") %>% 162 | filter(!is.na(parlspeech)) %>% 163 | inner_join(parlspeech_example, 164 | by = "parlspeech") 165 | 166 | # import Database of Parliamentary Speeches in Ireland example and rename ID ------------ 167 | dpsi_example <- readRDS("dpsi_example") %>% 168 | rename(dpsi = memberID) 169 | 170 | # integrate CLD with Database of Parliamentary Speeches in Ireland example -------------- 171 | irl_speeches <- get_core(legislature = "irl") %>% 172 | inner_join(filter(get_political(legislature = "irl"), session == 28), 173 | by = "pageid") %>% 174 | left_join(get_ids(legislature = "irl"), 175 | by = "wikidataid") %>% 176 | inner_join(dpsi_example, 177 | by = "dpsi") 178 | ``` 179 | 180 | ## Map over legislatures 181 | So far we have accessed the CLD legislature by legislature. It is also possible to retrieve data for multiple legislatures at once with the help of the `cld_content()` function. This function returns the three-letter country codes for all legislatures available in the CLD as well as the available legislative sessions. This helps to conveniently map over legislatures. In the first example below we `purrr::map()` over the names of all legislatures to get a list of "Core" tables.
In the second example, we do the same and additionally join with the respective "Political" tables cut to the last three legislative sessions. To achieve this, we call `cld_content()` within `purrr::map()` one more time, passing the name of the respective legislature to get all available sessions, of which we then select the last three sessions to filter the "Political" tables accordingly before joining with the "Core" table. You can always pass a vector of three-letter country codes to the "legislature" argument of `cld_content()` beforehand or otherwise subset the list returned by the function to select a specific subset of legislatures. 182 | 183 | ```{r eval = LOCAL} 184 | library(purrr) 185 | 186 | # get "Core" table for all legislatures ------------------------------------------------- 187 | all_core <- cld_content() %>% 188 | names() %>% 189 | map(get_core) 190 | glimpse(all_core) 191 | 192 | # get "Core" and "Political" tables for last three sessions of all legislatures ---------- 193 | recent_sessions <- cld_content() %>% 194 | names() %>% 195 | map(~ { 196 | get_core(legislature = .x) %>% 197 | inner_join(filter(get_political(legislature = .x), 198 | session %in% tail(cld_content(.x)[[1]], 3)), 199 | by = "pageid") 200 | }) 201 | glimpse(recent_sessions) 202 | ``` 203 | 204 | ## Other Formats 205 | You do not have to be an R user to work with the CLD. If you are more familiar with conducting analyses with other software, such as Excel, SAS, STATA, or SPSS, you can use **legislatoR** to get the data you require as illustrated above and then export it into the desired format as shown below.
206 | 207 | ```{r eval = FALSE} 208 | library(haven) 209 | 210 | # save data as .csv for use with Excel -------------------------------------------------- 211 | write.csv(fra_subset, "fra_subset.csv") 212 | 213 | # save data as .sas for use with SAS ---------------------------------------------------- 214 | write_sas(sco_subset, "sco_subset.sas") 215 | 216 | # save data as .dta for use with STATA -------------------------------------------------- 217 | write_dta(irl_join, "irl_join.dta") 218 | 219 | # save data as .sav for use with SPSS --------------------------------------------------- 220 | write_sav(esp_speeches, "esp_speeches.sav") 221 | ``` 222 | -------------------------------------------------------------------------------- /GLOSSARY.md: -------------------------------------------------------------------------------- 1 | # Glossary 2 | 3 | #### Country codes 4 | * AUT - Austria 5 | * CAN - Canada 6 | * CZE - Czech Republic 7 | * FRA - France 8 | * DEU - Germany 9 | * IRL - Ireland 10 | * SCO - Scotland 11 | * ESP - Spain 12 | * GBR - United Kingdom 13 | * USA - United States of America 14 | 15 | #### Austria party abbreviations and translations 16 | * BZÖ - Bündnis Zukunft Österreich (Alliance for the Future of Austria) 17 | * CSP - Christlichsoziale Partei (Christian Social Party) 18 | * FPÖ - Freiheitliche Partei Österreichs (Freedom Party of Austria) 19 | * GdP - Großdeutsche Volkspartei (Greater German People's Party) 20 | * GRÜNE - Die Grüne Alternative (The Greens – The Green Alternative) 21 | * HB - Heimatblock 22 | * KPÖ - Kommunistische Partei Österreichs (Communist Party of Austria) 23 | * LBd - Landbund für Österreich 24 | * LIF - Liberales Forum (Liberal Forum) 25 | * NEOS - Das Neue Österreich und Liberales Forum (The New Austria and Liberal Forum) 26 | * none - Independent 27 | * NWB - Nationaler Wirtschaftsblock (National Economic Block and Landbund) 28 | * ÖVP - Österreichische Volkspartei (Austrian People's Party) 29 | * PILZ - Liste Peter 
Pilz 30 | * SdP - Sozialdemokratische Partei (Social Democratic Party) 31 | * SPÖ - Sozialdemokratische Partei Österreichs (Social Democratic Party of Austria) 32 | * STRONACH - Team Stronach für Österreich 33 | * VO - Wahlgemeinschaft Österreichische Volksopposition (Electoral Community of the Austrian People's Opposition) 34 | * WdU - Wahlpartei der Unabhängigen (Federation of Independents) 35 | 36 | #### Czech Republic party abbreviations and translations 37 | * ANO - Akce nespokojených obcanu (Action of Dissatisfied Citizens) 38 | * CSS - Ceská strana národne sociální (Czech National Social Party) 39 | * CSSD - Ceská strana sociálne demokratická (Czech Social Democratic Party) 40 | * DSP - Demokratická strana práce (Democratic Labour Party) 41 | * HSD–SMS - Hnutí za samosprávnou demokracii–Spolecnost pro Moravu a Slezsko (Movement for Autonomous Democracy–Party for Moravia and Silesia) 42 | * KDS - Krestanskodemokratická strana (Christian Democratic Party) 43 | * KDU–CSL - Krestanská a demokratická unie – Ceskoslovenská strana lidová (Christian and Democratic Union – Czechoslovak People's Party) 44 | * KSCM - Komunistická strana Cech a Moravy (Communist Party of Bohemia and Moravia) 45 | * LSU - Liberálne sociální unie (Liberal-Social Union) 46 | * MNS - Moravská národní strana (Moravian National Party) 47 | * ODA - Obcanská demokratická aliance (Civic Democratic Alliance) 48 | * ODS - Obcanská demokratická strana (Civic Democratic Party) 49 | * Piráti - Ceská pirátská strana (Czech Pirate Party) 50 | * SPD - Svoboda a prímá demokracie - Tomio Okamura (Freedom and Direct Democracy – Tomio Okamura) 51 | * SPR-RSC - Sdružení pro republiku - Republikánská strana Ceskoslovenska (Rally for the Republic – Republican Party of Czechoslovakia) 52 | * STAN - Starostové a nezávislí (Mayors and Independents) 53 | * SZ - Strana zelených (Green Party) 54 | * TOP_09 - Tradice Odpovednost Prosperita (Tradition Responsibility Prosperity) 55 | * US-DEU - Unie 
Svobody–Demokratická unie (Freedom Union–Democratic Union) 56 | * Úsvit - Úsvit – Národní koalice (Dawn – National Coalition) 57 | * VV - Veci verejné (Public Affairs) 58 | 59 | #### France party abbreviations and translations 60 | * AC - Alliance centriste (Centrist Alliance) 61 | * Agir - Agir, la droite constructive (Act, the Constructive Right) 62 | * CD - Centre démocratique (Democratic Center) 63 | * CE - Calédonie ensemble (Caledonia Together) 64 | * DL - Démocratie libérale (Liberal Democracy) 65 | * DLF - Debout la France (France Arise) 66 | * DVD - Divers droite (Miscellaneous right) 67 | * DVG - Divers gauche (Miscellaneous left) 68 | * E! - Ensemble! 69 | * EAS - Regroupement national pour l'unité de la République (National Grouping for the Unity of the Republic) 70 | * ED - Entente démocratique (Democratic agreement) 71 | * FD - Front démocrate (Democratic Front) 72 | * FGDS - Fédération de la gauche démocrate et socialiste (Federation of the Democratic and Socialist Left) 73 | * FN - Front national (National Rally) 74 | * GDR - Gauche démocrate et républicaine (Democratic and Republican Left) 75 | * GÉ - Génération écologie (Ecology Generation) 76 | * IPAS - Centre national des indépendants et paysans (National Centre of Independents and Peasants) 77 | * La France insoumise - (Unsubmissive France) 78 | * LC - Les Centristes (The Centrists) 79 | * LP - Les Patriotes (The Patriots) 80 | * LR - Les Républicains (The Republicans) 81 | * LS - Ligue du Sud (Southern League) 82 | * MdP - Mouvement des progressistes (Movement of Progressives) 83 | * MIM - Mouvement indépendantiste martiniquais (Martinican Independence Movement) 84 | * MoDem - Mouvement démocrate et apparentés (Democratic Movement and affiliated group) 85 | * MPR - Pour La Réunion (For Reunion) 86 | * MRC - Mouvement républicain et citoyen (Citizen and Republican Movement) 87 | * MRSL - Mouvement radical, social et libéral (Radical Movement) 88 | * NC - Groupe Nouveau Centre (Group New Center) 
89 | * PaC - Pè a Corsica 90 | * PCF - Parti communiste français (French Communist Party) 91 | * PDM - Progrès et démocratie moderne (Progress and Modern Democracy) 92 | * PÉ - Parti écologiste (Ecologist Party) 93 | * PG - Parti de gauche (Left Party) 94 | * PP - Place publique (Public place) 95 | * PPM - Parti progressiste martiniquais (Martinican Progressive Party) 96 | * PS - Parti socialiste (Socialist Party) 97 | * PSG - Parti socialiste guyanais (Guianese Socialist Party) 98 | * PSRG - Parti socialiste et Radicaux de gauche (Socialist Party and Radical Left) 99 | * RCV - Groupe radical, citoyen et vert (Radical group, citizen and green) 100 | * RD - Rassemblement démocratique (Democratic Rally) 101 | * RDM - Rassemblement démocratique pour la Martinique (Martinican Democratic Rally) 102 | * RDS - Réformateurs démocrates sociaux (Social Democratic Reformers) 103 | * RE974 - Rézistans Égalité 974 104 | * REM - La République en marche (The Republic on the move) 105 | * RI - Fédération nationale des républicains indépendants (Independent Republicans) 106 | * RPCD - Républicains populaires et du Centre démocratique (Popular Republicans and Democratic Center) 107 | * RPR - Rassemblement pour la République (Rally for the Republic) 108 | * RRDP - Groupe radical, républicain, démocrate et progressiste (Radical, Republican, Democratic and Progressive) 109 | * SER - Groupe socialiste, écologiste et républicain (Socialist, Ecologist and Republican Group) 110 | * SFIO - Section française de l'Internationale ouvrière (French Section of the Workers' International) 111 | * SRC - Socialiste, radical, citoyen et divers gauche (Socialist, radical, citizen and various left) 112 | * TH - Tavini huiraatira 113 | * UC - Union centriste (Centrist Union) 114 | * UDC - Union du centre (Union of the Center) 115 | * UDF - Union pour la démocratie française (Union for French Democracy) 116 | * UDI - Union des démocrates et indépendants (Union of Democrats and Independent) 117 | * UDR - 
Union des démocrates pour la République (Union of Democrats for the Republic) 118 | * UMP - Union pour un mouvement populaire (Union for a Popular Movement) 119 | * UNR-UDT - Union pour la nouvelle République-Union démocratique du travail (Union for the New Republic-Democratic Union of Labor) 120 | * UNR - Union pour la nouvelle République (Union for the New Republic) 121 | 122 | #### Germany party abbreviations and translations 123 | * AfD - Alternative für Deutschland (Alternative for Germany) 124 | * AL - Alternative Liste für Demokratie und Umweltschutz (Alternative List for Democracy and Environmental Protection) 125 | * BP - Bayernpartei (Bavaria Party) 126 | * BÜNDNIS 90/DIE GRÜNEN - (Alliance 90/The Greens) 127 | * CDU - Christlich Demokratische Union Deutschlands (Christian Democratic Union of Germany) 128 | * CSU - Christlich-Soziale Union in Bayern (Christian Social Union in Bavaria) 129 | * CVP - Christliche Volkspartei des Saarlandes (Christian People's Party of the Saarland) 130 | * DIE LINKE - Die Linke (The Left) 131 | * DKP-DRP - Deutsche Konservative Partei – Deutsche Rechtspartei (German Right Party) 132 | * DP - Deutsche Partei (German Party) 133 | * DPS - Demokratische Partei Saar (Democratic Party Saar) 134 | * DSU - Deutsche Soziale Union (German Social Union) 135 | * FDP - Freie Demokratische Partei (Free Democratic Party) 136 | * FDV - Freie Deutsche Volkspartei (Free German People's Party) 137 | * GB/BHE - Gesamtdeutscher Block/Bund der Heimatvertriebenen und Entrechteten (All-German Bloc/League of Expellees and Deprived of Rights) 138 | * KPD - Kommunistische Partei Deutschlands (Communist Party of Germany) 139 | * PDS - Partei des Demokratischen Sozialismus (Party of Democratic Socialism) 140 | * SPD - Sozialdemokratische Partei Deutschlands (Social Democratic Party of Germany) 141 | * WAV - Wirtschaftliche Aufbau-Vereinigung (Economic Reconstruction Union) 142 | * ZENTRUM - Deutsche Zentrumspartei (Centre Party) 143 | 144 | #### Ireland party
translations 145 | * Clann na Poblachta (Family/Children of the Republic) 146 | * Clann na Talmhan (Family/Children of the land) 147 | * Cumann na nGaedheal (Society of the Gaels) 148 | * Fianna Fáil (Soldiers of Destiny) 149 | * Fine Gael (Tribe of the Irish) 150 | * Sinn Féin (We Ourselves) 151 | 152 | #### United Kingdom party translations 153 | * Plaid Cymru (Party of Wales) 154 | * Sinn Féin (We Ourselves) 155 | 156 | #### United States of America House party abbreviations 157 | * A - Native American Party 158 | * AAP - Anti-Administration Party (DR) 159 | * AJ - National Republican Party 160 | * AL - American Labor Party 161 | * ALD - Lecompton Constitution 162 | * AM - Anti-Masonic Party 163 | * AMon - Anti-Monopoly Party 164 | * C - Conservative Party 165 | * CU - Constitutional Union Party 166 | * D - Democratic Party 167 | * D/PPD - Popular Democratic Party (Puerto Rico) 168 | * DFL - Minnesota Democratic–Farmer–Labor Party 169 | * DL - Liberal Party of New York 170 | * DNPL - North Dakota Democratic–Nonpartisan League Party 171 | * DR - Democratic-Republican Party 172 | * F - Federalist Party (Pro-Administration Party) 173 | * Fed - Federalist Party 174 | * FL - Farmer–Labor Party 175 | * FS - Free Soil Party 176 | * Home Rule - Hawaiian Independent Party 177 | * J - Jacksonian Democracy 178 | * L - Socialist Labor Party of America 179 | * Lib - Liberal Party of New York 180 | * LO - Law and Order Party of Rhode Island 181 | * LR - Liberal Republican Party 182 | * N - Nullifier Party 183 | * NaC - Nacionalista Party 184 | * NG - Greenback Party 185 | * NU - National Union Party 186 | * O - Opposition Party 187 | * PAP - Pro-Administration Party (PAP) 188 | * PNP/D - New Progressive Party (Puerto Rico)/Democratic 189 | * PNP/R - New Progressive Party (Puerto Rico)/Republican 190 | * Pop - People's Party 191 | * Prog - Progressive Party 192 | * Proh - Prohibition Party 193 | * R - Republican Party 194 | * RA - Readjuster Party 195 | * S - Silver Party 196
| * Soc - Socialist Party of America 197 | * Socialist - Socialist Party (Puerto Rico) 198 | * SR = Silver Republican Party 199 | * SRi - States' Rights Party 200 | * U - Unionist Party, later Unconditional Unionist Party 201 | * Unionist - Unionist Party (Puerto Rico) 202 | * UPR - Union of Puerto Rico 203 | * W - Whig Party 204 | 205 | #### United States of America Senate party abbreviations 206 | * L - Liberty Party 207 | * IMN - Independence Party of Minnesota 208 | * LR - Liberal Republican Party 209 | -------------------------------------------------------------------------------- /source/integration_czech.R: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------------------------- 2 | # legislatoR 3 | # Sascha Göbel and Simon Munzert 4 | # Script: integration with czech parlspeech 5 | # This script was written by Lada Rudnitckaia 6 | # March 2020 7 | # --------------------------------------------------------------------------------------- 8 | 9 | 10 | # devtools::install_github("saschagobel/legislatoR") 11 | library(legislatoR) 12 | library(xlsx) 13 | setwd("") 14 | 15 | if (.Platform$OS.type == 'windows') { 16 | Sys.setlocale(category = 'LC_ALL','English_United States.1250') 17 | } else { 18 | Sys.setlocale(category = 'LC_ALL','en_US.UTF-8') 19 | } 20 | 21 | cze <- get_core(legislature = "cze") 22 | cze_pol <- get_political(legislature = "cze") 23 | load("Corp_PSP.Rdata") # parl speech data 24 | 25 | ## Retrieve year from session 26 | cze_pol$start_year <- format(cze_pol$session_start,"%Y") 27 | psp.corpus$start_year <- substr(psp.corpus$session, start=1, stop=4) 28 | 29 | ## Filter by session 1-7 (as in ParlSpeech) 30 | cze_pol_1_7 <- cze_pol[cze_pol$start_year == 1992 | 31 | cze_pol$start_year == 1996 | 32 | cze_pol$start_year == 1998 | 33 | cze_pol$start_year == 2002 | 34 | cze_pol$start_year == 2006 | 35 | cze_pol$start_year == 2010 | 36 | 
cze_pol$start_year == 2013,] 37 | 38 | ### PARTY NAMES 39 | ### Since we will use party name for matching, they should be similar in cze_pol and ParlSpeech 40 | ## Check party names in cze_pol 41 | fr <- data.frame(table(as.matrix(cze_pol_1_7$party, useNA = "always"))) 42 | fr 43 | ## Check party names in ParlSpeech 44 | fr_ParlSpeech <- data.frame(table(as.matrix(psp.corpus$party, useNA = "always"))) 45 | fr_ParlSpeech 46 | 47 | ## Change party names in ParlSpeech to respective name from cze_pol 48 | # to change in cze_pol: 49 | # 1 ANO - ANO 50 | # 2 CSS - ? 51 | # 3 CSSD - CSSD 52 | # 4 DSP - ? 53 | # 5 HSD-SMS - HSD-SMS 54 | # 6 KDS - KDS 55 | # 7 KDU-CSL - KDU-CSL 56 | # 8 KSCM - KSCM 57 | # 9 LSU - LSU 58 | # 10 MNS - ? 59 | # 11 ODA - ODA 60 | # 12 ODS - ODS 61 | # 13 SPR-RSC - SPR-RSC 62 | # 14 SZ - SZ 63 | # 15 TOP 09 - TOP09 64 | # 16 US-DEU - US-DEU 65 | # 17 Usvit - Usvit 66 | # 18 VV - VV 67 | 68 | # to change in ParlSpeech: 69 | # 19 CMSS 70 | # 20 CMUS 71 | # 21 HSDMS 72 | # 22 LB 73 | # 23 LSNS 74 | # 24 Nez.-SZ 75 | # 25 Nezarazeni 76 | # 26 ONH 77 | # 27 other 78 | # 28 TOP 09 a Starostove 79 | # 29 US 80 | 81 | cze_pol_1_7$party <- gsub("CSS", "other", cze_pol_1_7$party) 82 | cze_pol_1_7$party <- gsub("DSP", "other", cze_pol_1_7$party) 83 | cze_pol_1_7$party <- gsub("MNS", "other", cze_pol_1_7$party) 84 | cze_pol_1_7$party <- gsub("TOP 09", "TOP09", cze_pol_1_7$party) 85 | 86 | psp.corpus$party <- gsub("CMSS", "other", psp.corpus$party) 87 | psp.corpus$party <- gsub("CMUS", "other", psp.corpus$party) 88 | psp.corpus$party <- gsub("HSDMS", "other", psp.corpus$party) 89 | psp.corpus$party <- gsub("LB", "other", psp.corpus$party) 90 | psp.corpus$party <- gsub("LSNS", "other", psp.corpus$party) 91 | psp.corpus$party <- gsub("Nez.-SZ", "other", psp.corpus$party) 92 | psp.corpus$party <- gsub("Nezarazeni", "other", psp.corpus$party) 93 | psp.corpus$party <- gsub("ONH", "other", psp.corpus$party) 94 | psp.corpus$party <- gsub("TOP 09 a Starostove", 
"other", psp.corpus$party) 95 | psp.corpus$party <- gsub("US_DEU", "1", psp.corpus$party) 96 | psp.corpus$party <- gsub("Usvit", "2", psp.corpus$party) 97 | psp.corpus$party <- gsub("US", "other", psp.corpus$party) 98 | psp.corpus$party <- gsub("1", "US_DEU", psp.corpus$party) 99 | psp.corpus$party <- gsub("2", "Usvit", psp.corpus$party) 100 | 101 | 102 | ## Add wikidataids and names to cze_pol from cze 103 | cze_pol_1_7$wikidataid <- cze$wikidataid[match(cze_pol_1_7$pageid, cze$pageid)] 104 | cze_pol_1_7$name <- cze$name[match(cze_pol_1_7$pageid, cze$pageid)] 105 | 106 | 107 | ### NAMES 108 | ## Some names include middle names that are not mentioned in ParlSpeech. Let's remove them 109 | ## and add a new column 'name1' 110 | cze_pol_1_7$name_split = strsplit(cze_pol_1_7$name, ' ') 111 | cze_pol_1_7$firstname = sapply(cze_pol_1_7$name_split, function(x) x[1]) 112 | cze_pol_1_7$lastname = sapply(cze_pol_1_7$name_split, function(x) x[length(x)]) 113 | cze_pol_1_7$name1 <- paste(cze_pol_1_7$firstname, cze_pol_1_7$lastname, sep = " ", collapse = NULL) 114 | 115 | ## Concatenate name and party to match both by name and party 116 | cze_pol_1_7$name_party <- paste(cze_pol_1_7$name, cze_pol_1_7$party, sep = " ", collapse = NULL) 117 | cze_pol_1_7$name1_party <- paste(cze_pol_1_7$name1, cze_pol_1_7$party, sep = " ", collapse = NULL) 118 | 119 | 120 | 121 | ################################### Session 1 ################################### 122 | ### Filter by 1st session 123 | cze_pol_1 <- cze_pol_1_7[cze_pol_1_7$start_year == 1992,] 124 | ParlSpeech_1 <- psp.corpus[psp.corpus$start_year == 1993,] 125 | 126 | ### NAMES 127 | ParlSpeech_1$name_party <- paste(ParlSpeech_1$speaker, ParlSpeech_1$party, 128 | sep = " ", collapse = NULL) 129 | ### FREQUENCY 130 | ## Name 131 | fr <- data.frame(table(as.matrix(cze_pol_1$name, useNA = "always"))) 132 | cze_pol_1$uniquename <- fr$Freq[match(cze_pol_1$name, fr$Var1)] 133 | fr_ <- fr[fr$Freq != 1,] 134 | fr_ 135 | 136 | ## Name1 137 | fr 
<- data.frame(table(as.matrix(cze_pol_1$name1, useNA = "always"))) 138 | cze_pol_1$uniquename1 <- fr$Freq[match(cze_pol_1$name1, fr$Var1)] 139 | fr1_ <- fr[fr$Freq != 1,] 140 | fr1_ 141 | 142 | ## Name + party 143 | fr <- data.frame(table(as.matrix(cze_pol_1$name_party, useNA = "always"))) 144 | cze_pol_1$uniquename_party <- fr$Freq[match(cze_pol_1$name_party, fr$Var1)] 145 | fr_p <- fr[fr$Freq != 1,] 146 | fr_p 147 | 148 | ## Name1 + party 149 | fr <- data.frame(table(as.matrix(cze_pol_1$name1_party, useNA = "always"))) 150 | cze_pol_1$uniquename1_party <- fr$Freq[match(cze_pol_1$name1_party, fr$Var1)] 151 | fr1_p <- fr[fr$Freq != 1,] 152 | fr1_p 153 | 154 | ### MATCHING 155 | ### Match only if: 156 | ### 1. the desired value is still missing 157 | ### 2. the pair name+party is unique in uk_pol (we assume that it's always true in ParlSpeech) 158 | ### 3. if possible, match manually 159 | 160 | ## Create empty columns 161 | cze_pol_1$speaker <- NA 162 | 163 | ## Match by name 164 | cze_pol_1$speaker <- ifelse(is.na(cze_pol_1$speaker) & cze_pol_1$uniquename == 1, 165 | ParlSpeech_1$speaker[match(cze_pol_1$name, ParlSpeech_1$speaker)], 166 | cze_pol_1$speaker) 167 | ## Match by name1 168 | cze_pol_1$speaker <- ifelse(is.na(cze_pol_1$speaker) & cze_pol_1$uniquename1 == 1, 169 | ParlSpeech_1$speaker[match(cze_pol_1$name1, ParlSpeech_1$speaker)], 170 | cze_pol_1$speaker) 171 | ## Match by name and party 172 | cze_pol_1$speaker <- ifelse(is.na(cze_pol_1$speaker) & cze_pol_1$uniquename_party == 1, 173 | ParlSpeech_1$speaker[match(cze_pol_1$name_party, ParlSpeech_1$name_party)], 174 | cze_pol_1$speaker) 175 | ## Match by name1 and party 176 | cze_pol_1$speaker <- ifelse(is.na(cze_pol_1$speaker) & cze_pol_1$uniquename1_party == 1, 177 | ParlSpeech_1$speaker[match(cze_pol_1$name1_party, ParlSpeech_1$name_party)], 178 | cze_pol_1$speaker) 179 | # Check the number of still missing data 180 | sum(is.na(cze_pol_1$speaker)) 181 | 182 | 183 | ## Match manually (check which 
## existing in ParlSpeech values are not in cze_pol)
# Create csv with unmatched data for manual matching (the code is commented
# out since it is just preparation for manual matching)
# ParlSpeech_1$unname <- cze_pol_1$uniquename_party[match(ParlSpeech_1$name_party,cze_pol_1$name_party)]
# ParlSpeech_1$unname1 <- cze_pol_1$uniquename1_party[match(ParlSpeech_1$name_party,cze_pol_1$name1_party)]
# ParlSpeech_1$match <- NA
# ParlSpeech_1$match <- ifelse(is.na(ParlSpeech_1$match) & ParlSpeech_1$unname == 1,
#                              "matched", ParlSpeech_1$match)
# ParlSpeech_1$match <- ifelse(is.na(ParlSpeech_1$match) & ParlSpeech_1$unname1 == 1,
#                              "matched", ParlSpeech_1$match)
# unmatched_ParlSpeech_1 <- ParlSpeech_1[is.na(ParlSpeech_1$match),]
# unmatched_cze_pol_1 <- cze_pol_1[is.na(cze_pol_1$speaker),]
# unmatched_cze_pol_1 <- unmatched_cze_pol_1[, !(colnames(unmatched_cze_pol_1) %in% c("name_split"))]
# write.xlsx(unmatched_ParlSpeech_1, file = "unmatched_ParlSpeech_1.xlsx", row.names=FALSE)
# write.xlsx(unmatched_cze_pol_1, file = "unmatched_cze_pol_1.xlsx", row.names=FALSE)

# Match manually
# no matches


### OUTPUT
## Add ParlSpeech speaker's id to cze from cze_pol_1 by wikidataid.
## Session 1 assigns the column directly; rows of cze whose wikidataid is not
## found in cze_pol_1 get NA.
cze$ParlSpeech_speaker <-
  cze_pol_1$speaker[match(cze$wikidataid, cze_pol_1$wikidataid)]




################################### Session 2 ###################################
### Filter by 2nd session (rows with start_year == 1996)
cze_pol_2 <- cze_pol_1_7[cze_pol_1_7$start_year == 1996, ]
ParlSpeech_2 <- psp.corpus[psp.corpus$start_year == 1996, ]

### NAMES
## "speaker party" key on the ParlSpeech side, compared below with the
## name_party / name1_party columns of cze_pol_2.
ParlSpeech_2$name_party <- paste(ParlSpeech_2$speaker, ParlSpeech_2$party,
                                 sep = " ")

### FREQUENCY
## For every key, uniquename* stores how often the key occurs in cze_pol_2;
## the fr*_ objects keep the keys that do not occur exactly once and are
## evaluated at top level for inspection.
## useNA is passed to table() itself: as.matrix() silently ignores it, so
## missing keys would otherwise never be tabulated.
## Name
fr <- data.frame(table(as.matrix(cze_pol_2$name), useNA = "always"))
cze_pol_2$uniquename <- fr$Freq[match(cze_pol_2$name, fr$Var1)]
fr_ <- fr[fr$Freq != 1, ]
fr_

## Name1
fr <- data.frame(table(as.matrix(cze_pol_2$name1), useNA = "always"))
cze_pol_2$uniquename1 <- fr$Freq[match(cze_pol_2$name1, fr$Var1)]
fr1_ <- fr[fr$Freq != 1, ]
fr1_

## Name + party
fr <- data.frame(table(as.matrix(cze_pol_2$name_party), useNA = "always"))
cze_pol_2$uniquename_party <- fr$Freq[match(cze_pol_2$name_party, fr$Var1)]
fr_p <- fr[fr$Freq != 1, ]
fr_p

## Name1 + party
fr <- data.frame(table(as.matrix(cze_pol_2$name1_party), useNA = "always"))
cze_pol_2$uniquename1_party <- fr$Freq[match(cze_pol_2$name1_party, fr$Var1)]
fr1_p <- fr[fr$Freq != 1, ]
fr1_p

### MATCHING
## Each pass fills speaker only where it is still NA and the key occurs
## exactly once in cze_pol_2; match() takes the first hit in ParlSpeech_2.
## Create empty columns
cze_pol_2$speaker <- NA

## Match by name
cze_pol_2$speaker <- ifelse(
  is.na(cze_pol_2$speaker) & cze_pol_2$uniquename == 1,
  ParlSpeech_2$speaker[match(cze_pol_2$name, ParlSpeech_2$speaker)],
  cze_pol_2$speaker
)
## Match by name1
cze_pol_2$speaker <- ifelse(
  is.na(cze_pol_2$speaker) & cze_pol_2$uniquename1 == 1,
  ParlSpeech_2$speaker[match(cze_pol_2$name1, ParlSpeech_2$speaker)],
  cze_pol_2$speaker
)
## Match by name and party
cze_pol_2$speaker <- ifelse(
  is.na(cze_pol_2$speaker) & cze_pol_2$uniquename_party == 1,
  ParlSpeech_2$speaker[match(cze_pol_2$name_party, ParlSpeech_2$name_party)],
  cze_pol_2$speaker
)
## Match by name1 and party
cze_pol_2$speaker <- ifelse(
  is.na(cze_pol_2$speaker) & cze_pol_2$uniquename1_party == 1,
  ParlSpeech_2$speaker[match(cze_pol_2$name1_party, ParlSpeech_2$name_party)],
  cze_pol_2$speaker
)
# Check the number of still missing data
sum(is.na(cze_pol_2$speaker))


## Match manually (check which existing in ParlSpeech values are not in cze_pol)
# Create csv with unmatched data for manual matching (the code is commented since it is
# just preparation
# for manual matching)
# ParlSpeech_2$unname <- cze_pol_2$uniquename_party[match(ParlSpeech_2$name_party,cze_pol_2$name_party)]
# ParlSpeech_2$unname1 <- cze_pol_2$uniquename1_party[match(ParlSpeech_2$name_party,cze_pol_2$name1_party)]
# ParlSpeech_2$match <- NA
# ParlSpeech_2$match <- ifelse(is.na(ParlSpeech_2$match) & ParlSpeech_2$unname == 1,
#                              "matched", ParlSpeech_2$match)
# ParlSpeech_2$match <- ifelse(is.na(ParlSpeech_2$match) & ParlSpeech_2$unname1 == 1,
#                              "matched", ParlSpeech_2$match)
# unmatched_ParlSpeech_2 <- ParlSpeech_2[is.na(ParlSpeech_2$match),]
# cze_pol_2$speaker <- ifelse(is.na(cze_pol_2$speaker),
#                             cze$ParlSpeech_speaker[match(cze_pol_2$wikidataid, cze$wikidataid)],
#                             cze_pol_2$speaker)
# unmatched_cze_pol_2 <- cze_pol_2[is.na(cze_pol_2$speaker),]
# unmatched_cze_pol_2 <- unmatched_cze_pol_2[, !(colnames(unmatched_cze_pol_2) %in% c("name_split"))]
# write.xlsx(unmatched_ParlSpeech_2, file = "unmatched_ParlSpeech_2.xlsx", row.names=FALSE)
# write.xlsx(unmatched_cze_pol_2, file = "unmatched_cze_pol_2.xlsx", row.names=FALSE)

# Match manually
cze_pol_2$speaker[cze_pol_2$wikidataid == "Q10861579"] <- "Zuzka Bebarova Rujbrova"


### OUTPUT
## Add ParlSpeech speaker's id to cze from cze_pol_2 by wikidataid.
## Only entries of cze$ParlSpeech_speaker that are still NA are filled, so
## values that are already present are kept.
cze$ParlSpeech_speaker <- ifelse(
  is.na(cze$ParlSpeech_speaker),
  cze_pol_2$speaker[match(cze$wikidataid, cze_pol_2$wikidataid)],
  cze$ParlSpeech_speaker
)



################################### Session 3 ###################################
### Filter by 3rd session (rows with start_year == 1998)
cze_pol_3 <- cze_pol_1_7[cze_pol_1_7$start_year == 1998, ]
ParlSpeech_3 <- psp.corpus[psp.corpus$start_year == 1998, ]

### NAMES
## "speaker party" key on the ParlSpeech side, compared below with the
## name_party / name1_party columns of cze_pol_3.
ParlSpeech_3$name_party <- paste(ParlSpeech_3$speaker, ParlSpeech_3$party,
                                 sep = " ")

### FREQUENCY
## For every key, uniquename* stores how often the key occurs in cze_pol_3;
## the fr*_ objects keep the keys that do not occur exactly once and are
## evaluated at top level for inspection.
## useNA is passed to table() itself: as.matrix() silently ignores it, so
## missing keys would otherwise never be tabulated.
## Name
fr <- data.frame(table(as.matrix(cze_pol_3$name), useNA = "always"))
cze_pol_3$uniquename <- fr$Freq[match(cze_pol_3$name, fr$Var1)]
fr_ <- fr[fr$Freq != 1, ]
fr_

## Name1
fr <- data.frame(table(as.matrix(cze_pol_3$name1), useNA = "always"))
cze_pol_3$uniquename1 <- fr$Freq[match(cze_pol_3$name1, fr$Var1)]
fr1_ <- fr[fr$Freq != 1, ]
fr1_

## Name + party
fr <- data.frame(table(as.matrix(cze_pol_3$name_party), useNA = "always"))
cze_pol_3$uniquename_party <- fr$Freq[match(cze_pol_3$name_party, fr$Var1)]
fr_p <- fr[fr$Freq != 1, ]
fr_p

## Name1 + party
fr <- data.frame(table(as.matrix(cze_pol_3$name1_party), useNA = "always"))
cze_pol_3$uniquename1_party <- fr$Freq[match(cze_pol_3$name1_party, fr$Var1)]
fr1_p <- fr[fr$Freq != 1, ]
fr1_p

### MATCHING
## Each pass fills speaker only where it is still NA and the key occurs
## exactly once in cze_pol_3; match() takes the first hit in ParlSpeech_3.
## Create empty columns
cze_pol_3$speaker <- NA

## Match by name
cze_pol_3$speaker <- ifelse(
  is.na(cze_pol_3$speaker) & cze_pol_3$uniquename == 1,
  ParlSpeech_3$speaker[match(cze_pol_3$name, ParlSpeech_3$speaker)],
  cze_pol_3$speaker
)
## Match by name1
cze_pol_3$speaker <- ifelse(
  is.na(cze_pol_3$speaker) & cze_pol_3$uniquename1 == 1,
  ParlSpeech_3$speaker[match(cze_pol_3$name1, ParlSpeech_3$speaker)],
  cze_pol_3$speaker
)
## Match by name and party
cze_pol_3$speaker <- ifelse(
  is.na(cze_pol_3$speaker) & cze_pol_3$uniquename_party == 1,
  ParlSpeech_3$speaker[match(cze_pol_3$name_party, ParlSpeech_3$name_party)],
  cze_pol_3$speaker
)
## Match by name1 and party
cze_pol_3$speaker <- ifelse(
  is.na(cze_pol_3$speaker) & cze_pol_3$uniquename1_party == 1,
  ParlSpeech_3$speaker[match(cze_pol_3$name1_party, ParlSpeech_3$name_party)],
  cze_pol_3$speaker
)
# Check the number of still missing data
sum(is.na(cze_pol_3$speaker))


## Match manually (check which existing in ParlSpeech
## values are not in cze_pol)
# Create csv with unmatched data for manual matching (the code is commented
# out since it is just preparation for manual matching)
# ParlSpeech_3$unname <- cze_pol_3$uniquename_party[match(ParlSpeech_3$name_party,cze_pol_3$name_party)]
# ParlSpeech_3$unname1 <- cze_pol_3$uniquename1_party[match(ParlSpeech_3$name_party,cze_pol_3$name1_party)]
# ParlSpeech_3$match <- NA
# ParlSpeech_3$match <- ifelse(is.na(ParlSpeech_3$match) & ParlSpeech_3$unname == 1,
#                              "matched", ParlSpeech_3$match)
# ParlSpeech_3$match <- ifelse(is.na(ParlSpeech_3$match) & ParlSpeech_3$unname1 == 1,
#                              "matched", ParlSpeech_3$match)
# unmatched_ParlSpeech_3 <- ParlSpeech_3[is.na(ParlSpeech_3$match),]
# cze_pol_3$speaker <- ifelse(is.na(cze_pol_3$speaker),
#                             cze$ParlSpeech_speaker[match(cze_pol_3$wikidataid, cze$wikidataid)],
#                             cze_pol_3$speaker)
# unmatched_cze_pol_3 <- cze_pol_3[is.na(cze_pol_3$speaker),]
# unmatched_cze_pol_3 <- unmatched_cze_pol_3[, !(colnames(unmatched_cze_pol_3) %in% c("name_split"))]
# write.csv(unmatched_ParlSpeech_3, file = "unmatched_ParlSpeech_3.csv", row.names=FALSE)
# write.xlsx(unmatched_cze_pol_3, file = "unmatched_cze_pol_3.xlsx", row.names=FALSE)

# Match manually
# Hand-assign the ParlSpeech label for wikidataid Q12037465 (presumably the
# younger of two namesakes, given the open question noted below).
cze_pol_3$speaker <- replace(
  cze_pol_3$speaker,
  cze_pol_3$wikidataid == "Q12037465",
  "Miloslav Kucera ml"
)
# for older one: Miloslav Kucera or Miloslav Kucera st?
# Check the number of still missing data
sum(is.na(cze_pol_3$speaker))


### OUTPUT
## Add ParlSpeech speaker's id to cze from cze_pol_3 by wikidataid.
## Only entries of cze$ParlSpeech_speaker that are still NA are filled, so
## values that are already present are kept.
cze$ParlSpeech_speaker <- ifelse(
  is.na(cze$ParlSpeech_speaker),
  cze_pol_3$speaker[match(cze$wikidataid, cze_pol_3$wikidataid)],
  cze$ParlSpeech_speaker
)



################################### Session 4 ###################################
### Filter by 4th session (rows with start_year == 2002)
cze_pol_4 <- cze_pol_1_7[cze_pol_1_7$start_year == 2002, ]
ParlSpeech_4 <- psp.corpus[psp.corpus$start_year == 2002, ]

### NAMES
## "speaker party" key on the ParlSpeech side.
ParlSpeech_4$name_party <- paste(ParlSpeech_4$speaker, ParlSpeech_4$party,
                                 sep = " ")
## name2 is an additional "lastname firstname" key used only from this
## session's matching onwards; name2_party appends the party to it.
cze_pol_4$name2 <- paste(cze_pol_4$lastname, cze_pol_4$firstname, sep = " ")
cze_pol_4$name2_party <- paste(cze_pol_4$name2, cze_pol_4$party, sep = " ")

### FREQUENCY
## For every key, uniquename* stores how often the key occurs in cze_pol_4;
## the fr*_ objects keep the keys that do not occur exactly once and are
## evaluated at top level for inspection.
## useNA is passed to table() itself: as.matrix() silently ignores it, so
## missing keys would otherwise never be tabulated.
## Name
fr <- data.frame(table(as.matrix(cze_pol_4$name), useNA = "always"))
cze_pol_4$uniquename <- fr$Freq[match(cze_pol_4$name, fr$Var1)]
fr_ <- fr[fr$Freq != 1, ]
fr_

## Name1
fr <- data.frame(table(as.matrix(cze_pol_4$name1), useNA = "always"))
cze_pol_4$uniquename1 <- fr$Freq[match(cze_pol_4$name1, fr$Var1)]
fr1_ <- fr[fr$Freq != 1, ]
fr1_

## Name2
fr <- data.frame(table(as.matrix(cze_pol_4$name2), useNA = "always"))
cze_pol_4$uniquename2 <- fr$Freq[match(cze_pol_4$name2, fr$Var1)]
fr2_ <- fr[fr$Freq != 1, ]
fr2_

## Name + party
fr <- data.frame(table(as.matrix(cze_pol_4$name_party), useNA = "always"))
cze_pol_4$uniquename_party <- fr$Freq[match(cze_pol_4$name_party, fr$Var1)]
fr_p <- fr[fr$Freq != 1, ]
fr_p

## Name1 + party
fr <- data.frame(table(as.matrix(cze_pol_4$name1_party), useNA = "always"))
cze_pol_4$uniquename1_party <- fr$Freq[match(cze_pol_4$name1_party, fr$Var1)]
fr1_p <- fr[fr$Freq != 1, ]
fr1_p

## Name2 + party
fr <- data.frame(table(as.matrix(cze_pol_4$name2_party), useNA = "always"))
cze_pol_4$uniquename2_party <- fr$Freq[match(cze_pol_4$name2_party, fr$Var1)]
fr2_p <- fr[fr$Freq != 1, ]
fr2_p


### MATCHING
## Each pass fills speaker only where it is still NA and the key occurs
## exactly once in cze_pol_4; match() takes the first hit in ParlSpeech_4.
## In this session the name+party keys are tried before the plain names.
## Create empty columns
cze_pol_4$speaker <- NA

## Match by name and party
cze_pol_4$speaker <- ifelse(
  is.na(cze_pol_4$speaker) & cze_pol_4$uniquename_party == 1,
  ParlSpeech_4$speaker[match(cze_pol_4$name_party, ParlSpeech_4$name_party)],
  cze_pol_4$speaker
)
## Match by name1 and party
cze_pol_4$speaker <- ifelse(
  is.na(cze_pol_4$speaker) & cze_pol_4$uniquename1_party == 1,
  ParlSpeech_4$speaker[match(cze_pol_4$name1_party, ParlSpeech_4$name_party)],
  cze_pol_4$speaker
)
## Match by name
cze_pol_4$speaker <- ifelse(
  is.na(cze_pol_4$speaker) & cze_pol_4$uniquename == 1,
  ParlSpeech_4$speaker[match(cze_pol_4$name, ParlSpeech_4$speaker)],
  cze_pol_4$speaker
)
## Match by name1
cze_pol_4$speaker <- ifelse(
  is.na(cze_pol_4$speaker) & cze_pol_4$uniquename1 == 1,
  ParlSpeech_4$speaker[match(cze_pol_4$name1, ParlSpeech_4$speaker)],
  cze_pol_4$speaker
)
# Check the number of still missing data
sum(is.na(cze_pol_4$speaker))
## Match by name2 and party
cze_pol_4$speaker <- ifelse(
  is.na(cze_pol_4$speaker) & cze_pol_4$uniquename2_party == 1,
  ParlSpeech_4$speaker[match(cze_pol_4$name2_party, ParlSpeech_4$name_party)],
  cze_pol_4$speaker
)
## Match by name2
cze_pol_4$speaker <- ifelse(
  is.na(cze_pol_4$speaker) & cze_pol_4$uniquename2 == 1,
  ParlSpeech_4$speaker[match(cze_pol_4$name2, ParlSpeech_4$speaker)],
  cze_pol_4$speaker
)
# Check the number of still missing data
sum(is.na(cze_pol_4$speaker))


## Match manually (check which existing in
## ParlSpeech values are not in cze_pol)
# Create csv with unmatched data for manual matching (the code is commented
# out since it is just preparation for manual matching)
# ParlSpeech_4$unname <- cze_pol_4$uniquename_party[match(ParlSpeech_4$name_party,cze_pol_4$name_party)]
# ParlSpeech_4$unname1 <- cze_pol_4$uniquename1_party[match(ParlSpeech_4$name_party,cze_pol_4$name1_party)]
# ParlSpeech_4$match <- NA
# ParlSpeech_4$match <- ifelse(is.na(ParlSpeech_4$match) & ParlSpeech_4$unname == 1,
#                              "matched", ParlSpeech_4$match)
# ParlSpeech_4$match <- ifelse(is.na(ParlSpeech_4$match) & ParlSpeech_4$unname1 == 1,
#                              "matched", ParlSpeech_4$match)
# unmatched_ParlSpeech_4 <- ParlSpeech_4[is.na(ParlSpeech_4$match),]
# cze_pol_4$speaker <- ifelse(is.na(cze_pol_4$speaker),
#                             cze$ParlSpeech_speaker[match(cze_pol_4$wikidataid, cze$wikidataid)],
#                             cze_pol_4$speaker)
# unmatched_cze_pol_4 <- cze_pol_4[is.na(cze_pol_4$speaker),]
# unmatched_cze_pol_4 <- unmatched_cze_pol_4[, !(colnames(unmatched_cze_pol_4) %in% c("name_split"))]

# Match manually
# no matches

### OUTPUT
## Add ParlSpeech speaker's id to cze from cze_pol_4 by wikidataid.
## Only entries of cze$ParlSpeech_speaker that are still NA are filled, so
## values that are already present are kept.
cze$ParlSpeech_speaker <- ifelse(
  is.na(cze$ParlSpeech_speaker),
  cze_pol_4$speaker[match(cze$wikidataid, cze_pol_4$wikidataid)],
  cze$ParlSpeech_speaker
)



################################### Session 5 ###################################
### Filter by 5th session (rows with start_year == 2006)
cze_pol_5 <- cze_pol_1_7[cze_pol_1_7$start_year == 2006, ]
ParlSpeech_5 <- psp.corpus[psp.corpus$start_year == 2006, ]

### NAMES
## "speaker party" key on the ParlSpeech side.
ParlSpeech_5$name_party <- paste(ParlSpeech_5$speaker, ParlSpeech_5$party,
                                 sep = " ")
## name2 is an additional "lastname firstname" key; name2_party appends the
## party to it.
cze_pol_5$name2 <- paste(cze_pol_5$lastname, cze_pol_5$firstname, sep = " ")
cze_pol_5$name2_party <- paste(cze_pol_5$name2, cze_pol_5$party, sep = " ")

### FREQUENCY
## For every key, uniquename* stores how often the key occurs in cze_pol_5;
## the fr*_ objects keep the keys that do not occur exactly once and are
## evaluated at top level for inspection.
## useNA is passed to table() itself: as.matrix() silently ignores it, so
## missing keys would otherwise never be tabulated.
## Name
fr <- data.frame(table(as.matrix(cze_pol_5$name), useNA = "always"))
cze_pol_5$uniquename <- fr$Freq[match(cze_pol_5$name, fr$Var1)]
fr_ <- fr[fr$Freq != 1, ]
fr_

## Name1
fr <- data.frame(table(as.matrix(cze_pol_5$name1), useNA = "always"))
cze_pol_5$uniquename1 <- fr$Freq[match(cze_pol_5$name1, fr$Var1)]
fr1_ <- fr[fr$Freq != 1, ]
fr1_

## Name + party
fr <- data.frame(table(as.matrix(cze_pol_5$name_party), useNA = "always"))
cze_pol_5$uniquename_party <- fr$Freq[match(cze_pol_5$name_party, fr$Var1)]
fr_p <- fr[fr$Freq != 1, ]
fr_p

## Name1 + party
fr <- data.frame(table(as.matrix(cze_pol_5$name1_party), useNA = "always"))
cze_pol_5$uniquename1_party <- fr$Freq[match(cze_pol_5$name1_party, fr$Var1)]
fr1_p <- fr[fr$Freq != 1, ]
fr1_p

## Name2
fr <- data.frame(table(as.matrix(cze_pol_5$name2), useNA = "always"))
cze_pol_5$uniquename2 <- fr$Freq[match(cze_pol_5$name2, fr$Var1)]
fr2_ <- fr[fr$Freq != 1, ]
fr2_

## Name2 + party
fr <- data.frame(table(as.matrix(cze_pol_5$name2_party), useNA = "always"))
cze_pol_5$uniquename2_party <- fr$Freq[match(cze_pol_5$name2_party, fr$Var1)]
fr2_p <- fr[fr$Freq != 1, ]
fr2_p


### MATCHING
## Each pass fills speaker only where it is still NA and the key occurs
## exactly once in cze_pol_5; match() takes the first hit in ParlSpeech_5.
## Create empty columns
cze_pol_5$speaker <- NA

## Match by name and party
cze_pol_5$speaker <- ifelse(
  is.na(cze_pol_5$speaker) & cze_pol_5$uniquename_party == 1,
  ParlSpeech_5$speaker[match(cze_pol_5$name_party, ParlSpeech_5$name_party)],
  cze_pol_5$speaker
)
## Match by name
cze_pol_5$speaker <- ifelse(
  is.na(cze_pol_5$speaker) & cze_pol_5$uniquename == 1,
  ParlSpeech_5$speaker[match(cze_pol_5$name, ParlSpeech_5$speaker)],
  cze_pol_5$speaker
)
## Match by name1 and party
cze_pol_5$speaker <- ifelse(
  is.na(cze_pol_5$speaker) & cze_pol_5$uniquename1_party == 1,
  ParlSpeech_5$speaker[match(cze_pol_5$name1_party, ParlSpeech_5$name_party)],
  cze_pol_5$speaker
)
## Match by name1
cze_pol_5$speaker <- ifelse(
  is.na(cze_pol_5$speaker) & cze_pol_5$uniquename1 == 1,
  ParlSpeech_5$speaker[match(cze_pol_5$name1, ParlSpeech_5$speaker)],
  cze_pol_5$speaker
)
# Check the number of still missing data
sum(is.na(cze_pol_5$speaker))
## Match by name2 and party
cze_pol_5$speaker <- ifelse(
  is.na(cze_pol_5$speaker) & cze_pol_5$uniquename2_party == 1,
  ParlSpeech_5$speaker[match(cze_pol_5$name2_party, ParlSpeech_5$name_party)],
  cze_pol_5$speaker
)
## Match by name2
cze_pol_5$speaker <- ifelse(
  is.na(cze_pol_5$speaker) & cze_pol_5$uniquename2 == 1,
  ParlSpeech_5$speaker[match(cze_pol_5$name2, ParlSpeech_5$speaker)],
  cze_pol_5$speaker
)
# Check the number of still missing data
sum(is.na(cze_pol_5$speaker))


## Match manually (check which existing in ParlSpeech values are not in cze_pol)
# Collect the unmatched rows on both sides as preparation for manual matching.
# NOTE: unlike in the other sessions, this preparation code is active (not
# commented out) for session 5.
# Flag ParlSpeech rows whose name_party key corresponds to a key that is
# unique in cze_pol_5.
ParlSpeech_5$unname <- cze_pol_5$uniquename_party[
  match(ParlSpeech_5$name_party, cze_pol_5$name_party)
]
ParlSpeech_5$unname1 <- cze_pol_5$uniquename1_party[
  match(ParlSpeech_5$name_party, cze_pol_5$name1_party)
]
ParlSpeech_5$match <- NA
ParlSpeech_5$match <- ifelse(
  is.na(ParlSpeech_5$match) & ParlSpeech_5$unname == 1,
  "matched",
  ParlSpeech_5$match
)
ParlSpeech_5$match <- ifelse(
  is.na(ParlSpeech_5$match) & ParlSpeech_5$unname1 == 1,
  "matched",
  ParlSpeech_5$match
)
unmatched_ParlSpeech_5 <- ParlSpeech_5[is.na(ParlSpeech_5$match), ]
# Reuse a speaker already stored in cze for the same wikidataid, so only rows
# with no known speaker at all remain in the unmatched list.
cze_pol_5$speaker <- ifelse(
  is.na(cze_pol_5$speaker),
  cze$ParlSpeech_speaker[match(cze_pol_5$wikidataid, cze$wikidataid)],
  cze_pol_5$speaker
)
unmatched_cze_pol_5 <- cze_pol_5[is.na(cze_pol_5$speaker), ]
unmatched_cze_pol_5 <- unmatched_cze_pol_5[
  , !(colnames(unmatched_cze_pol_5) %in% c("name_split"))
]

# Match manually
# no matches


### OUTPUT
## Add ParlSpeech speaker's id to cze from cze_pol_5 by wikidataid.
## Only entries of cze$ParlSpeech_speaker that are still NA are filled, so
## values that are already present are kept.
cze$ParlSpeech_speaker <- ifelse(
  is.na(cze$ParlSpeech_speaker),
  cze_pol_5$speaker[match(cze$wikidataid, cze_pol_5$wikidataid)],
  cze$ParlSpeech_speaker
)



################################### Session 6 ###################################
### Filter by 6th session (rows with start_year == 2010)
cze_pol_6 <- cze_pol_1_7[cze_pol_1_7$start_year == 2010, ]
ParlSpeech_6 <- psp.corpus[psp.corpus$start_year == 2010, ]

### NAMES
## "speaker party" key on the ParlSpeech side, compared below with the
## name_party / name1_party columns of cze_pol_6.
ParlSpeech_6$name_party <- paste(ParlSpeech_6$speaker, ParlSpeech_6$party,
                                 sep = " ")

### FREQUENCY
## For every key, uniquename* stores how often the key occurs in cze_pol_6;
## the fr*_ objects keep the keys that do not occur exactly once and are
## evaluated at top level for inspection.
## useNA is passed to table() itself: as.matrix() silently ignores it, so
## missing keys would otherwise never be tabulated.
## Name
fr <- data.frame(table(as.matrix(cze_pol_6$name), useNA = "always"))
cze_pol_6$uniquename <- fr$Freq[match(cze_pol_6$name, fr$Var1)]
fr_ <- fr[fr$Freq != 1, ]
fr_

## Name1
fr <- data.frame(table(as.matrix(cze_pol_6$name1), useNA = "always"))
cze_pol_6$uniquename1 <- fr$Freq[match(cze_pol_6$name1, fr$Var1)]
fr1_ <- fr[fr$Freq != 1, ]
fr1_

## Name + party
fr <- data.frame(table(as.matrix(cze_pol_6$name_party), useNA = "always"))
cze_pol_6$uniquename_party <- fr$Freq[match(cze_pol_6$name_party, fr$Var1)]
fr_p <- fr[fr$Freq != 1, ]
fr_p

## Name1 + party
fr <- data.frame(table(as.matrix(cze_pol_6$name1_party), useNA = "always"))
cze_pol_6$uniquename1_party <- fr$Freq[match(cze_pol_6$name1_party, fr$Var1)]
fr1_p <- fr[fr$Freq != 1, ]
fr1_p

### MATCHING
## Each pass fills speaker only where it is still NA and the key occurs
## exactly once in cze_pol_6; match() takes the first hit in ParlSpeech_6.
## Create empty columns
cze_pol_6$speaker <- NA

## Match by name
cze_pol_6$speaker <- ifelse(
  is.na(cze_pol_6$speaker) & cze_pol_6$uniquename == 1,
  ParlSpeech_6$speaker[match(cze_pol_6$name, ParlSpeech_6$speaker)],
  cze_pol_6$speaker
)
## Match by name1
cze_pol_6$speaker <- ifelse(
  is.na(cze_pol_6$speaker) & cze_pol_6$uniquename1 == 1,
  ParlSpeech_6$speaker[match(cze_pol_6$name1, ParlSpeech_6$speaker)],
  cze_pol_6$speaker
)
## Match by name and party
cze_pol_6$speaker <- ifelse(
  is.na(cze_pol_6$speaker) & cze_pol_6$uniquename_party == 1,
  ParlSpeech_6$speaker[match(cze_pol_6$name_party, ParlSpeech_6$name_party)],
  cze_pol_6$speaker
)
## Match by name1 and party
cze_pol_6$speaker <- ifelse(
  is.na(cze_pol_6$speaker) & cze_pol_6$uniquename1_party == 1,
  ParlSpeech_6$speaker[match(cze_pol_6$name1_party, ParlSpeech_6$name_party)],
  cze_pol_6$speaker
)
# Check the number of still missing data
sum(is.na(cze_pol_6$speaker))


## Match manually (check which existing in ParlSpeech values are not in cze_pol)
# Create csv with unmatched data for manual matching (the code is commented
# out since it is just preparation for manual matching)
# ParlSpeech_6$unname <- cze_pol_6$uniquename_party[match(ParlSpeech_6$name_party,cze_pol_6$name_party)]
# ParlSpeech_6$unname1 <- cze_pol_6$uniquename1_party[match(ParlSpeech_6$name_party,cze_pol_6$name1_party)]
# ParlSpeech_6$match <- NA
# ParlSpeech_6$match <- ifelse(is.na(ParlSpeech_6$match) & ParlSpeech_6$unname == 1,
#                              "matched", ParlSpeech_6$match)
# ParlSpeech_6$match <- ifelse(is.na(ParlSpeech_6$match) & ParlSpeech_6$unname1 == 1,
#                              "matched", ParlSpeech_6$match)
# unmatched_ParlSpeech_6 <- ParlSpeech_6[is.na(ParlSpeech_6$match),]
# cze_pol_6$speaker <- ifelse(is.na(cze_pol_6$speaker),
#                             cze$ParlSpeech_speaker[match(cze_pol_6$wikidataid, cze$wikidataid)],
#                             cze_pol_6$speaker)
# unmatched_cze_pol_6 <- cze_pol_6[is.na(cze_pol_6$speaker),]
# unmatched_cze_pol_6 <-
#   unmatched_cze_pol_6[, !(colnames(unmatched_cze_pol_6) %in% c("name_split"))]

# Match manually
# Two namesakes told apart by wikidataid; the labels carry "ml" / "st"
# suffixes.
cze_pol_6$speaker[cze_pol_6$wikidataid == "Q12026431"] <- "Josef Novotny ml"
cze_pol_6$speaker[cze_pol_6$wikidataid == "Q12026433"] <- "Josef Novotny st"


### OUTPUT
## Add ParlSpeech speaker's id to cze from cze_pol_6 by wikidataid.
## Only entries of cze$ParlSpeech_speaker that are still NA are filled, so
## values that are already present are kept.
cze$ParlSpeech_speaker <- ifelse(
  is.na(cze$ParlSpeech_speaker),
  cze_pol_6$speaker[match(cze$wikidataid, cze_pol_6$wikidataid)],
  cze$ParlSpeech_speaker
)



################################### Session 7 ###################################
### Filter by 7th session (rows with start_year == 2013)
cze_pol_7 <- cze_pol_1_7[cze_pol_1_7$start_year == 2013, ]
ParlSpeech_7 <- psp.corpus[psp.corpus$start_year == 2013, ]

### NAMES
## "speaker party" key on the ParlSpeech side, compared below with the
## name_party / name1_party columns of cze_pol_7.
ParlSpeech_7$name_party <- paste(ParlSpeech_7$speaker, ParlSpeech_7$party,
                                 sep = " ")

### FREQUENCY
## For every key, uniquename* stores how often the key occurs in cze_pol_7;
## the fr*_ objects keep the keys that do not occur exactly once and are
## evaluated at top level for inspection.
## useNA is passed to table() itself: as.matrix() silently ignores it, so
## missing keys would otherwise never be tabulated.
## Name
fr <- data.frame(table(as.matrix(cze_pol_7$name), useNA = "always"))
cze_pol_7$uniquename <- fr$Freq[match(cze_pol_7$name, fr$Var1)]
fr_ <- fr[fr$Freq != 1, ]
fr_

## Name1
fr <- data.frame(table(as.matrix(cze_pol_7$name1), useNA = "always"))
cze_pol_7$uniquename1 <- fr$Freq[match(cze_pol_7$name1, fr$Var1)]
fr1_ <- fr[fr$Freq != 1, ]
fr1_

## Name + party
fr <- data.frame(table(as.matrix(cze_pol_7$name_party), useNA = "always"))
cze_pol_7$uniquename_party <- fr$Freq[match(cze_pol_7$name_party, fr$Var1)]
fr_p <- fr[fr$Freq != 1, ]
fr_p

## Name1 + party
fr <- data.frame(table(as.matrix(cze_pol_7$name1_party), useNA = "always"))
cze_pol_7$uniquename1_party <- fr$Freq[match(cze_pol_7$name1_party, fr$Var1)]
fr1_p <- fr[fr$Freq != 1, ]
fr1_p

### MATCHING
## Each pass fills speaker only where it is still NA and the key occurs
## exactly once in cze_pol_7; match() takes the first hit in ParlSpeech_7.
## Create empty columns
cze_pol_7$speaker <- NA

## Match by name
cze_pol_7$speaker <- ifelse(
  is.na(cze_pol_7$speaker) & cze_pol_7$uniquename == 1,
  ParlSpeech_7$speaker[match(cze_pol_7$name, ParlSpeech_7$speaker)],
  cze_pol_7$speaker
)
## Match by name1
cze_pol_7$speaker <- ifelse(
  is.na(cze_pol_7$speaker) & cze_pol_7$uniquename1 == 1,
  ParlSpeech_7$speaker[match(cze_pol_7$name1, ParlSpeech_7$speaker)],
  cze_pol_7$speaker
)
## Match by name and party
cze_pol_7$speaker <- ifelse(
  is.na(cze_pol_7$speaker) & cze_pol_7$uniquename_party == 1,
  ParlSpeech_7$speaker[match(cze_pol_7$name_party, ParlSpeech_7$name_party)],
  cze_pol_7$speaker
)
## Match by name1 and party
cze_pol_7$speaker <- ifelse(
  is.na(cze_pol_7$speaker) & cze_pol_7$uniquename1_party == 1,
  ParlSpeech_7$speaker[match(cze_pol_7$name1_party, ParlSpeech_7$name_party)],
  cze_pol_7$speaker
)
# Check the number of still missing data
sum(is.na(cze_pol_7$speaker))


## Match manually (check which existing in ParlSpeech values are not in cze_pol)
# Collect the unmatched rows on both sides as preparation for manual matching.
# NOTE: unlike in most other sessions, this preparation code is active (not
# commented out) for session 7.
# Flag ParlSpeech rows whose name_party key corresponds to a key that is
# unique in cze_pol_7.
ParlSpeech_7$unname <- cze_pol_7$uniquename_party[
  match(ParlSpeech_7$name_party, cze_pol_7$name_party)
]
ParlSpeech_7$unname1 <- cze_pol_7$uniquename1_party[
  match(ParlSpeech_7$name_party, cze_pol_7$name1_party)
]
ParlSpeech_7$match <- NA
ParlSpeech_7$match <- ifelse(
  is.na(ParlSpeech_7$match) & ParlSpeech_7$unname == 1,
  "matched",
  ParlSpeech_7$match
)
ParlSpeech_7$match <- ifelse(
  is.na(ParlSpeech_7$match) & ParlSpeech_7$unname1 == 1,
  "matched",
  ParlSpeech_7$match
)
unmatched_ParlSpeech_7 <- ParlSpeech_7[is.na(ParlSpeech_7$match), ]
# Reuse a speaker already stored in cze for the same wikidataid, so only rows
# with no known speaker at all remain in the unmatched list.
cze_pol_7$speaker <- ifelse(
  is.na(cze_pol_7$speaker),
  cze$ParlSpeech_speaker[match(cze_pol_7$wikidataid, cze$wikidataid)],
  cze_pol_7$speaker
)
unmatched_cze_pol_7 <- cze_pol_7[is.na(cze_pol_7$speaker), ]
unmatched_cze_pol_7 <- unmatched_cze_pol_7[
  , !(colnames(unmatched_cze_pol_7) %in% c("name_split"))
]

# Match manually
cze_pol_7$speaker[cze_pol_7$wikidataid == "Q13721260"] <- "Tomas Jan Podivinsky"


### OUTPUT
## Add ParlSpeech speaker's id to cze from cze_pol_7 by wikidataid.
## Only entries of cze$ParlSpeech_speaker that are still NA are filled, so
## values that are already present are kept.
cze$ParlSpeech_speaker <- ifelse(
  is.na(cze$ParlSpeech_speaker),
  cze_pol_7$speaker[match(cze$wikidataid, cze_pol_7$wikidataid)],
  cze$ParlSpeech_speaker
)



################################### THE OUTPUT DATAFRAME ###################################
# output
# Keep only the identifier and the matched ParlSpeech speaker label.
cze_output <- subset(cze, select = c("wikidataid", "ParlSpeech_speaker"))
sum(!is.na(cze_output$ParlSpeech_speaker))
# We found 880 out of 1096 (according to the pdf of the ParlSpeech data set)
# Drop rows without a match before saving.
cze_output <- cze_output[!is.na(cze_output$ParlSpeech_speaker), ]
saveRDS(cze_output, "./data/pol_sci_data/cze_output")




--------------------------------------------------------------------------------