├── .gitignore
├── R
├── sysdata.rda
├── legislatoR.R
├── cld_content.R
├── get_office.R
├── get_portrait.R
├── get_profession.R
├── get_traffic.R
├── get_social.R
├── get_history.R
├── get_core.R
├── get_political.R
└── get_ids.R
├── images
├── sticker.jpg
└── data-structure.png
├── vignettes
├── dpsi_example
├── sticker.jpg
├── parlspeech_example
└── legislatoR.Rmd
├── CRAN-SUBMISSION
├── .Rbuildignore
├── NAMESPACE
├── legislatoR.Rproj
├── tests
├── testthat.R
└── testthat
│ ├── test-cld_content.R
│ ├── test-errors.R
│ ├── test-get_ids.R
│ ├── test-get_history.R
│ ├── test-get_office.R
│ ├── test-get_social.R
│ ├── test-get_traffic.R
│ ├── test-get_portrait.R
│ ├── test-get_political.R
│ ├── test-get_profession.R
│ └── test-get_core.R
├── SOURCES.md
├── source
├── sticker.R
├── packages.R
├── preparation_spain.R
└── integration_czech.R
├── DESCRIPTION
├── man
├── cld_content.Rd
├── legislatoR.Rd
├── get_office.Rd
├── get_portrait.Rd
├── get_profession.Rd
├── get_traffic.Rd
├── get_social.Rd
├── get_history.Rd
├── get_core.Rd
├── get_political.Rd
└── get_ids.Rd
├── NEWS.md
├── README.md
└── GLOSSARY.md
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 |
--------------------------------------------------------------------------------
/R/sysdata.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saschagobel/legislatoR/HEAD/R/sysdata.rda
--------------------------------------------------------------------------------
/images/sticker.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saschagobel/legislatoR/HEAD/images/sticker.jpg
--------------------------------------------------------------------------------
/vignettes/dpsi_example:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saschagobel/legislatoR/HEAD/vignettes/dpsi_example
--------------------------------------------------------------------------------
/vignettes/sticker.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saschagobel/legislatoR/HEAD/vignettes/sticker.jpg
--------------------------------------------------------------------------------
/images/data-structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saschagobel/legislatoR/HEAD/images/data-structure.png
--------------------------------------------------------------------------------
/CRAN-SUBMISSION:
--------------------------------------------------------------------------------
1 | Version: 1.1.0
2 | Date: 2023-03-31 20:38:53 UTC
3 | SHA: df17df975945a3ee6bb302e1be2b3a588d1d8065
4 |
--------------------------------------------------------------------------------
/vignettes/parlspeech_example:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saschagobel/legislatoR/HEAD/vignettes/parlspeech_example
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^GLOSSARY\.md$
4 | ^README\.md$
5 | ^SOURCES\.md$
6 | ^images$
7 | ^source$
8 | ^workshop$
9 | ^\.travis\.yml$
10 | ^CRAN-SUBMISSION$
11 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(cld_content)
4 | export(get_core)
5 | export(get_history)
6 | export(get_ids)
7 | export(get_office)
8 | export(get_political)
9 | export(get_portrait)
10 | export(get_profession)
11 | export(get_social)
12 | export(get_traffic)
13 | import(dplyr)
14 | importFrom(curl,nslookup)
15 |
--------------------------------------------------------------------------------
/legislatoR.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | # Test runner: executes the package's testthat suite one test file at a time.
2 | library(testthat)
3 | library(legislatoR)
4 |
5 | # Each entry is handed to test_check() as `filter`, so a pass only runs the
6 | # test files whose name matches it. Passes run in the order listed here.
7 | test_files <- c(
8 |   "cld_content", "errors", "get_core", "get_history", "get_ids",
9 |   "get_office", "get_political", "get_portrait", "get_profession",
10 |   "get_social", "get_traffic"
11 | )
12 |
13 | for (test_file in test_files) {
14 |   test_check("legislatoR", filter = test_file)
15 | }
16 |
--------------------------------------------------------------------------------
/SOURCES.md:
--------------------------------------------------------------------------------
1 | * Additional religious affiliations from http://www.adherents.com/adh_congress.html
2 | * [Face++ Cognitive Services API](https://www.faceplusplus.com/)
3 | * [Czech Republic Parliamentary Members Archive](http://public.psp.cz/sqw/fsnem.sqw?zvo=1)
4 | * [Germany Bundestag Open Data](https://www.bundestag.de/service/opendata)
5 | * [Spain Parliamentary Members Archive](http://www.congreso.es/portal/page/portal/Congreso/Congreso/Diputados)
6 | * Additional Twitter handles from https://github.com/oduwsdl/US-Congress
7 | * Additional Twitter handles provided by Bruno Castanho Silva and Sven-Oliver Proksch
8 | * [Wikimedia Commons](https://commons.wikimedia.org/)
9 | * [Wikimedia API](https://wikimedia.org/)
10 | * [Wikidata API](https://www.wikidata.org/)
11 | * [Wikipedia](https://de.wikipedia.org/)
12 | * [Wikipedia API](https://en.wikipedia.org/w/api.php)
13 |
--------------------------------------------------------------------------------
/source/sticker.R:
--------------------------------------------------------------------------------
1 | # ---------------------------------------------------------------------------------------
2 | # legislatoR
3 | # Sascha Göbel and Simon Munzert
4 | # Script: logo
5 | # December 2017
6 | # ---------------------------------------------------------------------------------------
7 |
8 | library(hexSticker)
9 | library(showtext)
10 |
11 | # Directory that holds the logo source image and receives the rendered sticker.
12 | # Set the LEGISLATOR_IMAGES_DIR environment variable to override it; the
13 | # fallback is the path this script used to setwd() into. Building explicit
14 | # paths leaves the session's working directory untouched.
15 | images_dir <- Sys.getenv(
16 |   "LEGISLATOR_IMAGES_DIR",
17 |   unset = "D:/Sascha/projects/legislatoR/images"
18 | )
19 | if (!dir.exists(images_dir)) {
20 |   stop("Image directory not found: ", images_dir, call. = FALSE)
21 | }
22 |
23 | # Register the Google font under the family name used for the package label.
24 | font_add_google("IM Fell French Canon SC", "political")
25 |
26 | # Render the hex sticker from new_logo.png and write it to sticker.jpg.
27 | sticker(
28 |   subplot = file.path(images_dir, "new_logo.png"),
29 |   # p_*: package name label
30 |   package = "legislatoR",
31 |   p_x = 1,
32 |   p_y = 1.45,
33 |   p_color = "black",
34 |   p_size = 70,
35 |   p_family = "political",
36 |   # h_*: hexagon border and fill
37 |   h_size = 1.5,
38 |   h_fill = "white",
39 |   h_color = "black",
40 |   # asp, s_*: subplot aspect ratio and position
41 |   asp = 8,
42 |   s_x = 1,
43 |   s_y = 0.7,
44 |   spotlight = FALSE,
45 |   filename = file.path(images_dir, "sticker.jpg"),
46 |   white_around_sticker = TRUE,
47 |   dpi = 1000
48 | )
49 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: legislatoR
2 | Title: Interface to the Comparative Legislators Database
3 | Description: Facilitates access to the Comparative Legislators Database (CLD). The CLD includes political, sociodemographic, career, online presence, public attention, and visual information for over 67,000 contemporary and historical politicians from 16 countries.
4 | Version: 1.1.0
5 | Authors@R: c(
6 | person("Sascha", "Goebel", email = "sascha.goebel@soz.uni-frankfurt.de", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9032-5874")),
7 | person("Simon", "Munzert", role = "aut"))
8 | URL: https://github.com/saschagobel/legislatoR
9 | BugReports: https://github.com/saschagobel/legislatoR/issues
10 | License: GPL-3
11 | Depends:
12 | R (>= 3.5.0)
13 | Imports:
14 | curl (>= 3.0),
15 | dplyr (>= 0.7.4)
16 | Suggests:
17 | testthat,
18 | rmarkdown,
19 | knitr,
20 | magrittr,
21 | purrr,
22 | stringr,
23 | tibble
24 | VignetteBuilder:
25 | knitr
26 | Encoding: UTF-8
27 | RoxygenNote: 7.1.2
28 |
--------------------------------------------------------------------------------
/source/packages.R:
--------------------------------------------------------------------------------
1 | # ---------------------------------------------------------------------------------------
2 | # legislatoR
3 | # Sascha Göbel, Simon Munzert
4 | # Script: Packages
5 | # December 2017
6 | # ---------------------------------------------------------------------------------------
7 |
8 |
9 | #### INSTALL AND LOAD PACKAGES ==========================================================
10 |
11 | # install pacman package if not installed, then attach it -------------------------------
12 | # requireNamespace() tests availability without the warning require() emits on
13 | # failure, so nothing has to be wrapped in suppressWarnings() and problems
14 | # reported by install.packages() stay visible.
15 | if (!requireNamespace("pacman", quietly = TRUE)) {
16 |   install.packages("pacman")
17 | }
18 | library(pacman)
19 |
20 | # load packages and install if not installed --------------------------------------------
21 | # The packages are listed in the order they are handed to p_load(); plyr is
22 | # listed before dplyr.
23 | pacman::p_load(
24 |   stringr, lubridate, magrittr, plyr, dplyr, eeptools, httr,
25 |   rvest, toOrdinal, mpoly, data.table, zoo, jsonlite, R.utils,
26 |   WikidataR, tibble, pageviews, wikipediatrend, padr, gtools, readxl, haven,
27 |   tidyselect, reshape2, ggplot2, extrafont, finalfit, purrr, vroom,
28 |   install = TRUE,
29 |   update = FALSE
30 | )
31 |
32 | # show loaded packages ------------------------------------------------------------------
33 | cat("loaded packages\n")
34 | print(pacman::p_loaded())
35 |
--------------------------------------------------------------------------------
/R/legislatoR.R:
--------------------------------------------------------------------------------
1 | #' legislatoR
2 | #'
3 | #' Facilitates access to the Comparative Legislators Database (CLD). The CLD includes political, sociodemographic, career, online presence, public attention, and visual information for over 45,000 contemporary and historical politicians from ten countries.
4 | #'
5 | #' @section legislatoR functions:
6 | #'
7 | #' \code{\link{cld_content}}: returns a named list of legislatures and sessions available in the CLD.\cr
8 | #'
9 | #' \code{\link{get_core}}: fetches sociodemographic data of legislators.\cr
10 | #'
11 | #' \code{\link{get_political}}: fetches political data of legislators.\cr
12 | #'
13 | #' \code{\link{get_history}}: fetches full revision histories of legislators' Wikipedia biographies.\cr
14 | #'
15 | #' \code{\link{get_traffic}}: fetches daily user traffic on legislators' Wikipedia biographies.\cr
16 | #'
17 | #' \code{\link{get_social}}: fetches social media handles and website URLs of legislators.\cr
18 | #'
19 | #' \code{\link{get_portrait}}: fetches portrait urls of legislators.\cr
20 | #'
21 | #' \code{\link{get_office}}: fetches political and other offices of legislators.\cr
22 | #'
23 | #' \code{\link{get_profession}}: fetches occupational data of legislators.\cr
24 | #'
25 | #' \code{\link{get_ids}}: fetches a range of IDs of legislators.\cr
26 | #'
27 | #' @docType package
28 | #' @name legislatoR
29 | NULL
30 |
--------------------------------------------------------------------------------
/man/cld_content.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cld_content.R
3 | \name{cld_content}
4 | \alias{cld_content}
5 | \title{List content of the CLD}
6 | \usage{
7 | cld_content(legislature = NULL)
8 | }
9 | \arguments{
10 | \item{legislature}{An optional character string specifying one or more legislatures. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}. If NULL (the default), a list with all legislatures and sessions available in the CLD is returned.}
11 | }
12 | \value{
13 | A list with names being three-letter country codes and with each element containing a vector that shows the sessions available for a legislature.
14 | }
15 | \description{
16 | Returns a named list of legislatures and sessions available in the CLD. This provides a quick overview of the CLD's scope and valid three-letter country codes, and helps to conveniently loop/map over legislatures and sessions.
17 | }
18 | \examples{
19 | # Get a list of three-letter country codes and available sessions for all countries
20 | overview <- cld_content()
21 | tibble::glimpse(overview)
22 |
23 | # Get a list of available sessions for the French Assemblée and the Irish Dail
24 | sessions <- cld_content(legislature = c("fra", "irl"))
25 | tibble::glimpse(sessions)
26 | }
27 |
--------------------------------------------------------------------------------
/man/legislatoR.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/legislatoR.R
3 | \docType{package}
4 | \name{legislatoR}
5 | \alias{legislatoR}
6 | \title{legislatoR}
7 | \description{
8 | Facilitates access to the Comparative Legislators Database (CLD). The CLD includes political, sociodemographic, career, online presence, public attention, and visual information for over 45,000 contemporary and historical politicians from ten countries.
9 | }
10 | \section{legislatoR functions}{
11 |
12 |
13 | \code{\link{cld_content}}: returns a named list of legislatures and sessions available in the CLD.\cr
14 |
15 | \code{\link{get_core}}: fetches sociodemographic data of legislators.\cr
16 |
17 | \code{\link{get_political}}: fetches political data of legislators.\cr
18 |
19 | \code{\link{get_history}}: fetches full revision histories of legislators' Wikipedia biographies.\cr
20 |
21 | \code{\link{get_traffic}}: fetches daily user traffic on legislators' Wikipedia biographies.\cr
22 |
23 | \code{\link{get_social}}: fetches social media handles and website URLs of legislators.\cr
24 |
25 | \code{\link{get_portrait}}: fetches portrait urls of legislators.\cr
26 |
27 | \code{\link{get_office}}: fetches political and other offices of legislators.\cr
28 |
29 | \code{\link{get_profession}}: fetches occupational data of legislators.\cr
30 |
31 | \code{\link{get_ids}}: fetches a range of IDs of legislators.\cr
32 | }
33 |
34 |
--------------------------------------------------------------------------------
/tests/testthat/test-cld_content.R:
--------------------------------------------------------------------------------
1 | # Tests for cld_content(). Every test is skipped on CRAN.
2 |
3 | test_that("cld_content() returns a named list of integer vectors", {
4 |   skip_on_cran()
5 |   content <- cld_content()
6 |   expect_identical(class(content), "list")
7 |   expect_gt(length(content), 0L)
8 |   # Check every legislature instead of a single one drawn with sample(), so
9 |   # the outcome of the test does not depend on the random draw.
10 |   for (i in seq_along(content)) {
11 |     expect_identical(class(content[[i]]), "integer", info = names(content)[i])
12 |   }
13 | })
14 |
15 | test_that("cld_content() works with valid country codes", {
16 |   skip_on_cran()
17 |   # Expected number of sessions per legislature code.
18 |   expected_sessions <- c(
19 |     aut = 27L, can = 44L, cze = 9L, esp = 14L, fra = 16L, deu = 20L,
20 |     irl = 33L, sco = 6L, gbr = 58L, usa_house = 117L, usa_senate = 117L
21 |   )
22 |   for (code in names(expected_sessions)) {
23 |     expect_identical(
24 |       length(cld_content(code)[[1]]),
25 |       expected_sessions[[code]],
26 |       info = code
27 |     )
28 |   }
29 | })
30 |
31 | test_that("cld_content() works with multiple country codes", {
32 |   skip_on_cran()
33 |   expect_silent(cld_content(c("aut", "deu")))
34 |   expect_silent(cld_content(c("cze", "sco", "gbr")))
35 |   expect_silent(cld_content(c("usa_house", "fra", "esp", "can")))
36 | })
37 |
38 | test_that("Error is returned when legislature argument is not a valid country code", {
39 |   skip_on_cran()
40 |   expect_error(cld_content(NA))
41 |   expect_error(cld_content("bla"))
42 |   expect_error(cld_content(c("deu", "bla")))
43 |   expect_error(cld_content(2))
44 |   expect_error(cld_content(TRUE))
45 | })
46 |
--------------------------------------------------------------------------------
/man/get_office.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/get_office.R
3 | \name{get_office}
4 | \alias{get_office}
5 | \title{Fetch 'Offices' table}
6 | \format{
7 | Data frame in wide format with columns (varies by legislature):
8 | \itemize{
9 | \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).}
10 | \item{office_1: political or other office held by a legislator (of class \sQuote{logical}).}
11 | \item{office_2: ... (of class \sQuote{logical}).}
12 | \item{...}
13 | }
14 | }
15 | \source{
16 | Wikidata API, \url{https://www.wikidata.org/wiki/Wikidata:Main_Page}
17 | }
18 | \usage{
19 | get_office(legislature)
20 | }
21 | \arguments{
22 | \item{legislature}{A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}.}
23 | }
24 | \value{
25 | A data frame with columns as specified above.
26 | }
27 | \description{
28 | Fetches political and other offices of legislators for the specified legislature. Requires a working Internet connection.
29 | }
30 | \examples{
31 | \donttest{# Get entire 'Offices' table for the United States Senate
32 | usa_offices <- get_office(legislature = "usa_senate")
33 | tibble::glimpse(usa_offices)
34 |
35 | # Get 'Offices' table for female members of the United States Senate
36 | usa_offices_subset <- dplyr::semi_join(x = usa_offices,
37 | y = dplyr::filter(get_core(legislature = "usa_senate"),
38 | sex == "female"),
39 | by = "wikidataid")
40 | tibble::glimpse(usa_offices_subset)
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/man/get_portrait.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/get_portrait.R
3 | \name{get_portrait}
4 | \alias{get_portrait}
5 | \title{Fetch 'Portrait' table}
6 | \format{
7 | Data frame with columns (varies by legislature):
8 | \itemize{
9 | \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer}).}
10 | \item{image_url: URL linking to a legislator's portrait on Wikimedia Commons (of class \sQuote{character}).}
11 | }
12 | }
13 | \source{
14 | Wikipedia API, \url{https://en.wikipedia.org/w/api.php} \cr
15 | Wikimedia Commons, \url{https://commons.wikimedia.org/wiki/Main_Page}
16 | }
17 | \usage{
18 | get_portrait(legislature)
19 | }
20 | \arguments{
21 | \item{legislature}{A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}.}
22 | }
23 | \value{
24 | A data frame with columns as specified above.
25 | }
26 | \description{
27 | Fetches portrait urls of legislators for the specified legislature. Requires a working Internet connection.
28 | }
29 | \examples{
30 | \donttest{# Get entire 'Portraits' table for the United States Senate
31 | usa_portraits <- get_portrait(legislature = "usa_senate")
32 | tibble::glimpse(usa_portraits)
33 |
34 | # Get 'Portraits' table for Democratic members of the United States Senate
35 | usa_port_subset <- dplyr::semi_join(x = usa_portraits,
36 | y = dplyr::filter(get_political(legislature = "usa_senate"),
37 | party == "D"),
38 | by = "pageid")
39 | tibble::glimpse(usa_port_subset)
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/man/get_profession.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/get_profession.R
3 | \name{get_profession}
4 | \alias{get_profession}
5 | \title{Fetch 'Professions' table}
6 | \format{
7 | Data frame in wide format with columns (varies by legislature):
8 | \itemize{
9 | \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).}
10 | \item{occupation_1: occupation a legislator practiced or was trained in (of class \sQuote{logical}).}
11 | \item{occupation_2: ... (of class \sQuote{logical}).}
12 | \item{...}
13 | }
14 | }
15 | \source{
16 | Wikidata API, \url{https://www.wikidata.org/wiki/Wikidata:Main_Page}
17 | }
18 | \usage{
19 | get_profession(legislature)
20 | }
21 | \arguments{
22 | \item{legislature}{A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}.}
23 | }
24 | \value{
25 | A data frame with columns as specified above.
26 | }
27 | \description{
28 | Fetches occupational data of legislators for the specified legislature. Requires a working Internet connection.
29 | }
30 | \examples{
31 | \donttest{# Get entire 'Professions' table for the United States House
32 | usa_professions <- get_profession(legislature = "usa_house")
33 | tibble::glimpse(usa_professions)
34 |
35 | # Get 'Professions' table for female members of the United States House
36 | usa_professions_subset <- dplyr::semi_join(x = usa_professions,
37 | y = dplyr::filter(get_core(legislature = "usa_house"),
38 | sex == "female"),
39 | by = "wikidataid")
40 | tibble::glimpse(usa_professions_subset)
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/man/get_traffic.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/get_traffic.R
3 | \name{get_traffic}
4 | \alias{get_traffic}
5 | \title{Fetch Wikipedia 'Traffic' table}
6 | \format{
7 | Data frame with columns:
8 | \itemize{
9 | \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer}).}
10 | \item{date: Date for which user traffic is recorded, from 2015-07-01 to 2018-12-31 UTC (of class \sQuote{POSIXct}).}
11 | \item{traffic: Daily non-unique user visits (of class \sQuote{numeric}).}
12 | }
13 | }
14 | \source{
15 | Wikimedia API, \url{https://wikimedia.org/api/rest_v1/} \cr
16 | \url{http://petermeissner.de:8880/}
17 | }
18 | \usage{
19 | get_traffic(legislature)
20 | }
21 | \arguments{
22 | \item{legislature}{A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}.}
23 | }
24 | \value{
25 | A data frame with columns as specified above.
26 | }
27 | \description{
28 | Fetches daily user traffic on legislators' Wikipedia biographies for the specified legislature. Requires a working Internet connection.
29 | }
30 | \examples{
31 | \donttest{# Get entire 'Traffic' table for the Scottish Parliament
32 | sco_traffic <- get_traffic(legislature = "sco")
33 | tibble::glimpse(sco_traffic)
34 |
35 | # Add Wikidataid to 'Traffic' table for the Scottish Parliament
36 | sco_traffic_subset <- dplyr::inner_join(x = dplyr::select(get_core(legislature = "sco"),
37 | pageid, wikidataid),
38 | y = sco_traffic,
39 | by = "pageid")
40 | tibble::glimpse(sco_traffic_subset)
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/man/get_social.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/get_social.R
3 | \name{get_social}
4 | \alias{get_social}
5 | \title{Fetch 'Social' table}
6 | \format{
7 | Data frame with columns (varies by legislature):
8 | \itemize{
9 | \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).}
10 | \item{twitter: Twitter handle (of class \sQuote{character}).}
11 | \item{facebook: Facebook handle (of class \sQuote{character}).}
12 | \item{youtube: Youtube ID (of class \sQuote{character}).}
13 | \item{googlep: Google Plus ID (of class \sQuote{character}).}
14 | \item{instagram: Instagram handle (of class \sQuote{character}).}
15 | \item{linkedin: LinkedIn ID (of class \sQuote{character}).}
16 | \item{website: Personal website URL (of class \sQuote{character}).}
17 | }
18 | }
19 | \source{
20 | Wikidata API, \url{https://www.wikidata.org/wiki/Wikidata:Main_Page}
21 | }
22 | \usage{
23 | get_social(legislature)
24 | }
25 | \arguments{
26 | \item{legislature}{A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}.}
27 | }
28 | \value{
29 | A data frame with columns as specified above.
30 | }
31 | \description{
32 | Fetches social media handles and website URLs of legislators for the specified legislature. Requires a working Internet connection.
33 | }
34 | \examples{
35 | \donttest{# Get entire 'Social' table for the UK House of Commons
36 | gbr_social <- get_social(legislature = "gbr")
37 | tibble::glimpse(gbr_social)
38 |
39 | # Get 'Social' table for members of the UK House of Commons with available TheyWorkForYou ID
40 | gbr_social_subset <- dplyr::semi_join(x = gbr_social,
41 | y = dplyr::filter(get_ids(legislature = "gbr"),
42 | !is.na(theyworkforyou)),
43 | by = "wikidataid")
44 | tibble::glimpse(gbr_social_subset)
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/man/get_history.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/get_history.R
3 | \name{get_history}
4 | \alias{get_history}
5 | \title{Fetch Wikipedia 'History' table}
6 | \format{
7 | Data frame with columns:
8 | \itemize{
9 | \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer}).}
10 | \item{revid: Wikipedia edit ID (of class \sQuote{integer}).}
11 | \item{parentid: Wikipedia edit ID of the previous revision (of class \sQuote{integer}).}
12 | \item{user: Username of registered user responsible for the revision, IP address in case of anonymous revision (of class \sQuote{character}).}
13 | \item{userid: ID of registered user responsible for the revision, 0 in case of anonymous revision (of class \sQuote{integer}).}
14 | \item{timestamp: Date and time of the revision (of class \sQuote{POSIXct}).}
15 | \item{size: Revision size in bytes (of class \sQuote{integer}).}
16 | \item{comment: Revision comment (of class \sQuote{character}).}
17 | }
18 | }
19 | \source{
20 | Wikipedia API, \url{https://en.wikipedia.org/w/api.php}
21 | }
22 | \usage{
23 | get_history(legislature)
24 | }
25 | \arguments{
26 | \item{legislature}{A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}.}
27 | }
28 | \value{
29 | A data frame with columns as specified above.
30 | }
31 | \description{
32 | Fetches full revision histories of legislators' Wikipedia biographies for the specified legislature. Requires a working Internet connection.
33 | }
34 | \examples{
35 | \donttest{# Get entire 'History' table for the Austrian Nationalrat
36 | aut_history <- get_history(legislature = "aut")
37 | tibble::glimpse(aut_history)
38 |
39 | # Get 'History' table for NEOS party members of the Austrian Nationalrat
40 | aut_history_subset <- dplyr::semi_join(x = aut_history,
41 | y = dplyr::filter(get_political(legislature = "aut"),
42 | party == "NEOS"),
43 | by = "pageid")
44 | tibble::glimpse(aut_history_subset)
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/tests/testthat/test-errors.R:
--------------------------------------------------------------------------------
1 | # Argument-validation tests shared by all get_*() functions.
2 | # Every test is skipped on CRAN.
3 |
4 | # Returns the functions under test, in the order they are exercised.
5 | cld_fetchers <- function() {
6 |   list(
7 |     get_core, get_political, get_history, get_traffic, get_social,
8 |     get_portrait, get_office, get_profession, get_ids
9 |   )
10 | }
11 |
12 | test_that("Error is returned when legislature argument is not specified", {
13 |   skip_on_cran()
14 |   for (fetch in cld_fetchers()) {
15 |     expect_error(fetch())
16 |   }
17 | })
18 |
19 | test_that("Error is returned when legislature argument is not a valid country code", {
20 |   skip_on_cran()
21 |   # Each invalid input is tried against every function before moving on to
22 |   # the next input.
23 |   invalid_inputs <- list(NA, "bla", 2, TRUE)
24 |   for (bad_input in invalid_inputs) {
25 |     for (fetch in cld_fetchers()) {
26 |       expect_error(fetch(bad_input))
27 |     }
28 |   }
29 | })
30 |
31 | test_that("Error is returned when more than one country code is specified in legislature argument ", {
32 |   skip_on_cran()
33 |   two_codes <- c("aut", "deu")
34 |   for (fetch in cld_fetchers()) {
35 |     expect_error(fetch(two_codes))
36 |   }
37 | })
38 |
--------------------------------------------------------------------------------
/tests/testthat/test-get_ids.R:
--------------------------------------------------------------------------------
1 | test_that("IDs table is returned appropriately for each legislature", {
2 |   skip_on_cran()
3 |   # Legislature codes exercised by this test, in call order.
4 |   codes <- c("aut", "can", "cze", "esp", "fra", "deu", "irl", "sco", "gbr",
5 |              "usa_house", "usa_senate")
6 |   # Three passes over all codes; every check issues a fresh get_ids() call.
7 |   checks <- list(
8 |     function(code) expect_identical(class(get_ids(code)), "data.frame"),
9 |     function(code) expect_true(all(dim(get_ids(code)) > 0)),
10 |     function(code) expect_identical(colnames(get_ids(code))[1], c("wikidataid"))
11 |   )
12 |   for (check in checks) {
13 |     for (i in seq_along(codes)) {
14 |       check(codes[i])
15 |       # Wait 1 second after each call, 100 seconds after the last call of a pass
16 |       # (presumably to throttle requests -- confirm).
17 |       Sys.sleep(if (i < length(codes)) 1 else 100)
18 |     }
19 |   }
20 | })
21 |
--------------------------------------------------------------------------------
/tests/testthat/test-get_history.R:
--------------------------------------------------------------------------------
1 | test_that("Wikipedia History table is returned appropriately for each legislature", {
2 |   skip_on_cran()
3 |   # Legislature codes exercised by this test, in call order.
4 |   codes <- c("aut", "can", "cze", "esp", "fra", "deu", "irl", "sco", "gbr",
5 |              "usa_house", "usa_senate")
6 |   # Three passes over all codes; every check issues a fresh get_history() call.
7 |   checks <- list(
8 |     function(code) expect_identical(class(get_history(code)), "data.frame"),
9 |     function(code) expect_true(all(dim(get_history(code)) > 0)),
10 |     function(code) expect_identical(colnames(get_history(code))[1], c("pageid"))
11 |   )
12 |   for (check in checks) {
13 |     for (i in seq_along(codes)) {
14 |       check(codes[i])
15 |       # Wait 1 second after each call, 100 seconds after the last call of a pass
16 |       # (presumably to throttle requests -- confirm).
17 |       Sys.sleep(if (i < length(codes)) 1 else 100)
18 |     }
19 |   }
20 | })
21 |
--------------------------------------------------------------------------------
/tests/testthat/test-get_office.R:
--------------------------------------------------------------------------------
1 | test_that("Offices table is returned appropriately for each legislature", {
2 |   skip_on_cran()
3 |   # Legislature codes exercised by this test, in call order.
4 |   codes <- c("aut", "can", "cze", "esp", "fra", "deu", "irl", "sco", "gbr",
5 |              "usa_house", "usa_senate")
6 |   # Three passes over all codes; every check issues a fresh get_office() call.
7 |   checks <- list(
8 |     function(code) expect_identical(class(get_office(code)), "data.frame"),
9 |     function(code) expect_true(all(dim(get_office(code)) > 0)),
10 |     function(code) expect_identical(colnames(get_office(code))[1], c("wikidataid"))
11 |   )
12 |   for (check in checks) {
13 |     for (i in seq_along(codes)) {
14 |       check(codes[i])
15 |       # Wait 1 second after each call, 100 seconds after the last call of a pass
16 |       # (presumably to throttle requests -- confirm).
17 |       Sys.sleep(if (i < length(codes)) 1 else 100)
18 |     }
19 |   }
20 | })
21 |
--------------------------------------------------------------------------------
/tests/testthat/test-get_social.R:
--------------------------------------------------------------------------------
1 | test_that("Social table is returned appropriately for each legislature", {
2 |   skip_on_cran()
3 |   # Legislature codes exercised by this test, in call order.
4 |   codes <- c("aut", "can", "cze", "esp", "fra", "deu", "irl", "sco", "gbr",
5 |              "usa_house", "usa_senate")
6 |   # Three passes over all codes; every check issues a fresh get_social() call.
7 |   checks <- list(
8 |     function(code) expect_identical(class(get_social(code)), "data.frame"),
9 |     function(code) expect_true(all(dim(get_social(code)) > 0)),
10 |     function(code) expect_identical(colnames(get_social(code))[1], c("wikidataid"))
11 |   )
12 |   for (check in checks) {
13 |     for (i in seq_along(codes)) {
14 |       check(codes[i])
15 |       # Wait 1 second after each call, 100 seconds after the last call of a pass
16 |       # (presumably to throttle requests -- confirm).
17 |       Sys.sleep(if (i < length(codes)) 1 else 100)
18 |     }
19 |   }
20 | })
21 |
--------------------------------------------------------------------------------
/tests/testthat/test-get_traffic.R:
--------------------------------------------------------------------------------
1 | test_that("Wikipedia Traffic table is returned appropriately for each legislature", {
2 |   skip_on_cran()
3 |   # Legislature codes exercised by this test, in call order.
4 |   codes <- c("aut", "can", "cze", "esp", "fra", "deu", "irl", "sco", "gbr",
5 |              "usa_house", "usa_senate")
6 |   # Three passes over all codes; every check issues a fresh get_traffic() call.
7 |   checks <- list(
8 |     function(code) expect_identical(class(get_traffic(code)), "data.frame"),
9 |     function(code) expect_true(all(dim(get_traffic(code)) > 0)),
10 |     function(code) expect_identical(colnames(get_traffic(code))[1], c("pageid"))
11 |   )
12 |   for (check in checks) {
13 |     for (i in seq_along(codes)) {
14 |       check(codes[i])
15 |       # Wait 1 second after each call, 100 seconds after the last call of a pass
16 |       # (presumably to throttle requests -- confirm).
17 |       Sys.sleep(if (i < length(codes)) 1 else 100)
18 |     }
19 |   }
20 | })
21 |
--------------------------------------------------------------------------------
/tests/testthat/test-get_portrait.R:
--------------------------------------------------------------------------------
1 | test_that("Portraits table is returned appropriately for each legislature", {
2 |   skip_on_cran()
3 |   # Legislature codes exercised by this test, in call order.
4 |   codes <- c("aut", "can", "cze", "esp", "fra", "deu", "irl", "sco", "gbr",
5 |              "usa_house", "usa_senate")
6 |   # Three passes over all codes; every check issues a fresh get_portrait() call.
7 |   checks <- list(
8 |     function(code) expect_identical(class(get_portrait(code)), "data.frame"),
9 |     function(code) expect_true(all(dim(get_portrait(code)) > 0)),
10 |     function(code) expect_identical(colnames(get_portrait(code))[1], c("pageid"))
11 |   )
12 |   for (check in checks) {
13 |     for (i in seq_along(codes)) {
14 |       check(codes[i])
15 |       # Wait 1 second after each call, 100 seconds after the last call of a pass
16 |       # (presumably to throttle requests -- confirm).
17 |       Sys.sleep(if (i < length(codes)) 1 else 100)
18 |     }
19 |   }
20 | })
21 |
--------------------------------------------------------------------------------
/tests/testthat/test-get_political.R:
--------------------------------------------------------------------------------
1 | test_that("Political table is returned appropriately for each legislature", {
2 |   skip_on_cran()
3 |   # Legislature codes exercised by this test, in call order.
4 |   codes <- c("aut", "can", "cze", "esp", "fra", "deu", "irl", "sco", "gbr",
5 |              "usa_house", "usa_senate")
6 |   # Three passes over all codes; every check issues a fresh get_political() call.
7 |   checks <- list(
8 |     function(code) expect_identical(class(get_political(code)), "data.frame"),
9 |     function(code) expect_true(all(dim(get_political(code)) > 0)),
10 |     function(code) expect_identical(colnames(get_political(code))[1], c("pageid"))
11 |   )
12 |   for (check in checks) {
13 |     for (i in seq_along(codes)) {
14 |       check(codes[i])
15 |       # Wait 1 second after each call, 100 seconds after the last call of a pass
16 |       # (presumably to throttle requests -- confirm).
17 |       Sys.sleep(if (i < length(codes)) 1 else 100)
18 |     }
19 |   }
20 | })
21 |
--------------------------------------------------------------------------------
/R/cld_content.R:
--------------------------------------------------------------------------------
1 | #' List content of the CLD
2 | #'
3 | #' Returns a named list of legislatures and sessions available in the CLD. This provides a quick overview of the CLD's scope and valid three-letter country codes, and helps to conveniently loop/map over legislatures and sessions.
4 | #'
5 | #' @param legislature An optional character vector specifying one or more legislatures. Currently any of \sQuote{aut}, \sQuote{bra}, \sQuote{can}, \sQuote{cze}, \sQuote{deu}, \sQuote{esp}, \sQuote{fra}, \sQuote{gbr}, \sQuote{irl}, \sQuote{isr}, \sQuote{ita_house}, \sQuote{ita_senate}, \sQuote{jpn}, \sQuote{nld}, \sQuote{sco}, \sQuote{tur}, \sQuote{usa_house}, or \sQuote{usa_senate}. If NULL (the default), a list with all legislatures and sessions available in the CLD is returned.
6 | #' @return A list with names being three-letter country codes and with each element containing a vector that shows the sessions available for a legislature. An error is raised if any requested code is not among the available legislatures.
7 | #' @examples
8 | #' # Get a list of three-letter country codes and available sessions for all countries
9 | #' overview <- cld_content()
10 | #' tibble::glimpse(overview)
11 | #'
12 | #' # Get a list of available sessions for the French Assemblée and the Irish Dail
13 | #' sessions <- cld_content(legislature = c("fra", "irl"))
14 | #' tibble::glimpse(sessions)
15 | #' @export
16 | cld_content <- function(legislature = NULL) {
17 |   # Sessions available per legislature. The names of this list are the single
18 |   # source of truth for valid codes; validation below is derived from them.
19 |   output <- list(aut = 1:27, bra = 38:57, can = 1:44, cze = 1:9,
20 |                  deu = 1:20, esp = 1:14, fra = 1:16, gbr = 1:58,
21 |                  irl = 1:33, isr = 1:25, ita_house = 1:19, ita_senate = 1:19,
22 |                  jpn = 1:49, nld = 1:65, sco = 1:6, tur = 1:27,
23 |                  usa_house = 1:117, usa_senate = 1:117)
24 |   if (is.null(legislature)) {
25 |     return(output)
26 |   }
27 |   # Every requested code that is not a known legislature is reported at once.
28 |   unknown <- legislature[!(legislature %in% names(output))]
29 |   if (length(unknown) > 0) {
30 |     stop(paste0("\n\nPlease provide valid three-letter country codes. legislatoR does not recognize the country code or does not contain data for ",
31 |                 paste0(
32 |                   paste0("\"", unknown, "\""),
33 |                   collapse = ", "),
34 |                 ". Use `legislatoR::cld_content()` to see country codes of available legislatures."))
35 |   }
36 |   output[legislature]
37 | }
38 |
--------------------------------------------------------------------------------
/man/get_core.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/get_core.R
3 | \name{get_core}
4 | \alias{get_core}
5 | \title{Fetch 'Core' table}
6 | \format{
7 | Data frame with columns (varies by legislature):
8 | \itemize{
9 | \item{country: ISO 3166-1 alpha-3 three-letter country code (of class \sQuote{character}).}
10 | \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer} or \sQuote{character}).}
11 | \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).}
12 | \item{wikititle: A legislator's undirected Wikipedia title (of class \sQuote{character}).}
13 | \item{name: A legislator's full name (of class \sQuote{character}).}
14 | \item{sex: A legislator's sex (of class \sQuote{character}).}
15 | \item{ethnicity: A legislator's ethnicity (of class \sQuote{character}).}
16 | \item{religion: A legislator's religious denomination (of class \sQuote{character}).}
17 | \item{birth: A legislator's date of birth (of class \sQuote{POSIXct}).}
18 | \item{death: A legislator's date of death (of class \sQuote{POSIXct}).}
19 | \item{birthplace: Comma separated latitude and longitude of a legislator's place of birth (of class \sQuote{character}).}
20 | \item{deathplace: Comma separated latitude and longitude of a legislator's place of death (of class \sQuote{character}).}
21 | }
22 | }
23 | \source{
24 | Wikipedia, \url{https://www.wikipedia.org/} \cr
25 | Wikipedia API, \url{https://en.wikipedia.org/w/api.php} \cr
26 | Wikidata API, \url{https://www.wikidata.org/wiki/Wikidata:Main_Page} \cr
27 | Wikimedia Commons, \url{https://commons.wikimedia.org/wiki/Main_Page} \cr
28 | Face++ Cognitive Services API, \url{https://www.faceplusplus.com/} \cr
29 | Germany Bundestag Open Data, \url{https://www.bundestag.de/services/opendata}
30 | }
31 | \usage{
32 | get_core(legislature)
33 | }
34 | \arguments{
35 | \item{legislature}{A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}.}
36 | }
37 | \value{
38 | A data frame with columns as specified above.
39 | }
40 | \description{
41 | Fetches sociodemographic data of legislators for the specified legislature. Requires a working Internet connection.
42 | }
43 | \examples{
44 | \donttest{# Get entire 'Core' table for the German Bundestag
45 | deu_core <- get_core(legislature = "deu")
46 | tibble::glimpse(deu_core)
47 |
48 | # Get 'Core' table for 16th session of the German Bundestag
49 | deu_core_subset <- dplyr::semi_join(x = deu_core,
50 | y = dplyr::filter(get_political(legislature = "deu"),
51 | session == 16),
52 | by = "pageid")
53 | tibble::glimpse(deu_core_subset)
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/tests/testthat/test-get_profession.R:
--------------------------------------------------------------------------------
1 | test_that("Professions table is returned appropriately for each legislature", {
2 |   skip_on_cran()
3 |   # Legislature codes exercised by this test, in call order.
4 |   codes <- c("aut", "can", "cze", "esp", "fra", "deu", "irl", "sco", "gbr",
5 |              "usa_house", "usa_senate")
6 |   # Three passes over all codes; every check issues a fresh get_profession() call.
7 |   checks <- list(
8 |     function(code) expect_identical(class(get_profession(code)), "data.frame"),
9 |     function(code) expect_true(all(dim(get_profession(code)) > 0)),
10 |     function(code) expect_identical(colnames(get_profession(code))[1], c("wikidataid"))
11 |   )
12 |   for (check in checks) {
13 |     for (i in seq_along(codes)) {
14 |       check(codes[i])
15 |       # Wait 1 second after each call, 100 seconds after the last call of a pass
16 |       # (presumably to throttle requests -- confirm).
17 |       Sys.sleep(if (i < length(codes)) 1 else 100)
18 |     }
19 |   }
20 | })
21 |
--------------------------------------------------------------------------------
/man/get_political.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/get_political.R
3 | \name{get_political}
4 | \alias{get_political}
5 | \title{Fetch 'Political' table}
6 | \format{
7 | Data frame in long format with columns (varies by legislature):
8 | \itemize{
9 | \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer}).}
10 | \item{session: Legislative period (of class \sQuote{integer}).}
11 | \item{party: A legislator's party affiliation (of class \sQuote{character}). See \url{https://github.com/saschagobel/legislatoR} for the full form of abbreviated party names and English translations of non-English party names.}
12 | \item{constituency: A legislator's constituency (of class \sQuote{character}).}
13 | \item{constituency2: A legislator's constituency (upper level, if applicable, of class \sQuote{character}).}
14 | \item{constituency_id: ID of a legislator's constituency (of class \sQuote{character}).}
15 | \item{session_start: Date the legislative period started (of class \sQuote{Date}).}
16 | \item{session_end: Date the legislative period ended (of class \sQuote{Date}).}
17 | \item{service: A legislator's period of service in days during the respective session (of class \sQuote{integer})}
18 | \item{government (or similar): Indicator of a legislator's majority status in parliament (of class \sQuote{logical}). Further columns with extensions of this might exist.}
19 | \item{leader (or similar): Indicator of a legislator's leader status in parliament (of class \sQuote{logical}). Further columns with extensions of this might exist.}
20 | }
21 | }
22 | \source{
23 | Wikipedia, \url{https://www.wikipedia.org/} \cr
24 | Czech Republic Parliamentary Members Archive \url{https://public.psp.cz/sqw/fsnem.sqw?zvo=1} \cr
25 | Spain Parliamentary Members Archive \url{https://www.congreso.es/es/busqueda-de-diputados}
26 | }
27 | \usage{
28 | get_political(legislature)
29 | }
30 | \arguments{
31 | \item{legislature}{A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{can}, \sQuote{cze}, \sQuote{esp}, \sQuote{fra}, \sQuote{deu}, \sQuote{irl}, \sQuote{sco}, \sQuote{gbr}, \sQuote{usa_house}, or \sQuote{usa_senate}.}
32 | }
33 | \value{
34 | A data frame with columns as specified above.
35 | }
36 | \description{
37 | Fetches political data of legislators for the specified legislature. Requires a working Internet connection.
38 | }
39 | \examples{
40 | \donttest{# Get entire 'Political' table for the Czech Poslanecka Snemovna
41 | cze_political <- get_political(legislature = "cze")
42 | tibble::glimpse(cze_political)
43 |
44 | # Get 'Political' table for female ODS party members of the Czech Poslanecka Snemovna
45 | cze_political_subset <- dplyr::semi_join(x = dplyr::filter(cze_political,
46 | party == "ODS"),
47 | y = dplyr::filter(get_core(legislature = "cze"),
48 | sex == "female"),
49 | by = "pageid")
50 | tibble::glimpse(cze_political_subset)
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/R/get_office.R:
--------------------------------------------------------------------------------
1 | #' Fetch 'Offices' table
2 | #'
3 | #' Fetches political and other offices of legislators for the specified legislature. Requires a working Internet connection.
4 | #'
5 | #' @param legislature A single character string specifying the code of the legislature for which data shall be fetched. Accepted codes are \sQuote{aut}, \sQuote{bra}, \sQuote{can}, \sQuote{cze}, \sQuote{deu}, \sQuote{esp}, \sQuote{fra}, \sQuote{gbr}, \sQuote{irl}, \sQuote{isr}, \sQuote{ita_house}, \sQuote{ita_senate}, \sQuote{jpn}, \sQuote{nld}, \sQuote{sco}, \sQuote{tur}, \sQuote{usa_house}, and \sQuote{usa_senate}. An error is raised for any other value, for more than one value, and for empty input.
6 | #' @return A data frame with columns as specified above.
7 | #' @format Data frame in wide format with columns (varies by legislature):
8 | #' \itemize{
9 | #' \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).}
10 | #' \item{office_1: political or other office held by a legislator (of class \sQuote{logical}).}
11 | #' \item{office_2: ... (of class \sQuote{logical}).}
12 | #' \item{...}
13 | #' }
14 | #' @examples
15 | #' \donttest{# Get entire 'Offices' table for the United States Senate
16 | #' usa_offices <- get_office(legislature = "usa_senate")
17 | #' tibble::glimpse(usa_offices)
18 | #'
19 | #' # Get 'Offices' table for female members of the United States Senate
20 | #' usa_offices_subset <- dplyr::semi_join(x = usa_offices,
21 | #'                                        y = dplyr::filter(get_core(legislature = "usa_senate"),
22 | #'                                                          sex == "female"),
23 | #'                                        by = "wikidataid")
24 | #' tibble::glimpse(usa_offices_subset)
25 | #' }
26 | #' @source
27 | #' Wikidata API, \url{https://www.wikidata.org/wiki/Wikidata:Main_Page}
28 | #' @export
29 | #' @importFrom curl nslookup
30 | #' @import dplyr
31 | get_office <- function(legislature) {
32 |   if (length(legislature) > 1) {
33 |     stop("\n\nNo more than one legislature can be called at once. Please provide only one valid three-letter country code.")
34 |   }
35 |   valid_codes <- c("aut", "bra", "can", "cze",
36 |                    "deu", "esp", "fra", "gbr",
37 |                    "irl", "isr", "ita_house", "ita_senate",
38 |                    "jpn", "nld", "sco", "tur",
39 |                    "usa_house", "usa_senate")
40 |   # isTRUE() also rejects zero-length input (NULL, character(0)), which would
41 |   # otherwise make `if` fail with an unhelpful "argument is of length zero".
42 |   if (!isTRUE(legislature %in% valid_codes)) {
43 |     stop(paste0("\n\nPlease provide a valid three-letter country code. legislatoR does not recognize the country code or does not contain data for ",
44 |                 paste0(paste0("\"", legislature, "\""), collapse = ", "),
45 |                 ". Use `legislatoR::cld_content()` to see country codes of available legislatures."))
46 |   }
47 |   # With error = FALSE the DNS lookup yields NULL instead of raising when it fails.
48 |   if (is.null(curl::nslookup("www.harvard.edu", error = FALSE))) {
49 |     stop("\n\nlegislatoR cannot establish a connection to Harvard Dataverse. Please check your Internet connection and whether Harvard Dataverse is online.")
50 |   }
51 |   endpoint <- "https://dataverse.harvard.edu/api/access/datafile/"
52 |   file_id <- sysdata %>% filter(.data$table == "office" & .data$country == legislature)
53 |   # Guard against a missing or ambiguous lookup entry; without it the download
54 |   # below would fail with an obscure connection error.
55 |   if (length(file_id$id) != 1) {
56 |     stop("\n\nlegislatoR could not identify a unique Harvard Dataverse file for the requested table and legislature.")
57 |   }
58 |   dvurl <- paste0(endpoint, file_id$id)
59 |   connect <- url(dvurl)
60 |   on.exit(close(connect), add = TRUE)
61 |   readRDS(connect)
62 | }
63 |
--------------------------------------------------------------------------------
/R/get_portrait.R:
--------------------------------------------------------------------------------
1 | #' Fetch 'Portrait' table
2 | #'
3 | #' Fetches portrait urls of legislators for the specified legislature. Requires a working Internet connection.
4 | #'
5 | #' @param legislature A single character string specifying the code of the legislature for which data shall be fetched. Accepted codes are \sQuote{aut}, \sQuote{bra}, \sQuote{can}, \sQuote{cze}, \sQuote{deu}, \sQuote{esp}, \sQuote{fra}, \sQuote{gbr}, \sQuote{irl}, \sQuote{isr}, \sQuote{ita_house}, \sQuote{ita_senate}, \sQuote{jpn}, \sQuote{nld}, \sQuote{sco}, \sQuote{tur}, \sQuote{usa_house}, and \sQuote{usa_senate}. An error is raised for any other value, for more than one value, and for empty input.
6 | #' @return A data frame with columns as specified above.
7 | #' @format Data frame with columns (varies by legislature):
8 | #' \itemize{
9 | #' \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer}).}
10 | #' \item{image_url: URL linking to a legislator's portrait on Wikimedia Commons (of class \sQuote{character}).}
11 | #' }
12 | #' @examples
13 | #' \donttest{# Get entire 'Portraits' table for the United States Senate
14 | #' usa_portraits <- get_portrait(legislature = "usa_senate")
15 | #' tibble::glimpse(usa_portraits)
16 | #'
17 | #' # Get 'Portraits' table for Democratic members of the United States Senate
18 | #' usa_port_subset <- dplyr::semi_join(x = usa_portraits,
19 | #'                                     y = dplyr::filter(get_political(legislature = "usa_senate"),
20 | #'                                                       party == "D"),
21 | #'                                     by = "pageid")
22 | #' tibble::glimpse(usa_port_subset)
23 | #' }
24 | #' @source
25 | #' Wikipedia API, \url{https://en.wikipedia.org/w/api.php} \cr
26 | #' Wikimedia Commons, \url{https://commons.wikimedia.org/wiki/Main_Page}
27 | #' @export
28 | #' @importFrom curl nslookup
29 | #' @import dplyr
30 | get_portrait <- function(legislature) {
31 |   if (length(legislature) > 1) {
32 |     stop("\n\nNo more than one legislature can be called at once. Please provide only one valid three-letter country code.")
33 |   }
34 |   valid_codes <- c("aut", "bra", "can", "cze",
35 |                    "deu", "esp", "fra", "gbr",
36 |                    "irl", "isr", "ita_house", "ita_senate",
37 |                    "jpn", "nld", "sco", "tur",
38 |                    "usa_house", "usa_senate")
39 |   # isTRUE() also rejects zero-length input (NULL, character(0)), which would
40 |   # otherwise make `if` fail with an unhelpful "argument is of length zero".
41 |   if (!isTRUE(legislature %in% valid_codes)) {
42 |     stop(paste0("\n\nPlease provide a valid three-letter country code. legislatoR does not recognize the country code or does not contain data for ",
43 |                 paste0(paste0("\"", legislature, "\""), collapse = ", "),
44 |                 ". Use `legislatoR::cld_content()` to see country codes of available legislatures."))
45 |   }
46 |   # With error = FALSE the DNS lookup yields NULL instead of raising when it fails.
47 |   if (is.null(curl::nslookup("www.harvard.edu", error = FALSE))) {
48 |     stop("\n\nlegislatoR cannot establish a connection to Harvard Dataverse. Please check your Internet connection and whether Harvard Dataverse is online.")
49 |   }
50 |   endpoint <- "https://dataverse.harvard.edu/api/access/datafile/"
51 |   file_id <- sysdata %>% filter(.data$table == "portrait" & .data$country == legislature)
52 |   # Guard against a missing or ambiguous lookup entry; without it the download
53 |   # below would fail with an obscure connection error.
54 |   if (length(file_id$id) != 1) {
55 |     stop("\n\nlegislatoR could not identify a unique Harvard Dataverse file for the requested table and legislature.")
56 |   }
57 |   dvurl <- paste0(endpoint, file_id$id)
58 |   connect <- url(dvurl)
59 |   on.exit(close(connect), add = TRUE)
60 |   readRDS(connect)
61 | }
62 |
--------------------------------------------------------------------------------
/R/get_profession.R:
--------------------------------------------------------------------------------
1 | #' Fetch 'Professions' table
2 | #'
3 | #' Fetches occupational data of legislators for the specified legislature. Requires a working Internet connection.
4 | #'
5 | #' @param legislature A single character string specifying the code of the legislature for which data shall be fetched. Accepted codes are \sQuote{aut}, \sQuote{bra}, \sQuote{can}, \sQuote{cze}, \sQuote{deu}, \sQuote{esp}, \sQuote{fra}, \sQuote{gbr}, \sQuote{irl}, \sQuote{isr}, \sQuote{ita_house}, \sQuote{ita_senate}, \sQuote{jpn}, \sQuote{nld}, \sQuote{sco}, \sQuote{tur}, \sQuote{usa_house}, and \sQuote{usa_senate}. An error is raised for any other value, for more than one value, and for empty input.
6 | #' @return A data frame with columns as specified above.
7 | #' @format Data frame in wide format with columns (varies by legislature):
8 | #' \itemize{
9 | #' \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).}
10 | #' \item{occupation_1: occupation a legislator practiced or was trained in (of class \sQuote{logical}).}
11 | #' \item{occupation_2: ... (of class \sQuote{logical}).}
12 | #' \item{...}
13 | #' }
14 | #' @examples
15 | #' \donttest{# Get entire 'Professions' table for the United States House
16 | #' usa_professions <- get_profession(legislature = "usa_house")
17 | #' tibble::glimpse(usa_professions)
18 | #'
19 | #' # Get 'Professions' table for female members of the United States House
20 | #' usa_professions_subset <- dplyr::semi_join(x = usa_professions,
21 | #'                                            y = dplyr::filter(get_core(legislature = "usa_house"),
22 | #'                                                              sex == "female"),
23 | #'                                            by = "wikidataid")
24 | #' tibble::glimpse(usa_professions_subset)
25 | #' }
26 | #' @source
27 | #' Wikidata API, \url{https://www.wikidata.org/wiki/Wikidata:Main_Page}
28 | #' @export
29 | #' @importFrom curl nslookup
30 | #' @import dplyr
31 | get_profession <- function(legislature) {
32 |   if (length(legislature) > 1) {
33 |     stop("\n\nNo more than one legislature can be called at once. Please provide only one valid three-letter country code.")
34 |   }
35 |   valid_codes <- c("aut", "bra", "can", "cze",
36 |                    "deu", "esp", "fra", "gbr",
37 |                    "irl", "isr", "ita_house", "ita_senate",
38 |                    "jpn", "nld", "sco", "tur",
39 |                    "usa_house", "usa_senate")
40 |   # isTRUE() also rejects zero-length input (NULL, character(0)), which would
41 |   # otherwise make `if` fail with an unhelpful "argument is of length zero".
42 |   if (!isTRUE(legislature %in% valid_codes)) {
43 |     stop(paste0("\n\nPlease provide a valid three-letter country code. legislatoR does not recognize the country code or does not contain data for ",
44 |                 paste0(paste0("\"", legislature, "\""), collapse = ", "),
45 |                 ". Use `legislatoR::cld_content()` to see country codes of available legislatures."))
46 |   }
47 |   # With error = FALSE the DNS lookup yields NULL instead of raising when it fails.
48 |   if (is.null(curl::nslookup("www.harvard.edu", error = FALSE))) {
49 |     stop("\n\nlegislatoR cannot establish a connection to Harvard Dataverse. Please check your Internet connection and whether Harvard Dataverse is online.")
50 |   }
51 |   endpoint <- "https://dataverse.harvard.edu/api/access/datafile/"
52 |   file_id <- sysdata %>% filter(.data$table == "profession" & .data$country == legislature)
53 |   # Guard against a missing or ambiguous lookup entry; without it the download
54 |   # below would fail with an obscure connection error.
55 |   if (length(file_id$id) != 1) {
56 |     stop("\n\nlegislatoR could not identify a unique Harvard Dataverse file for the requested table and legislature.")
57 |   }
58 |   dvurl <- paste0(endpoint, file_id$id)
59 |   connect <- url(dvurl)
60 |   on.exit(close(connect), add = TRUE)
61 |   readRDS(connect)
62 | }
63 |
--------------------------------------------------------------------------------
/R/get_traffic.R:
--------------------------------------------------------------------------------
1 | #' Fetch Wikipedia 'Traffic' table
2 | #'
3 | #' Fetches daily user traffic on legislators' Wikipedia biographies for the specified legislature. Requires a working Internet connection.
4 | #'
5 | #' @param legislature A single character string specifying the code of the legislature for which data shall be fetched. Accepted codes are \sQuote{aut}, \sQuote{bra}, \sQuote{can}, \sQuote{cze}, \sQuote{deu}, \sQuote{esp}, \sQuote{fra}, \sQuote{gbr}, \sQuote{irl}, \sQuote{isr}, \sQuote{ita_house}, \sQuote{ita_senate}, \sQuote{jpn}, \sQuote{nld}, \sQuote{sco}, \sQuote{tur}, \sQuote{usa_house}, and \sQuote{usa_senate}. An error is raised for any other value, for more than one value, and for empty input.
6 | #' @return A data frame with columns as specified above.
7 | #' @format Data frame with columns:
8 | #' \itemize{
9 | #' \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer}).}
10 | #' \item{date: Date for which user traffic is recorded, from 2015-07-01 to 2018-12-31 UTC (of class \sQuote{POSIXct}).}
11 | #' \item{traffic: Daily non-unique user visits (of class \sQuote{numeric}).}
12 | #' }
13 | #' @examples
14 | #' \donttest{# Get entire 'Traffic' table for the Scottish Parliament
15 | #' sco_traffic <- get_traffic(legislature = "sco")
16 | #' tibble::glimpse(sco_traffic)
17 | #'
18 | #' # Add Wikidataid to 'Traffic' table for the Scottish Parliament
19 | #' sco_traffic_subset <- dplyr::inner_join(x = dplyr::select(get_core(legislature = "sco"),
20 | #'                                                           pageid, wikidataid),
21 | #'                                         y = sco_traffic,
22 | #'                                         by = "pageid")
23 | #' tibble::glimpse(sco_traffic_subset)
24 | #' }
25 | #' @source
26 | #' Wikimedia API, \url{https://wikimedia.org/api/rest_v1/} \cr
27 | #' \url{http://petermeissner.de:8880/}
28 | #' @export
29 | #' @importFrom curl nslookup
30 | #' @import dplyr
31 | get_traffic <- function(legislature) {
32 |   if (length(legislature) > 1) {
33 |     stop("\n\nNo more than one legislature can be called at once. Please provide only one valid three-letter country code.")
34 |   }
35 |   valid_codes <- c("aut", "bra", "can", "cze",
36 |                    "deu", "esp", "fra", "gbr",
37 |                    "irl", "isr", "ita_house", "ita_senate",
38 |                    "jpn", "nld", "sco", "tur",
39 |                    "usa_house", "usa_senate")
40 |   # isTRUE() also rejects zero-length input (NULL, character(0)), which would
41 |   # otherwise make `if` fail with an unhelpful "argument is of length zero".
42 |   if (!isTRUE(legislature %in% valid_codes)) {
43 |     stop(paste0("\n\nPlease provide a valid three-letter country code. legislatoR does not recognize the country code or does not contain data for ",
44 |                 paste0(paste0("\"", legislature, "\""), collapse = ", "),
45 |                 ". Use `legislatoR::cld_content()` to see country codes of available legislatures."))
46 |   }
47 |   # With error = FALSE the DNS lookup yields NULL instead of raising when it fails.
48 |   if (is.null(curl::nslookup("www.harvard.edu", error = FALSE))) {
49 |     stop("\n\nlegislatoR cannot establish a connection to Harvard Dataverse. Please check your Internet connection and whether Harvard Dataverse is online.")
50 |   }
51 |   endpoint <- "https://dataverse.harvard.edu/api/access/datafile/"
52 |   file_id <- sysdata %>% filter(.data$table == "traffic" & .data$country == legislature)
53 |   # Guard against a missing or ambiguous lookup entry; without it the download
54 |   # below would fail with an obscure connection error.
55 |   if (length(file_id$id) != 1) {
56 |     stop("\n\nlegislatoR could not identify a unique Harvard Dataverse file for the requested table and legislature.")
57 |   }
58 |   dvurl <- paste0(endpoint, file_id$id)
59 |   connect <- url(dvurl)
60 |   on.exit(close(connect), add = TRUE)
61 |   readRDS(connect)
62 | }
63 |
--------------------------------------------------------------------------------
/R/get_social.R:
--------------------------------------------------------------------------------
1 | #' Fetch 'Social' table
2 | #'
3 | #' Fetches social media handles and website URLs of legislators for the specified legislature. Requires a working Internet connection.
4 | #'
5 | #' @param legislature A single character string specifying the code of the legislature for which data shall be fetched. Accepted codes are \sQuote{aut}, \sQuote{bra}, \sQuote{can}, \sQuote{cze}, \sQuote{deu}, \sQuote{esp}, \sQuote{fra}, \sQuote{gbr}, \sQuote{irl}, \sQuote{isr}, \sQuote{ita_house}, \sQuote{ita_senate}, \sQuote{jpn}, \sQuote{nld}, \sQuote{sco}, \sQuote{tur}, \sQuote{usa_house}, and \sQuote{usa_senate}. An error is raised for any other value, for more than one value, and for empty input.
6 | #' @return A data frame with columns as specified above.
7 | #' @format Data frame with columns (varies by legislature):
8 | #' \itemize{
9 | #' \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).}
10 | #' \item{twitter: Twitter handle (of class \sQuote{character}).}
11 | #' \item{facebook: Facebook handle (of class \sQuote{character}).}
12 | #' \item{youtube: Youtube ID (of class \sQuote{character}).}
13 | #' \item{googlep: Google Plus ID (of class \sQuote{character}).}
14 | #' \item{instagram: Instagram handle (of class \sQuote{character}).}
15 | #' \item{linkedin: LinkedIn ID (of class \sQuote{character}).}
16 | #' \item{website: Personal website URL (of class \sQuote{character}).}
17 | #' }
18 | #' @examples
19 | #' \donttest{# Get entire 'Social' table for the UK House of Commons
20 | #' gbr_social <- get_social(legislature = "gbr")
21 | #' tibble::glimpse(gbr_social)
22 | #'
23 | #' # Get 'Social' table for members of the UK House of Commons with available TheyWorkForYou ID
24 | #' gbr_social_subset <- dplyr::semi_join(x = gbr_social,
25 | #'                                       y = dplyr::filter(get_ids(legislature = "gbr"),
26 | #'                                                         !is.na(theyworkforyou)),
27 | #'                                       by = "wikidataid")
28 | #' tibble::glimpse(gbr_social_subset)
29 | #' }
30 | #' @source
31 | #' Wikidata API, \url{https://www.wikidata.org/wiki/Wikidata:Main_Page}
32 | #' @export
33 | #' @importFrom curl nslookup
34 | #' @import dplyr
35 | get_social <- function(legislature) {
36 |   if (length(legislature) > 1) {
37 |     stop("\n\nNo more than one legislature can be called at once. Please provide only one valid three-letter country code.")
38 |   }
39 |   valid_codes <- c("aut", "bra", "can", "cze",
40 |                    "deu", "esp", "fra", "gbr",
41 |                    "irl", "isr", "ita_house", "ita_senate",
42 |                    "jpn", "nld", "sco", "tur",
43 |                    "usa_house", "usa_senate")
44 |   # isTRUE() also rejects zero-length input (NULL, character(0)), which would
45 |   # otherwise make `if` fail with an unhelpful "argument is of length zero".
46 |   if (!isTRUE(legislature %in% valid_codes)) {
47 |     stop(paste0("\n\nPlease provide a valid three-letter country code. legislatoR does not recognize the country code or does not contain data for ",
48 |                 paste0(paste0("\"", legislature, "\""), collapse = ", "),
49 |                 ". Use `legislatoR::cld_content()` to see country codes of available legislatures."))
50 |   }
51 |   # With error = FALSE the DNS lookup yields NULL instead of raising when it fails.
52 |   if (is.null(curl::nslookup("www.harvard.edu", error = FALSE))) {
53 |     stop("\n\nlegislatoR cannot establish a connection to Harvard Dataverse. Please check your Internet connection and whether Harvard Dataverse is online.")
54 |   }
55 |   endpoint <- "https://dataverse.harvard.edu/api/access/datafile/"
56 |   file_id <- sysdata %>% filter(.data$table == "social" & .data$country == legislature)
57 |   # Guard against a missing or ambiguous lookup entry; without it the download
58 |   # below would fail with an obscure connection error.
59 |   if (length(file_id$id) != 1) {
60 |     stop("\n\nlegislatoR could not identify a unique Harvard Dataverse file for the requested table and legislature.")
61 |   }
62 |   dvurl <- paste0(endpoint, file_id$id)
63 |   connect <- url(dvurl)
64 |   on.exit(close(connect), add = TRUE)
65 |   readRDS(connect)
66 | }
67 |
--------------------------------------------------------------------------------
/R/get_history.R:
--------------------------------------------------------------------------------
1 | #' Fetch Wikipedia 'History' table
2 | #'
3 | #' Fetches full revision histories of legislators' Wikipedia biographies for the specified legislature. Requires a working Internet connection.
4 | #'
5 | #' @param legislature A single character string specifying the code of the legislature for which data shall be fetched. Accepted codes are \sQuote{aut}, \sQuote{bra}, \sQuote{can}, \sQuote{cze}, \sQuote{deu}, \sQuote{esp}, \sQuote{fra}, \sQuote{gbr}, \sQuote{irl}, \sQuote{isr}, \sQuote{ita_house}, \sQuote{ita_senate}, \sQuote{jpn}, \sQuote{nld}, \sQuote{sco}, \sQuote{tur}, \sQuote{usa_house}, and \sQuote{usa_senate}. An error is raised for any other value, for more than one value, and for empty input.
6 | #' @return A data frame with columns as specified above.
7 | #' @format Data frame with columns:
8 | #' \itemize{
9 | #' \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer}).}
10 | #' \item{revid: Wikipedia edit ID (of class \sQuote{integer}).}
11 | #' \item{parentid: Wikipedia edit ID of the previous revision (of class \sQuote{integer}).}
12 | #' \item{user: Username of registered user responsible for the revision, IP address in case of anonymous revision (of class \sQuote{character}).}
13 | #' \item{userid: ID of registered user responsible for the revision, 0 in case of anonymous revision (of class \sQuote{integer}).}
14 | #' \item{timestamp: Date and time of the revision (of class \sQuote{POSIXct}).}
15 | #' \item{size: Revision size in bytes (of class \sQuote{integer}).}
16 | #' \item{comment: Revision comment (of class \sQuote{character}).}
17 | #' }
18 | #' @examples
19 | #' \donttest{# Get entire 'History' table for the Austrian Nationalrat
20 | #' aut_history <- get_history(legislature = "aut")
21 | #' tibble::glimpse(aut_history)
22 | #'
23 | #' # Get 'History' table for NEOS party members of the Austrian Nationalrat
24 | #' aut_history_subset <- dplyr::semi_join(x = aut_history,
25 | #'                                        y = dplyr::filter(get_political(legislature = "aut"),
26 | #'                                                          party == "NEOS"),
27 | #'                                        by = "pageid")
28 | #' tibble::glimpse(aut_history_subset)
29 | #' }
30 | #' @source
31 | #' Wikipedia API, \url{https://en.wikipedia.org/w/api.php}
32 | #' @export
33 | #' @importFrom curl nslookup
34 | #' @import dplyr
35 | get_history <- function(legislature) {
36 |   if (length(legislature) > 1) {
37 |     stop("\n\nNo more than one legislature can be called at once. Please provide only one valid three-letter country code.")
38 |   }
39 |   valid_codes <- c("aut", "bra", "can", "cze",
40 |                    "deu", "esp", "fra", "gbr",
41 |                    "irl", "isr", "ita_house", "ita_senate",
42 |                    "jpn", "nld", "sco", "tur",
43 |                    "usa_house", "usa_senate")
44 |   # isTRUE() also rejects zero-length input (NULL, character(0)), which would
45 |   # otherwise make `if` fail with an unhelpful "argument is of length zero".
46 |   if (!isTRUE(legislature %in% valid_codes)) {
47 |     stop(paste0("\n\nPlease provide a valid three-letter country code. legislatoR does not recognize the country code or does not contain data for ",
48 |                 paste0(paste0("\"", legislature, "\""), collapse = ", "),
49 |                 ". Use `legislatoR::cld_content()` to see country codes of available legislatures."))
50 |   }
51 |   # With error = FALSE the DNS lookup yields NULL instead of raising when it fails.
52 |   if (is.null(curl::nslookup("www.harvard.edu", error = FALSE))) {
53 |     stop("\n\nlegislatoR cannot establish a connection to Harvard Dataverse. Please check your Internet connection and whether Harvard Dataverse is online.")
54 |   }
55 |   endpoint <- "https://dataverse.harvard.edu/api/access/datafile/"
56 |   file_id <- sysdata %>% filter(.data$table == "history" & .data$country == legislature)
57 |   # Guard against a missing or ambiguous lookup entry; without it the download
58 |   # below would fail with an obscure connection error.
59 |   if (length(file_id$id) != 1) {
60 |     stop("\n\nlegislatoR could not identify a unique Harvard Dataverse file for the requested table and legislature.")
61 |   }
62 |   dvurl <- paste0(endpoint, file_id$id)
63 |   connect <- url(dvurl)
64 |   on.exit(close(connect), add = TRUE)
65 |   readRDS(connect)
66 | }
67 |
--------------------------------------------------------------------------------
/tests/testthat/test-get_core.R:
--------------------------------------------------------------------------------
1 | test_that("Core table is returned appropriately for each legislature", {
2 |   skip_on_cran()
3 |   # Expected value of the `country` column, keyed by legislature code.
4 |   expected_country <- c(
5 |     aut = "AUT", can = "CAN", cze = "CZE", esp = "ESP",
6 |     fra = "FRA", deu = "DEU", irl = "IRL", sco = "SCO",
7 |     gbr = "GBR", usa_house = "USA-H", usa_senate = "USA-S"
8 |   )
9 |   for (code in names(expected_country)) {
10 |     # Download each table once and run every check on that copy instead of
11 |     # fetching it again for each expectation.
12 |     core <- get_core(code)
13 |     expect_identical(class(core), "data.frame", info = code)
14 |     expect_true(all(dim(core) > 0), info = code)
15 |     expect_identical(colnames(core)[1:3], c("country", "pageid", "wikidataid"), info = code)
16 |     # Check all rows rather than one randomly sampled row so the result is reproducible.
17 |     expect_identical(unique(core$country), expected_country[[code]], info = code)
18 |     # Short pause between consecutive downloads.
19 |     Sys.sleep(1)
20 |   }
21 | })
22 |
--------------------------------------------------------------------------------
/R/get_core.R:
--------------------------------------------------------------------------------
1 | #' Fetch 'Core' table
2 | #'
3 | #' Fetches sociodemographic data of legislators for the specified legislature. Requires a working Internet connection.
4 | #'
5 | #' @param legislature A single character string specifying the code of the legislature for which data shall be fetched. Accepted codes are \sQuote{aut}, \sQuote{bra}, \sQuote{can}, \sQuote{cze}, \sQuote{deu}, \sQuote{esp}, \sQuote{fra}, \sQuote{gbr}, \sQuote{irl}, \sQuote{isr}, \sQuote{ita_house}, \sQuote{ita_senate}, \sQuote{jpn}, \sQuote{nld}, \sQuote{sco}, \sQuote{tur}, \sQuote{usa_house}, and \sQuote{usa_senate}. An error is raised for any other value, for more than one value, and for empty input.
6 | #' @return A data frame with columns as specified above.
7 | #' @format Data frame with columns (varies by legislature):
8 | #' \itemize{
9 | #' \item{country: ISO 3166-1 alpha-3 three-letter country code (of class \sQuote{character}).}
10 | #' \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer} or \sQuote{character}).}
11 | #' \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).}
12 | #' \item{wikititle: A legislator's undirected Wikipedia title (of class \sQuote{character}).}
13 | #' \item{name: A legislator's full name (of class \sQuote{character}).}
14 | #' \item{sex: A legislator's sex (of class \sQuote{character}).}
15 | #' \item{ethnicity: A legislator's ethnicity (of class \sQuote{character}).}
16 | #' \item{religion: A legislator's religious denomination (of class \sQuote{character}).}
17 | #' \item{birth: A legislator's date of birth (of class \sQuote{POSIXct}).}
18 | #' \item{death: A legislator's date of death (of class \sQuote{POSIXct}).}
19 | #' \item{birthplace: Comma separated latitude and longitude of a legislator's place of birth (of class \sQuote{character}).}
20 | #' \item{deathplace: Comma separated latitude and longitude of a legislator's place of death (of class \sQuote{character}).}
21 | #' }
22 | #' @examples
23 | #' \donttest{# Get entire 'Core' table for the German Bundestag
24 | #' deu_core <- get_core(legislature = "deu")
25 | #' tibble::glimpse(deu_core)
26 | #'
27 | #' # Get 'Core' table for 16th session of the German Bundestag
28 | #' deu_core_subset <- dplyr::semi_join(x = deu_core,
29 | #'                                     y = dplyr::filter(get_political(legislature = "deu"),
30 | #'                                                       session == 16),
31 | #'                                     by = "pageid")
32 | #' tibble::glimpse(deu_core_subset)
33 | #' }
34 | #' @source
35 | #' Wikipedia, \url{https://www.wikipedia.org/} \cr
36 | #' Wikipedia API, \url{https://en.wikipedia.org/w/api.php} \cr
37 | #' Wikidata API, \url{https://www.wikidata.org/wiki/Wikidata:Main_Page} \cr
38 | #' Wikimedia Commons, \url{https://commons.wikimedia.org/wiki/Main_Page} \cr
39 | #' Face++ Cognitive Services API, \url{https://www.faceplusplus.com/} \cr
40 | #' Germany Bundestag Open Data, \url{https://www.bundestag.de/services/opendata}
41 | #' @export
42 | #' @importFrom curl nslookup
43 | #' @import dplyr
44 | get_core <- function(legislature) {
45 |   if (length(legislature) > 1) {
46 |     stop("\n\nNo more than one legislature can be called at once. Please provide only one valid three-letter country code.")
47 |   }
48 |   valid_codes <- c("aut", "bra", "can", "cze",
49 |                    "deu", "esp", "fra", "gbr",
50 |                    "irl", "isr", "ita_house", "ita_senate",
51 |                    "jpn", "nld", "sco", "tur",
52 |                    "usa_house", "usa_senate")
53 |   # isTRUE() also rejects zero-length input (NULL, character(0)), which would
54 |   # otherwise make `if` fail with an unhelpful "argument is of length zero".
55 |   if (!isTRUE(legislature %in% valid_codes)) {
56 |     stop(paste0("\n\nPlease provide a valid three-letter country code. legislatoR does not recognize the country code or does not contain data for ",
57 |                 paste0(paste0("\"", legislature, "\""), collapse = ", "),
58 |                 ". Use `legislatoR::cld_content()` to see country codes of available legislatures."))
59 |   }
60 |   # With error = FALSE the DNS lookup yields NULL instead of raising when it fails.
61 |   if (is.null(curl::nslookup("www.harvard.edu", error = FALSE))) {
62 |     stop("\n\nlegislatoR cannot establish a connection to Harvard Dataverse. Please check your Internet connection and whether Harvard Dataverse is online.")
63 |   }
64 |   endpoint <- "https://dataverse.harvard.edu/api/access/datafile/"
65 |   file_id <- sysdata %>% filter(.data$table == "core" & .data$country == legislature)
66 |   # Guard against a missing or ambiguous lookup entry; without it the download
67 |   # below would fail with an obscure connection error.
68 |   if (length(file_id$id) != 1) {
69 |     stop("\n\nlegislatoR could not identify a unique Harvard Dataverse file for the requested table and legislature.")
70 |   }
71 |   dvurl <- paste0(endpoint, file_id$id)
72 |   connect <- url(dvurl)
73 |   on.exit(close(connect), add = TRUE)
74 |   readRDS(connect)
75 | }
76 |
--------------------------------------------------------------------------------
/man/get_ids.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/get_ids.R
3 | \name{get_ids}
4 | \alias{get_ids}
5 | \title{Fetch 'IDs' table}
6 | \format{
7 | Data frame with columns (varies by legislature):
8 | \itemize{
9 | \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).}
10 | \item{parlid: Parliamentary website or website ID (of class \sQuote{character}).}
11 | \item{btvote: ID for BTVote data on all roll call votes taken in the German Bundestag from 1949 to 2013 and for Reelection Prospects data. The respective ID in BTVote and Reelection Prospects data is 'mp_id' (of class \sQuote{character}).}
12 | \item{parlspeech: ID for ParlSpeech datasets containing full-text vectors of plenary speeches. The respective ID in ParlSpeech data is 'speaker' (of class \sQuote{character}).}
13 | \item{dpsi: ID for Database of Parliamentary Speeches in Ireland. The respective ID in the Database of Parliamentary Speeches in Ireland is 'memberID' (of class \sQuote{character}).}
14 | \item{eggersspirling: ID for Eggers and Spirling British political development database. The respective ID in Eggers and Spirling data is 'member.id' (of class \sQuote{character}).}
15 | \item{bioguide: ID for the Voteview congressional roll-call votes database. The respective ID in Voteview data is 'bioguide_id' (of class \sQuote{character}).}
16 | \item{icpsr: ID for Congressional Bills Project database. The respective ID in Congressional Bills Project data is 'icpsr' (of class \sQuote{character}).}
17 | \item{sycomore: Sycomore database of French MPs (of class \sQuote{character}).}
18 | \item{libcon: Library of Congress ID (of class \sQuote{character}).}
19 | \item{gnd: German National Library ID (of class \sQuote{character}).}
20 | \item{bnf: French National Library ID (of class \sQuote{character}).}
21 | \item{freebase: Freebase ID (of class \sQuote{character}).}
22 | \item{munzinger: Munzinger archive ID (of class \sQuote{character}).}
23 | \item{nndb: Notable Names Database ID (of class \sQuote{character}).}
24 | \item{imdb: Internet Movie Database ID (of class \sQuote{character}).}
25 | \item{brittanica: Encyclopaedia Britannica ID (of class \sQuote{character}).}
26 | \item{quora: Quora ID (of class \sQuote{character}).}
27 | \item{votesmart: Project Votesmart ID (of class \sQuote{character}).}
28 | \item{fec: Federal Election Commission ID (of class \sQuote{character}).}
29 | \item{ballotpedia: Ballotpedia ID (of class \sQuote{character}).}
30 | \item{opensecrets: Opensecrets ID (of class \sQuote{character}).}
31 | \item{genealogists: Genealogists ID (of class \sQuote{character}).}
32 | \item{politfacts: Politfacts ID (of class \sQuote{character}).}
33 | \item{nkcr: Czech National Library ID (of class \sQuote{character}).}
34 | \item{parlbio: parliament.uk biography ID (of class \sQuote{character}).}
35 | \item{parlthesaurus: UK Parliament thesaurus ID (of class \sQuote{character}).}
36 | \item{national: UK National Archives ID (of class \sQuote{character}).}
37 | \item{hansard: Hansard (1803-2005) ID (of class \sQuote{character}).}
38 | \item{publicwhip: PublicWhip ID (of class \sQuote{character}).}
39 | \item{theyworkforyou: TheyWorkForYou ID (of class \sQuote{character}).}
40 | }
41 | }
42 | \source{
43 | Wikidata API, \url{https://www.wikidata.org/wiki/Wikidata:Main_Page}
44 | }
45 | \usage{
46 | get_ids(legislature)
47 | }
48 | \arguments{
49 | \item{legislature}{A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{bra}, \sQuote{can}, \sQuote{cze}, \sQuote{deu}, \sQuote{esp}, \sQuote{fra}, \sQuote{gbr}, \sQuote{irl}, \sQuote{isr}, \sQuote{ita_house}, \sQuote{ita_senate}, \sQuote{jpn}, \sQuote{nld}, \sQuote{sco}, \sQuote{tur}, \sQuote{usa_house}, or \sQuote{usa_senate}.}
50 | }
51 | \value{
52 | A data frame with columns as specified above.
53 | }
54 | \description{
55 | Fetches a range of IDs of legislators for the specified legislature. Requires a working Internet connection.
56 | }
57 | \examples{
58 | \donttest{# Get entire 'IDs' table for the United States Senate
59 | usa_ids <- get_ids(legislature = "usa_senate")
60 | tibble::glimpse(usa_ids)
61 |
62 | # Get ICPSR IDs and add 'Offices' table for the United States Senate
63 | usa_ids_subset <- dplyr::inner_join(x = dplyr::filter(usa_ids,
64 | !is.na(icpsr)),
65 | y = get_office(legislature = "usa_senate"),
66 | by = "wikidataid")
67 | tibble::glimpse(usa_ids_subset)
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/R/get_political.R:
--------------------------------------------------------------------------------
1 | #' Fetch 'Political' table
2 | #'
3 | #' Fetches political data of legislators for the specified legislature. Requires a working Internet connection.
4 | #'
5 | #' @param legislature A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{bra}, \sQuote{can}, \sQuote{cze}, \sQuote{deu}, \sQuote{esp}, \sQuote{fra}, \sQuote{gbr}, \sQuote{irl}, \sQuote{isr}, \sQuote{ita_house}, \sQuote{ita_senate}, \sQuote{jpn}, \sQuote{nld}, \sQuote{sco}, \sQuote{tur}, \sQuote{usa_house}, or \sQuote{usa_senate}.
6 | #' @return A data frame with columns as specified above.
7 | #' @format Data frame in long format with columns (varies by legislature):
8 | #' \itemize{
9 | #' \item{pageid: Wikipedia page ID identifying a legislator's Wikipedia biography (of class \sQuote{integer}).}
10 | #' \item{session: Legislative period (of class \sQuote{integer}).}
11 | #' \item{party: A legislator's party affiliation (of class \sQuote{character}). See \url{https://github.com/saschagobel/legislatoR} for the full form of abbreviated party names and English translations of non-English party names.}
12 | #' \item{constituency: A legislator's constituency (of class \sQuote{character}).}
13 | #' \item{constituency2: A legislator's constituency (upper level, if applicable, of class \sQuote{character}).}
14 | #' \item{constituency_id: ID of a legislator's constituency (of class \sQuote{character}).}
15 | #' \item{session_start: Date the legislative period started (of class \sQuote{Date}).}
16 | #' \item{session_end: Date the legislative period ended (of class \sQuote{Date}).}
17 | #' \item{service: A legislator's period of service in days during the respective session (of class \sQuote{integer})}
18 | #' \item{government (or similar): Indicator of a legislator's majority status in parliament (of class \sQuote{logical}). Further columns with extensions of this might exist.}
19 | #' \item{leader (or similar): Indicator of a legislator's leader status in parliament (of class \sQuote{logical}). Further columns with extensions of this might exist.}
20 | #' }
21 | #' @examples
22 | #' \donttest{# Get entire 'Political' table for the Czech Poslanecka Snemovna
23 | #' cze_political <- get_political(legislature = "cze")
24 | #' tibble::glimpse(cze_political)
25 | #'
26 | #' # Get 'Political' table for female ODS party members of the Czech Poslanecka Snemovna
27 | #' cze_political_subset <- dplyr::semi_join(x = dplyr::filter(cze_political,
28 | #' party == "ODS"),
29 | #' y = dplyr::filter(get_core(legislature = "cze"),
30 | #' sex == "female"),
31 | #' by = "pageid")
32 | #' tibble::glimpse(cze_political_subset)
33 | #' }
34 | #' @source
35 | #' Wikipedia, \url{https://www.wikipedia.org/} \cr
36 | #' Czech Republic Parliamentary Members Archive \url{https://public.psp.cz/sqw/fsnem.sqw?zvo=1} \cr
37 | #' Spain Parliamentary Members Archive \url{https://www.congreso.es/es/busqueda-de-diputados}
38 | #' @export
39 | #' @importFrom curl nslookup
40 | #' @import dplyr
41 | get_political <- function(legislature) {
42 | if (length(legislature) > 1) {
43 | stop ("\n\nNo more than one legislature can be called at once. Please provide only one valid three-letter country code.")
44 | }
45 | if (!(legislature %in% c("aut", "bra", "can", "cze",
46 | "deu", "esp", "fra", "gbr",
47 | "irl", "isr", "ita_house", "ita_senate",
48 | "jpn", "nld", "sco", "tur",
49 | "usa_house", "usa_senate"))) {
50 | stop (paste0("\n\nPlease provide a valid three-letter country code. legislatoR does not recognize the country code or does not contain data for ",
51 | paste0(
52 | paste0("\"", legislature, "\""),
53 | collapse = ", "),
54 | ". Use `legislatoR::cld_content()` to see country codes of available legislatures."))
55 | }
56 | if (is.null(curl::nslookup("www.harvard.edu", error = FALSE))) {
57 | stop ("\n\nlegislatoR cannot establish a connection to Harvard Dataverse. Please check your Internet connection and whether Harvard Dataverse is online.")
58 | }
59 | endpoint <- "https://dataverse.harvard.edu/api/access/datafile/"
60 | file_id <- sysdata %>% filter(.data$table == "political" & .data$country == legislature)
61 | dvurl <- paste0(endpoint, file_id$id)
62 | connect <- url(dvurl)
63 | on.exit(close(connect))
64 | dataset <- readRDS(connect)
65 | return(dataset)
66 | }
67 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # legislatoR 1.1
2 |
3 | * Data for Canada, Czech Republic, France, Germany, Scotland, and USA updated
4 | * Data for Brazil, Israel, Italy, Japan, Netherlands, and Turkey newly added
5 | * All data moved to Harvard Dataverse
6 |
7 |
8 | # legislatoR 1.0
9 |
10 | * All legislative periods of Spain's Congreso de los Diputados added to the database (14 in total).
11 | * Ongoing session of the Austrian Nationalrat added to the database (27th) and concluded session updated (26th).
12 | * Ongoing session of the Canadian House of Commons added to the database (43rd) and concluded session updated (42nd).
13 | * Ongoing session of the Irish Dáil added to the database (33rd) and concluded session updated (32nd).
14 | * Ongoing session of the UK House of Commons added to the database (58th) and concluded session updated (57th).
15 | * Irish Dáil data integrated with Database of Parliamentary Speeches in Ireland.
16 | * Czech Poslanecka Snemovna data integrated with ParlSpeech V1.
17 | * UK House of Commons data integrated with ParlSpeech V1.
18 | * Spanish Congreso de los Diputados data integrated with ParlSpeech V2.
19 | * Austrian Nationalrat data integrated with ParlSpeech V2.
20 | * Daily Wikipedia page traffic now goes back to December 2007 (except for the US House, where file storage restrictions currently limit traffic to range back to July 2009 only).
21 | * New `cld_content()` function that offers a quick overview of the CLD's scope and valid three-letter country codes, and helps to conveniently loop/map over legislatures and sessions.
22 | * "Introducing legislatoR" Vignette added.
23 | * Now available via CRAN
24 | * New Logo/Hexsticker.
25 |
26 |
27 | # legislatoR 0.2.0
28 |
29 | * 1st to 37th legislative periods of UK's House of Commons added to the database (37 in total).
30 | * Missing legislators and services variable added to all sessions of the French Assemblée.
31 | * German Bundestag data integrated with BTVote and ParlSpeech data.
32 | * UK House of Commons data integrated with Eggers and Spirling British political development database (starting from 38th session).
33 | * United States House and Senate data integrated with Voteview and congressional bills project data.
34 | * Missing Twitter handles added for the Austrian Nationalrat, Czech Poslanecka Snemovna, French Assemblée, German Bundestag, Irish Dail, UK House of Commons, and United States House and Senate (841 in total).
35 | * Missing religious affiliation added for United States House and Senate and Canada's House of Commons.
36 | * Missing ethnicity added for Canada and UK's House of Commons.
37 |
38 | # legislatoR 0.1.0
39 |
40 | * Changed some function calls: `get_facial` is now `get_portrait` and `get_occupation` is now `get_profession`.
41 | * Changed valid legislature arguments for `get_` functions. Full legislature names are no longer accepted as legislature codes. Instead, three-letter country codes such as `aut`, `can`, `cze`, `fra`, `deu`, `irl`, `sco`, `gbr`, `usa_house`, `usa_senate` must be used.
42 | * All legislative periods of Canada's House of Commons added to the database (42 in total).
43 | * All legislative periods of Czech Republic's Poslanecka Snemovna added to the database (8 in total).
44 | * All legislative periods of Scotland's Parliament added to the database (5 in total).
45 | * 38th - 57th legislative periods of UK's House of Commons added to the database (20 in total).
46 | * Current session of the Austrian Nationalrat added to the database (26th).
47 | * Current session of the German Bundestag added to the database (19th).
48 | * Current session of the French Assemblée added to the database (15th).
49 | * Current session of the United States House of Representatives added to the database (116th).
50 | * Current session of the United States Senate added to the database (116th).
51 | * ISO 3166-1 alpha-3 three-letter country codes added to all core datasets.
52 | * Start- and end date of sessions added to all political datasets.
53 | * Missing legislators and services variable added to all sessions of the Irish Dáil.
54 | * Error in United States Senate Wikipedia revision records corrected.
55 | * Missing religious affiliation added from 1st to 18th German Bundestag using official data.
56 | * Erroneous information on United States Senate and House members' period of service corrected.
57 | * Additional variables on government/opposition status and leader positions added to all sessions of the Canadian House of Commons and United States Congress.
58 | * Correction and alignment of party names across legislative periods for all parliaments.
59 | * Emotion estimates removed from the portraits dataset.
60 |
--------------------------------------------------------------------------------
/R/get_ids.R:
--------------------------------------------------------------------------------
1 | #' Fetch 'IDs' table
2 | #'
3 | #' Fetches a range of IDs of legislators for the specified legislature. Requires a working Internet connection.
4 | #'
5 | #' @param legislature A character string specifying the three-letter country code of the legislature for which data shall be fetched. Currently one of \sQuote{aut}, \sQuote{bra}, \sQuote{can}, \sQuote{cze}, \sQuote{deu}, \sQuote{esp}, \sQuote{fra}, \sQuote{gbr}, \sQuote{irl}, \sQuote{isr}, \sQuote{ita_house}, \sQuote{ita_senate}, \sQuote{jpn}, \sQuote{nld}, \sQuote{sco}, \sQuote{tur}, \sQuote{usa_house}, or \sQuote{usa_senate}.
6 | #' @return A data frame with columns as specified above.
7 | #' @format Data frame with columns (varies by legislature):
8 | #' \itemize{
9 | #' \item{wikidataid: Wikidata ID identifying a legislator's Wikidata entry (of class \sQuote{character}).}
10 | #' \item{parlid: Parliamentary website or website ID (of class \sQuote{character}).}
11 | #' \item{btvote: ID for BTVote data on all roll call votes taken in the German Bundestag from 1949 to 2013 and for Reelection Prospects data. The respective ID in BTVote and Reelection Prospects data is 'mp_id' (of class \sQuote{character}).}
12 | #' \item{parlspeech: ID for ParlSpeech datasets containing full-text vectors of plenary speeches. The respective ID in ParlSpeech data is 'speaker' (of class \sQuote{character}).}
13 | #' \item{dpsi: ID for Database of Parliamentary Speeches in Ireland. The respective ID in the Database of Parliamentary Speeches in Ireland is 'memberID' (of class \sQuote{character}).}
14 | #' \item{eggersspirling: ID for Eggers and Spirling British political development database. The respective ID in Eggers and Spirling data is 'member.id' (of class \sQuote{character}).}
15 | #' \item{bioguide: ID for the Voteview congressional roll-call votes database. The respective ID in Voteview data is 'bioguide_id' (of class \sQuote{character}).}
16 | #' \item{icpsr: ID for Congressional Bills Project database. The respective ID in Congressional Bills Project data is 'icpsr' (of class \sQuote{character}).}
17 | #' \item{sycomore: Sycomore database of French MPs (of class \sQuote{character}).}
18 | #' \item{libcon: Library of Congress ID (of class \sQuote{character}).}
19 | #' \item{gnd: German National Library ID (of class \sQuote{character}).}
20 | #' \item{bnf: French National Library ID (of class \sQuote{character}).}
21 | #' \item{freebase: Freebase ID (of class \sQuote{character}).}
22 | #' \item{munzinger: Munzinger archive ID (of class \sQuote{character}).}
23 | #' \item{nndb: Notable Names Database ID (of class \sQuote{character}).}
24 | #' \item{imdb: Internet Movie Database ID (of class \sQuote{character}).}
25 | #' \item{brittanica: Encyclopaedia Britannica ID (of class \sQuote{character}).}
26 | #' \item{quora: Quora ID (of class \sQuote{character}).}
27 | #' \item{votesmart: Project Votesmart ID (of class \sQuote{character}).}
28 | #' \item{fec: Federal Election Commission ID (of class \sQuote{character}).}
29 | #' \item{ballotpedia: Ballotpedia ID (of class \sQuote{character}).}
30 | #' \item{opensecrets: Opensecrets ID (of class \sQuote{character}).}
31 | #' \item{genealogists: Genealogists ID (of class \sQuote{character}).}
32 | #' \item{politfacts: Politfacts ID (of class \sQuote{character}).}
33 | #' \item{nkcr: Czech National Library ID (of class \sQuote{character}).}
34 | #' \item{parlbio: parliament.uk biography ID (of class \sQuote{character}).}
35 | #' \item{parlthesaurus: UK Parliament thesaurus ID (of class \sQuote{character}).}
36 | #' \item{national: UK National Archives ID (of class \sQuote{character}).}
37 | #' \item{hansard: Hansard (1803-2005) ID (of class \sQuote{character}).}
38 | #' \item{publicwhip: PublicWhip ID (of class \sQuote{character}).}
39 | #' \item{theyworkforyou: TheyWorkForYou ID (of class \sQuote{character}).}
40 | #' }
41 | #' @examples
42 | #' \donttest{# Get entire 'IDs' table for the United States Senate
43 | #' usa_ids <- get_ids(legislature = "usa_senate")
44 | #' tibble::glimpse(usa_ids)
45 | #'
46 | #' # Get ICPSR IDs and add 'Offices' table for the United States Senate
47 | #' usa_ids_subset <- dplyr::inner_join(x = dplyr::filter(usa_ids,
48 | #' !is.na(icpsr)),
49 | #' y = get_office(legislature = "usa_senate"),
50 | #' by = "wikidataid")
51 | #' tibble::glimpse(usa_ids_subset)
52 | #' }
53 | #' @source
54 | #' Wikidata API, \url{https://www.wikidata.org/wiki/Wikidata:Main_Page}
55 | #' @export
56 | #' @importFrom curl nslookup
57 | #' @import dplyr
58 | get_ids <- function(legislature) {
59 | if (length(legislature) > 1) {
60 | stop ("\n\nNo more than one legislature can be called at once. Please provide only one valid three-letter country code.")
61 | }
62 | if (!(legislature %in% c("aut", "bra", "can", "cze",
63 | "deu", "esp", "fra", "gbr",
64 | "irl", "isr", "ita_house", "ita_senate",
65 | "jpn", "nld", "sco", "tur",
66 | "usa_house", "usa_senate"))) {
67 | stop (paste0("\n\nPlease provide a valid three-letter country code. legislatoR does not recognize the country code or does not contain data for ",
68 | paste0(
69 | paste0("\"", legislature, "\""),
70 | collapse = ", "),
71 | ". Use `legislatoR::cld_content()` to see country codes of available legislatures."))
72 | }
73 | if (is.null(curl::nslookup("www.harvard.edu", error = FALSE))) {
74 | stop ("\n\nlegislatoR cannot establish a connection to Harvard Dataverse. Please check your Internet connection and whether Harvard Dataverse is online.")
75 | }
76 | endpoint <- "https://dataverse.harvard.edu/api/access/datafile/"
77 | file_id <- sysdata %>% filter(.data$table == "ids" & .data$country == legislature)
78 | dvurl <- paste0(endpoint, file_id$id)
79 | connect <- url(dvurl)
80 | on.exit(close(connect))
81 | dataset <- readRDS(connect)
82 | return(dataset)
83 | }
84 |
--------------------------------------------------------------------------------
/source/preparation_spain.R:
--------------------------------------------------------------------------------
1 | # ---------------------------------------------------------------------------------------
2 | # legislatoR
3 | # Sascha Göbel and Simon Munzert
4 | # Script: preparations for spain
5 | # October 2017
6 | # ---------------------------------------------------------------------------------------
7 |
8 |
9 | #### PREPARATIONS =======================================================================
10 |
11 | # clear workspace -----------------------------------------------------------------------
12 | rm(list = ls(all = TRUE))
13 |
14 | # set working directory -----------------------------------------------------------------
15 | setwd("D:/Sascha/Projects/legislatoR")
16 |
17 | # install and load packages and functions -----------------------------------------------
18 | source("./code/packages.R")
19 | source("./code/functions.R")
20 |
21 |
22 | #### DATA PREPARATION ===================================================================
23 |
24 | # join core data, adjust names, and split into core and political -----------------------
25 | spain <- readRDS("./data/spain")
26 | spain_title <- readRDS("./data/spain_title")
27 | #spain_faces <- readRDS("./data/spain_faces")
28 | spain_sex <- readRDS("./data/spain_sex")
29 | spain_religion <- readRDS("./data/spain_religion")
30 | spain_birth <- readRDS("./data/spain_birth")
31 | spain_death <- readRDS("./data/spain_death")
32 | spain_birthplace <- readRDS("./data/spain_birthplace")
33 | spain_deathplace <-readRDS("./data/spain_deathplace")
34 | spain <- left_join(x = spain, y = spain_title, by = "pageid_unique") %>%
35 | #left_join(x = ., y = spain_faces[,c(1,2)], by = "pageid") %>% # no ethnicity for spain
36 | left_join(x = ., y = spain_sex, by = "wikidataid") %>%
37 | left_join(x = ., y = spain_religion, by = "wikidataid") %>%
38 | left_join(x = ., y = spain_birth, by = "wikidataid") %>%
39 | left_join(x = ., y = spain_death, by = "wikidataid") %>%
40 | left_join(x = ., y = spain_birthplace, by = "wikidataid") %>%
41 | left_join(x = ., y = spain_deathplace, by = "wikidataid")
42 | colnames(spain)[c(27,31,32)] <- c("wikititle", "birth", "death")
43 | spain <- spain %>% dplyr::select(country, pageid_unique, wikidataid, wikititle, name, sex,
44 | religion, birth, death, birthplace, deathplace,
45 | session, party, group, constituency, session_start, session_end,
46 | service)
47 | spain_core <- spain[!duplicated(spain$pageid_unique), 1:11]
48 | colnames(spain_core)[2] <- "pageid"
49 | spain_core$wikidataid <- ifelse(is.na(spain_core$wikidataid),
50 | paste0(spain_core$pageid, "-wd"),
51 | spain_core$wikidataid)
52 | spain_political <- spain[c(2, 12:18)]
53 | colnames(spain_political)[1] <- "pageid"
54 | rm(spain, spain_title, spain_sex, spain_religion, spain_birth,
55 | spain_death, spain_birthplace, spain_deathplace)
56 |
57 | # correct some name mismatches, pageids and wikidataids are correct
58 | spain_core[match(c("Q15256012", "Q2748756","Q66663492","Q44409046",
59 | "Q44630197", "Q64166732", "Q66663512", "Q11955329",
60 | "Q12399503", "Q5997107", "Q66663449", "Q44630268",
61 | "Q44409223", "Q44519245", "Q66663490", "Q448547",
62 | "Q3189830"),spain_core$wikidataid),]$name <-
63 | c("Francisco Ramos Fernández-Torrecilla", "Celestino Corbacho Chaves",
64 | "Germán Renau Martínez", "José Miguel González Moraga",
65 | "María Pía Sánchez Fernández", "Héctor Illueca Ballester",
66 | "Yolanda Seva Ruiz", "Xavier Tárrega Bernal",
67 | "Salvador Fernández Moreda", "María Margarita Robles Fernández",
68 | "Andrés Lorite Lorite", "Mercedes Toledo Silvestre",
69 | "Pío Pérez Laserna", "María Soledad Sánchez Jódar",
70 | "Margarita Prohens Rigo", "Pedro Duque",
71 | "Julio de España Moya")
72 | spain_core <- spain_core %>% filter(!(wikidataid %in% c("1-miss-wd", "20-miss-wd", "21-miss-wd", "4-miss-wd")))
73 |
74 | # format traffic data -------------------------------------------------------------------
75 | spain_traffic <- readRDS("./data/spain_traffic")
76 | spain_traffic$date <- spain_traffic$date %>% as.POSIXct(tz = "UTC")
77 |
78 | # format history data -------------------------------------------------------------------
79 | spain_history <- readRDS("./data/spain_history")
80 | spain_history <- spain_history %>% select(pageid = pageid_unique, revid, parentid, user,
81 | userid, timestamp, size, comment)
82 | spain_history$timestamp <- spain_history$timestamp %>% str_replace("T", " ") %>%
83 | as.POSIXct(tz = "UTC")
84 |
85 | # format facial data --------------------------------------------------------------------
86 | spain_faces <- readRDS("./data/spain_faces")
87 | spain_faces <- spain_faces[,-1]
88 |
89 | # save data -----------------------------------------------------------------------------
90 | spain_social <- readRDS("./data/spain_social")
91 | spain_positions <- readRDS("./data/spain_positions")
92 | spain_occupation <- readRDS("./data/spain_occupation")
93 | spain_id <- readRDS("./data/spain_id")
94 | saveRDS(spain_core, "./package/legislatoR-data-v1.0.0/esp_core")
95 | saveRDS(spain_political, "./package/legislatoR-data-v1.0.0/esp_political")
96 | saveRDS(spain_history, "./package/legislatoR-data-v1.0.0/esp_history")
97 | saveRDS(spain_traffic, "./package/legislatoR-data-v1.0.0/esp_traffic")
98 | saveRDS(spain_social, "./package/legislatoR-data-v1.0.0/esp_social")
99 | saveRDS(spain_faces, "./package/legislatoR-data-v1.0.0/esp_portrait")
100 | saveRDS(spain_positions, "./package/legislatoR-data-v1.0.0/esp_office")
101 | saveRDS(spain_occupation, "./package/legislatoR-data-v1.0.0/esp_profession")
102 | saveRDS(spain_id, "./package/legislatoR-data-v1.0.0/esp_ids")
103 |
104 |
105 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # legislatoR: Interface to the Comparative
Legislators Database
2 |
3 | [](https://www.gnu.org/licenses/gpl-3.0)
4 | [](https://cran.r-project.org/package=legislatoR)
5 | [](https://github.com/saschagobel/legislatoR/releases)
6 | [](https://cran.rstudio.com/web/packages/legislatoR/index.html)
7 |
8 | legislatoR is a package for the software environment R that facilitates access to the [Comparative Legislators Database (CLD)](https://complegdatabase.com/). The CLD includes political, sociodemographic, career, online presence, public attention, and visual information for over 67,000 contemporary and historical politicians from 16 countries. Data are also available for download in .csv and .sqlite formats at the [CLD's Dataverse](https://dataverse.harvard.edu/dataverse/cld).
9 |
10 | ## Content and data structure
11 | The CLD covers the following countries and time periods:
12 |
13 | | Country | Legislative sessions | Politicians (unique*) | Integrated with |
14 | | ------------------------------------ | --------------------------- | -------------------- | ------------------ |
15 | | Austria (Nationalrat) | all 27
(1920-2019) | 1,923 | [ParlSpeech V2](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/L4OAKN) (Rauh/Schwalbach 2020) |
16 | | Brazil (Câmara dos Deputados) | 38-57
(1947-2022) | 3,474 | |
17 | | Canada (House of Commons) | all 44
(1867-2021) | 4,567 | |
18 | | Czech Republic (Poslanecka Snemovna) | all 9
(1992-2021) | 1,124 | [ParlSpeech V1](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/E4RSP9) (Rauh et al. 2017) |
19 | | France (Assemblée) | all 16
(1958-2022) | 4,263 | |
20 | | Germany (Bundestag) | all 20
(1949-2021) | 4,371 | [BTVote data](https://dataverse.harvard.edu/dataverse/btvote) (Bergmann et al. 2018),
[ParlSpeech V1](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/E4RSP9) (Rauh et al. 2017),
[Reelection Prospects data](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/EBEDPI) (Stoffel/Sieberer 2017) |
21 | | Ireland (Dail) | all 33
(1918-2020) | 1,408 | [Database of Parliamentary Speeches in Ireland](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/6MZN76) (Herzog/Mikhaylov 2017) |
22 | | Israel (Knesset) | all 25
(1949-2022) | 1,022 | |
23 | | Italy (Camera dei deputati and Senato della Repubblica) | all 19
(1948-2022) | 5,149 | |
24 | | Japan (Shūgiin) | all 49
(1890-2021) | 6,581 | |
25 | | Netherlands (Tweede Kamer) | all 65
(1815-2021) | 1,887 | |
26 | | Scotland (Parliament) | all 6
(1999-2021) | 348 | [ParlScot](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/EQ9WBE) (Braby/Fraser 2021) |
27 | | Spain (Congreso de los Diputados) | all 14
(1979-2019) | 2,616 | [ParlSpeech V2](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/L4OAKN) (Rauh/Schwalbach 2020) |
28 | | Turkey (Büyük Millet Meclisi) | all 27
(1920-2018) | 5,298 | |
29 | | United Kingdom (House of Commons) | all 58
(1801-2019) | 11,321 | [EggersSpirling data](https://github.com/ArthurSpirling/EggersSpirlingDatabase) (starting from
38th session, Eggers/Spirling 2014),
[ParlSpeech V1](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/E4RSP9) (Rauh et al. 2017) |
30 | | United States (House and Senate) | all 117
(1789-2021) | 12,593 | [Voteview data](https://voteview.com/data) (Lewis et al. 2019),
[Congressional Bills Project data](http://www.congressionalbills.org/) (Adler/Wilkerson 2018) |
31 | | **16** | **529** | **67,945** | **12** |
32 |
33 | \* We only count legislators with a unique Wikipedia page or Wikidata ID. Sometimes legislators do not have either. Such cases are indicated by the string "miss" in the wikidataid or pageid.
34 |
35 | For each legislature, the CLD holds nine tables:
36 |
37 | 1. *Core* (sociodemographic data)
38 | 2. *Political* (political data)
39 | 3. *History* (full revision records of individual Wikipedia biographies)
40 | 4. *Traffic* (daily user traffic on individual Wikipedia biographies starting from July 2007)
41 | 5. *Social* (social media handles and personal website URLs)
42 | 6. *Portraits* (URLs to portraits)
43 | 7. *Offices* (public offices)
44 | 8. *Professions* (professions)
45 | 9. *IDs* (identifiers linking politicians to other files, databases, or websites)
46 |
47 | The tables contain the following variables (see respective R help files for further details):
48 |
49 | - *Core*: Country, Wikipedia page ID, Wikidata ID, Wikipedia Title, full name, sex, ethnicity, religion, date of birth and death, place of birth and death.
50 | - *Political*: Wikipedia page ID, legislative session, party affiliation, lower constituency, upper constituency, constituency ID, start and end date of legislative session, period of service, majority status, leader positions.
51 | - *History*: Wikipedia page ID, Wikipedia revision and previous revision ID, editor name/IP and ID, revision date and time, revision size, revision comment.
52 | - *Traffic*: Wikipedia page ID, date, user traffic.
53 | - *Social*: Wikidata ID, Twitter handle, Facebook handle, Youtube ID, Google Plus ID, Instagram handle, LinkedIn ID, personal website URL.
54 | - *Portraits*: Wikipedia page ID, Wikipedia portrait URL.
55 | - *Offices*: Wikidata ID, a range of offices such as attorney general, chief justice, mayor, party chair, secretary of state, etc.
56 | - *Professions*: Wikidata ID, a range of professions such as accountant, farmer, historian, judge, mechanic, police officer, salesperson, teacher, etc.
57 | - *IDs*: Wikidata ID, IDs for integration with various political science datasets as well as a range of other IDs such as parliamentary website IDs, Library of Congress or German National Library IDs, Notable Names Database or Project Vote Smart IDs, etc.
58 |
59 | Note that for some legislatures or legislative periods, tables may only hold information for a subset of politicians or variables.
60 |
61 | The CLD comes as a relational database. This means that all tables can be joined with the *Core* table via one of two keys - the Wikipedia page ID or the Wikidata ID. These keys uniquely identify individual politicians. The figure below illustrates this structure and the CLD's content.
62 |
63 |
64 |
65 |
15 |
16 | ###### Sascha Göbel and Simon Munzert -- April, 2020
17 |
18 | **legislatoR** facilitates access to the Comparative Legislators Database (CLD). The CLD includes political, sociodemographic, career, online presence, public attention, and visual information for over 45,000 contemporary and historical politicians from ten countries. Information is stored in nine topically distinguished tables for each country and arranged in a relational fashion.
19 |
20 | This vignette provides an introduction on how to use **legislatoR** to access and make the most of the information stored in the CLD.
21 |
22 | ## General access to the CLD
23 | Basic access to the CLD works through table-specific functions. Functions are named after the table they fetch and preceded by "get_". The table below lists data tables and corresponding function calls. Alternatively, you can call `?legislatoR()` to get an overview of all the functions in **legislatoR**.
24 |
25 | | Table | Function | Description | Key |
26 | | :---------- | :------------------ | :-------------------------------------------------------------------- | :----------------- |
27 | | Core | `get_core()` | Fetches sociodemographic data of legislators | pageid, wikidataid |
28 | | Political | `get_political()` | Fetches political data of legislators | pageid |
29 | | History | `get_history()` | Fetches full revision histories of legislators' Wikipedia biographies | pageid |
30 | | Traffic | `get_traffic()` | Fetches daily user traffic on legislators' Wikipedia biographies | pageid |
31 | | Social | `get_social()` | Fetches social media handles and website URLs of legislators | wikidataid |
32 | | Portraits | `get_portrait()` | Fetches portrait urls of legislators | pageid |
33 | | Offices | `get_office()` | Fetches political and other offices of legislators | wikidataid |
34 | | Professions | `get_profession()` | Fetches occupational data of legislators | wikidataid |
35 | | IDs | `get_ids()` | Fetches a range of IDs of legislators | wikidataid |
36 |
37 | Every "get_" function has a "legislature" argument that takes a character string specifying the three-letter country code of the legislature for which a table shall be fetched. The table below lists all legislatures available in the CLD together with their three-letter country code. Alternatively, you can call `?cld_content()` to get an overview of the CLD's scope and valid three-letter country codes. This will also show you the sessions available for each legislature.
38 |
39 | | Legislature | Code |
40 | | :----------------------------------- | :----------------------- |
41 | | Austria (Nationalrat) | `aut` |
42 | | Canada (House of Commons) | `can` |
43 | | Czech Republic (Poslanecka Snemovna) | `cze` |
44 | | France (Assemblée) | `fra` |
45 | | Germany (Bundestag) | `deu` |
46 | | Ireland (Dail) | `irl` |
47 | | Scotland (Parliament) | `sco` |
48 | | Spain (Congreso de los Diputados) | `esp` |
49 | | United Kingdom (House of Commons) | `gbr` |
50 | | United States (House and Senate) | `usa_house`/`usa_senate` |
51 |
52 | Here are some examples for fetching full tables for different countries. All tables come in a tidy (long) format. Every row represents a politician and every column a variable.
53 |
54 | ```{r eval = LOCAL}
55 | library(legislatoR)
56 | library(tibble)
57 |
58 | # get "Core" table for the United States House ------------------------------------------
59 | usa_house_core <- get_core(legislature = "usa_house")
60 | glimpse(usa_house_core)
61 |
62 | # get "Political" table for the German Bundestag ----------------------------------------
63 | deu_political <- get_political(legislature = "deu")
64 | glimpse(deu_political)
65 |
66 | # get "IDs" table for the Spanish Congreso ----------------------------------------------
67 | esp_ids <- get_ids(legislature = "esp")
68 | glimpse(esp_ids)
69 | ```
70 |
71 | ## Targeted access to the CLD
72 | **legislatoR** also facilitates more targeted access to the CLD than simply downloading whole tables. Two legislator-specific keys, the Wikipedia page ID (`pageid`) and the Wikidata ID (`wikidataid`), link all tables to the "Core" table. This allows for mutating and filtering joins using a popular grammar of data manipulation implemented in the 'dplyr' package. The table above lists the relevant key for each data table in the CLD. Here are some examples for combining and subsetting data from different tables. We always start from the "Core" table since it identifies legislators by name and country and never holds a legislator twice.
73 |
74 | ```{r eval = LOCAL}
75 | library(dplyr)
76 |
77 | # combine "Core" and "Political" tables for the Irish Dail ------------------------------
78 | irl_join <- left_join(x = get_core(legislature = "irl"),
79 | y = get_political(legislature = "irl"),
80 | by = "pageid")
81 | glimpse(irl_join)
82 |
83 | # then add the "Social" table -----------------------------------------------------------
84 | irl_join <- left_join(x = irl_join,
85 | y = get_social(legislature = "irl"),
86 | by = "wikidataid")
87 | glimpse(irl_join)
88 |
89 | # get "Core" table for Scottish Liberal Democrats
90 | sco_subset <- semi_join(x = get_core(legislature = "sco"),
91 | y = filter(get_political(legislature = "sco"),
92 | party == "Scottish Liberal Democrats"),
93 | by = "pageid")
94 | glimpse(sco_subset)
95 |
96 | # combine "Core" and "Political" tables for German Bundestag CDU/CSU and AfD members ----
97 | deu_subset <- inner_join(x = get_core(legislature = "deu"),
98 | y = filter(get_political(legislature = "deu"),
99 | party %in% c("CDU", "CSU", "AfD")),
100 | by = "pageid")
101 | glimpse(deu_subset)
102 |
103 | # combine "Core" and "Political" tables for female legislators from the 37th Canadian
104 | # House of Commons ----------------------------------------------------------------------
105 | can_subset <- inner_join(x = filter(get_core(legislature = "can"), sex == "female"),
106 | y = filter(get_political(legislature = "can"), session == 37),
107 | by = "pageid")
108 | glimpse(can_subset)
109 |
110 | # combine "Core", "Traffic", and "Social" tables for UK House of Commons members with
111 | # Twitter handles -----------------------------------------------------------------------
112 | uk_subset <- left_join(x = inner_join(x = get_core(legislature = "gbr"),
113 | y = filter(get_social(legislature = "gbr"), !is.na(twitter)),
114 | by = "wikidataid"),
115 | y = get_traffic(legislature = "gbr"),
116 | by = "pageid")
117 | glimpse(uk_subset)
118 | ```
119 |
120 | Of course, you can also use the pipe operator `%>%` from the 'magrittr' package to improve code readability and reach your goal in fewer steps.
121 |
122 | ```{r eval = LOCAL}
123 | library(magrittr)
124 |
125 | # combine "Core", "IDs", and "Portraits" tables for the Austrian Nationalrat ------------
126 | aut_join <- get_core(legislature = "aut") %>%
127 | left_join(get_ids(legislature = "aut"),
128 | by = "wikidataid") %>%
129 | left_join(get_portrait(legislature = "aut"),
130 | by = "pageid")
131 | glimpse(aut_join)
132 |
133 | # get "Core" table for high-profile politicians (top 1% of Wikipedia page views) of
134 | # French Assemblée ----------------------------------------------------------------------
135 | fra_subset <- get_traffic(legislature = "fra") %>%
136 | group_by(pageid) %>%
137 | summarise(total_traffic = sum(traffic)) %>%
138 | filter(total_traffic >= quantile(total_traffic, probs = 0.99)) %>%
139 | semi_join(x = get_core(legislature = "fra"),
140 | y = .,
141 | by = "pageid")
142 | glimpse(fra_subset)
143 | ```
144 |
145 | ## Integrating with other sources
146 | The CLD is integrated with several other data projects. You can call `?get_ids()` to get an overview of all projects the CLD is integrated with and how respective IDs are named. Here are two examples that show how to use the IDs to join the CLD with other projects. The first example integrates the "Core" table for the Spanish Congreso with a small one-month-extract of the ParlSpeech V2 data (Rauh and Schwalbach 2020). The second example integrates the "Core" and "Political" tables for the Irish Dail with a small one-month-extract of the Database of Parliamentary Speeches in Ireland (Herzog and Mikhaylov 2017).
147 |
148 | ```{r eval = LOCAL}
149 | library(stringr)
150 |
151 | # import ParlSpeech example and rename ID to match CLD ----------------------------------
152 | parlspeech_example <- readRDS("parlspeech_example") %>%
153 | rename(parlspeech = speaker)
154 |
155 | # remove whitespace from start and end of the ID in ParlSpeech --------------------------
156 | parlspeech_example$parlspeech <- str_trim(parlspeech_example$parlspeech)
157 |
158 | # integrate CLD with ParlSpeech example -------------------------------------------------
159 | esp_speeches <- get_core(legislature = "esp") %>%
160 | left_join(get_ids(legislature = "esp"),
161 | by = "wikidataid") %>%
162 | filter(!is.na(parlspeech)) %>%
163 | inner_join(parlspeech_example,
164 | by = "parlspeech")
165 |
166 | # import Database of Parliamentary Speeches in Ireland example and rename ID ------------
167 | dpsi_example <- readRDS("dpsi_example") %>%
168 | rename(dpsi = memberID)
169 |
170 | # integrate CLD with DPSI example -------------------------------------------------------
171 | irl_speeches <- get_core(legislature = "irl") %>%
172 | inner_join(filter(get_political(legislature = "irl"), session == 28),
173 | by = "pageid") %>%
174 | left_join(get_ids(legislature = "irl"),
175 | by = "wikidataid") %>%
176 | inner_join(dpsi_example,
177 | by = "dpsi")
178 | ```
179 |
180 | ## Map over legislatures
181 | So far we have accessed the CLD legislature by legislature. It is also possible to retrieve data for multiple legislatures at once with the help of the `cld_content()` function. This function returns the three-letter country codes for all legislatures available in the CLD as well as the available legislative sessions. This helps to conveniently map over legislatures. In the first example below we `purrr::map()` over the names of all legislatures to get a list of "Core" tables. In the second example, we do the same and additionally join with the respective "Political" tables cut to the last three legislative sessions. To achieve this, we call `cld_content()` within `purrr::map()` one more time, passing the name of the respective legislature to get all available sessions, of which we then select the last three sessions to filter the "Political" tables accordingly before joining with the "Core" table. You can always pass a vector of three-letter country codes to the "legislature" argument of `cld_content()` beforehand or otherwise subset the list returned by the function to select a specific subset of legislatures.
182 |
183 | ```{r eval = LOCAL}
184 | library(purrr)
185 |
186 | # get "Core" table for all legislatures -------------------------------------------------
187 | all_core <- cld_content() %>%
188 | names() %>%
189 | map(get_core)
190 | glimpse(all_core)
191 |
192 | # get "Core" and "Political" tables for last three sessions of all legislatures ----------
193 | recent_sessions <- cld_content() %>%
194 | names() %>%
195 | map(~ {
196 | get_core(legislature = .x) %>%
197 | inner_join(filter(get_political(legislature = .x),
198 | session %in% tail(cld_content(.x)[[1]], 3)),
199 | by = "pageid")
200 | })
201 | glimpse(recent_sessions)
202 | ```
203 |
204 | ## Other Formats
205 | You do not have to be an R user to work with the CLD. If you are more familiar with conducting analyses in other software, such as Excel, SAS, STATA, or SPSS, you can use **legislatoR** to get the data you require as illustrated above and then export it into the desired format as shown below.
206 |
207 | ```{r eval = FALSE}
208 | library(haven)
209 |
210 | # save data as .csv for use with Excel --------------------------------------------------
211 | write.csv(fra_subset, "fra_subset.csv")
212 |
213 | # save data as .xpt for use with SAS (write_sas() is deprecated and unreliable) ---------
214 | write_xpt(sco_subset, "sco_subset.xpt")
215 |
216 | # save data as .dta for use with STATA --------------------------------------------------
217 | write_dta(irl_join, "irl_join.dta")
218 |
219 | # save data as .sav for use with SPSS ---------------------------------------------------
220 | write_sav(esp_speeches, "esp_speeches.sav")
221 | ```
222 |
--------------------------------------------------------------------------------
/GLOSSARY.md:
--------------------------------------------------------------------------------
1 | # Glossary
2 |
3 | #### Country codes
4 | * AUT - Austria
5 | * CAN - Canada
6 | * CZE - Czech Republic
7 | * FRA - France
8 | * DEU - Germany
9 | * IRL - Ireland
10 | * SCO - Scotland
11 | * ESP - Spain
12 | * GBR - United Kingdom
13 | * USA - United States of America
14 |
15 | #### Austria party abbreviations and translations
16 | * BZÖ - Bündnis Zukunft Österreich (Alliance for the Future of Austria)
17 | * CSP - Christlichsoziale Partei (Christian Social Party)
18 | * FPÖ - Freiheitliche Partei Österreichs (Freedom Party of Austria)
19 | * GdP - Großdeutsche Volkspartei (Greater German People's Party)
20 | * GRÜNE - Die Grüne Alternative (The Greens – The Green Alternative)
21 | * HB - Heimatblock
22 | * KPÖ - Kommunistische Partei Österreichs (Communist Party of Austria)
23 | * LBd - Landbund für Österreich
24 | * LIF - Liberales Forum (Liberal Forum)
25 | * NEOS - Das Neue Österreich und Liberales Forum (The New Austria and Liberal Forum)
26 | * none - Independent
27 | * NWB - Nationaler Wirtschaftsblock (National Economic Block and Landbund)
28 | * ÖVP - Österreichische Volkspartei (Austrian People's Party)
29 | * PILZ - Liste Peter Pilz
30 | * SdP - Sozialdemokratische Partei (Social Democratic Party)
31 | * SPÖ - Sozialdemokratische Partei Österreichs (Social Democratic Party of Austria)
32 | * STRONACH - Team Stronach für Österreich
33 | * VO - Wahlgemeinschaft Österreichische Volksopposition (Electoral Community of the Austrian People's Opposition)
34 | * WdU - Wahlpartei der Unabhängigen (Federation of Independents)
35 |
36 | #### Czech Republic party abbreviations and translations
37 | * ANO - Akce nespokojených občanů (Action of Dissatisfied Citizens)
38 | * CSS - Česká strana národně sociální (Czech National Social Party)
39 | * CSSD - Česká strana sociálně demokratická (Czech Social Democratic Party)
40 | * DSP - Demokratická strana práce (Democratic Labour Party)
41 | * HSD–SMS - Hnutí za samosprávnou demokracii–Společnost pro Moravu a Slezsko (Movement for Autonomous Democracy–Party for Moravia and Silesia)
42 | * KDS - Křesťanskodemokratická strana (Christian Democratic Party)
43 | * KDU–CSL - Křesťanská a demokratická unie – Československá strana lidová (Christian and Democratic Union – Czechoslovak People's Party)
44 | * KSCM - Komunistická strana Čech a Moravy (Communist Party of Bohemia and Moravia)
45 | * LSU - Liberálně sociální unie (Liberal-Social Union)
46 | * MNS - Moravská národní strana (Moravian National Party)
47 | * ODA - Občanská demokratická aliance (Civic Democratic Alliance)
48 | * ODS - Občanská demokratická strana (Civic Democratic Party)
49 | * Piráti - Česká pirátská strana (Czech Pirate Party)
50 | * SPD - Svoboda a přímá demokracie - Tomio Okamura (Freedom and Direct Democracy – Tomio Okamura)
51 | * SPR-RSC - Sdružení pro republiku - Republikánská strana Československa (Rally for the Republic – Republican Party of Czechoslovakia)
52 | * STAN - Starostové a nezávislí (Mayors and Independents)
53 | * SZ - Strana zelených (Green Party)
54 | * TOP_09 - Tradice Odpovědnost Prosperita (Tradition Responsibility Prosperity)
55 | * US-DEU - Unie Svobody–Demokratická unie (Freedom Union–Democratic Union)
56 | * Úsvit - Úsvit – Národní koalice (Dawn – National Coalition)
57 | * VV - Věci veřejné (Public Affairs)
58 |
59 | #### France party abbreviations and translations
60 | * AC - Alliance centriste (Centrist Alliance)
61 | * Agir - Agir, la droite constructive (Act, the Constructive Right)
62 | * CD - Centre démocratique (Democratic Center)
63 | * CE - Calédonie ensemble (Caledonia Together)
64 | * DL - Démocratie libérale (Liberal Democracy)
65 | * DLF - Debout la France (France Arise)
66 | * DVD - Divers droite (Miscellaneous right)
67 | * DVG - Divers gauche (Miscellaneous left)
68 | * E! - Ensemble!
69 | * EAS - Regroupement national pour l'unité de la République (National Grouping for the Unity of the Republic)
70 | * ED - Entente démocratique (Democratic agreement)
71 | * FD - Front démocrate (Democratic Front)
72 | * FGDS - Fédération de la gauche démocrate et socialiste (Federation of the Democratic and Socialist Left)
73 | * FN - Front national (National Front)
74 | * GDR - Gauche démocrate et républicaine (Democratic and Republican Left)
75 | * GÉ - Génération écologie (Ecology Generation)
76 | * IPAS - Centre national des indépendants et paysans (National Centre of Independents and Peasants)
77 | * La France insoumise - (Unsubmissive France)
78 | * LC - Les Centristes (The Centrists)
79 | * LP - Les Patriotes (The Patriots)
80 | * LR - Les Républicains (The Republicans)
81 | * LS - Ligue du Sud (Southern League)
82 | * MdP - Mouvement des progressistes (Movement of Progressives)
83 | * MIM - Mouvement indépendantiste martiniquais (Martinican Independence Movement)
84 | * MoDem - Mouvement démocrate et apparentés (Democratic Movement and affiliated group)
85 | * MPR - Pour La Réunion (For Reunion)
86 | * MRC - Mouvement républicain et citoyen (Citizen and Republican Movement)
87 | * MRSL - Mouvement radical, social et libéral (Radical Movement)
88 | * NC - Groupe Nouveau Centre (Group New Center)
89 | * PaC - Pè a Corsica
90 | * PCF - Parti communiste français (French Communist Party)
91 | * PDM - Progrès et démocratie moderne (Progress and Modern Democracy)
92 | * PÉ - Parti écologiste (Ecologist Party)
93 | * PG - Parti de gauche (Left Party)
94 | * PP - Place publique (Public place)
95 | * PPM - Parti progressiste martiniquais (Martinican Progressive Party)
96 | * PS - Parti socialiste (Socialist Party)
97 | * PSG - Parti socialiste guyanais (Guianese Socialist Party)
98 | * PSRG - Parti socialiste et Radicaux de gauche (Socialist Party and Radical Left)
99 | * RCV - Groupe radical, citoyen et vert (Radical group, citizen and green)
100 | * RD - Rassemblement démocratique (Democratic Rally)
101 | * RDM - Rassemblement démocratique pour la Martinique (Martinican Democratic Rally)
102 | * RDS - Réformateurs démocrates sociaux (Social Democratic Reformers)
103 | * RE974 - Rézistans Égalité 974
104 | * REM - La République en marche (The Republic on the move)
105 | * RI - Fédération nationale des républicains indépendants (Independent Republicans)
106 | * RPCD - Républicains populaires et du Centre démocratique (Popular Republicans and Democratic Center)
107 | * RPR - Rassemblement pour la République (Rally for the Republic)
108 | * RRDP - Groupe radical, républicain, démocrate et progressiste (Radical, Republican, Democratic and Progressive)
109 | * SER - Groupe socialiste, écologiste et républicain (Socialist, Ecologist and Republican Group)
110 | * SFIO - Section française de l'Internationale ouvrière (French Section of the Workers' International)
111 | * SRC - Socialiste, radical, citoyen et divers gauche (Socialist, radical, citizen and various left)
112 | * TH - Tavini huiraatira
113 | * UC - Union centriste (Centrist Union)
114 | * UDC - Union du centre (Union of the Center)
115 | * UDF - Union pour la démocratie française (Union for French Democracy)
116 | * UDI - Union des démocrates et indépendants (Union of Democrats and Independents)
117 | * UDR - Union des démocrates pour la République (Union of Democrats for the Republic)
118 | * UMP - Union pour un mouvement populaire (Union for a Popular Movement)
119 | * UNR-UDT - Union pour la nouvelle République-Union démocratique du travail (Union for the New Republic-Democratic Union of Labour)
120 | * UNR - Union pour la nouvelle République (Union for the New Republic)
121 |
122 | #### Germany party abbreviations and translations
123 | * AfD - Alternative für Deutschland (Alternative for Germany)
124 | * AL - Alternative Liste für Demokratie und Umweltschutz (Alternative List for Democracy and Environmental Protection)
125 | * BP - Bayernpartei (Bavaria Party)
126 | * BÜNDNIS 90/DIE GRÜNEN - (Alliance 90/The Greens)
127 | * CDU - Christlich Demokratische Union Deutschlands (Christian Democratic Union of Germany)
128 | * CSU - Christlich-Soziale Union in Bayern (Christian Social Union in Bavaria)
129 | * CVP - Christliche Volkspartei des Saarlandes (Christian People's Party of the Saarland)
130 | * DIE LINKE - Die Linke (The Left)
131 | * DKP-DRP - Deutsche Konservative Partei – Deutsche Rechtspartei (German Right Party)
132 | * DP - Deutsche Partei (German Party)
133 | * DPS - Demokratische Partei Saar (Democratic Party Saar)
134 | * DSU - Deutsche Soziale Union (German Social Union)
135 | * FDP - Freie Demokratische Partei (Free Democratic Party)
136 | * FDV - Freie Deutsche Volkspartei (Free Germany People's Party)
137 | * GB/BHE - Gesamtdeutscher Block/Bund der Heimatvertriebenen und Entrechteten (All-German Bloc/League of Expellees and Deprived of Rights)
138 | * KPD - Kommunistische Partei Deutschlands (Communist Party of Germany)
139 | * PDS - Partei des Demokratischen Sozialismus (Party of Democratic Socialism)
140 | * SPD - Sozialdemokratische Partei Deutschlands (Social Democratic Party of Germany)
141 | * WAV - Wirtschaftliche Aufbau-Vereinigung (Economic Reconstruction Union)
142 | * ZENTRUM - Deutsche Zentrumspartei (Centre Party)
143 |
144 | #### Ireland party translations
145 | * Clann na Poblachta (Family/Children of the Republic)
146 | * Clann na Talmhan (Family/Children of the land)
147 | * Cumann na nGaedheal (Society of the Gaels)
148 | * Fianna Fáil (Soldiers of Destiny)
149 | * Fine Gael (Tribe of the Irish)
150 | * Sinn Féin (We Ourselves)
151 |
152 | #### United Kingdom party translations
153 | * Plaid Cymru (Party of Wales)
154 | * Sinn Féin (We Ourselves)
155 |
156 | #### United States of America House party abbreviations
157 | * A - Native American Party
158 | * AAP - Anti-Administration Party (DR)
159 | * AJ - National Republican Party
160 | * AL - American Labor Party
161 | * ALD - Lecompton Constitution
162 | * AM - Anti-Masonic Party
163 | * AMon - Anti-Monopoly Party
164 | * C - Conservative Party
165 | * CU - Constitutional Union Party
166 | * D - Democratic Party
167 | * D/PPD - Popular Democratic Party (Puerto Rico)
168 | * DFL - Minnesota Democratic–Farmer–Labor Party
169 | * DL - Liberal Party of New York
170 | * DNPL - North Dakota Democratic–Nonpartisan League Party
171 | * DR - Democratic-Republican Party
172 | * F - Federalist Party (Pro-Administration Party)
173 | * Fed - Federalist Party
174 | * FL - Farmer–Labor Party
175 | * FS - Free Soil Party
176 | * Home Rule - Hawaiian Independent Party
177 | * J - Jacksonian Democracy
178 | * L - Socialist Labor Party of America
179 | * Lib - Liberal Party of New York
180 | * LO - Law and Order Party of Rhode Island
181 | * LR - Liberal Republican Party
182 | * N - Nullifier Party
183 | * NaC - Nacionalista Party
184 | * NG - Greenback Party
185 | * NU - National Union Party
186 | * O - Opposition Party
187 | * PAP - Pro-Administration Party (PAP)
188 | * PNP/D - New Progressive Party (Puerto Rico)/Democratic
189 | * PNP/R - New Progressive Party (Puerto Rico)/Republican
190 | * Pop - People's Party
191 | * Prog - Progressive Party
192 | * Proh - Prohibition Party
193 | * R - Republican Party
194 | * RA - Readjuster Party
195 | * S - Silver Party
196 | * Soc - Socialist Party of America
197 | * Socialist - Socialist Party (Puerto Rico)
198 | * SR - Silver Republican Party
199 | * SRi - States' Rights Party
200 | * U - Unionist Party, later Unconditional Unionist Party
201 | * Unionist - Unionist Party (Puerto Rico)
202 | * UPR - Union of Puerto Rico
203 | * W - Whig Party
204 |
205 | #### United States of America Senate party abbreviations
206 | * L - Liberty Party
207 | * IMN - Independence Party of Minnesota
208 | * LR - Liberal Republican Party
209 |
--------------------------------------------------------------------------------
/source/integration_czech.R:
--------------------------------------------------------------------------------
1 | # ---------------------------------------------------------------------------------------
2 | # legislatoR
3 | # Sascha Göbel and Simon Munzert
4 | # Script: integration with czech parlspeech
5 | # This script was written by Lada Rudnitckaia
6 | # March 2020
7 | # ---------------------------------------------------------------------------------------
8 |
9 |
10 | # devtools::install_github("saschagobel/legislatoR")
11 | library(legislatoR)
12 | library(xlsx)
13 | setwd("")
14 |
15 | if (.Platform$OS.type == 'windows') {
16 | Sys.setlocale(category = 'LC_ALL','English_United States.1250')
17 | } else {
18 | Sys.setlocale(category = 'LC_ALL','en_US.UTF-8')
19 | }
20 |
21 | cze <- get_core(legislature = "cze")
22 | cze_pol <- get_political(legislature = "cze")
23 | load("Corp_PSP.Rdata") # parl speech data
24 |
25 | ## Retrieve year from session
26 | cze_pol$start_year <- format(cze_pol$session_start,"%Y")
27 | psp.corpus$start_year <- substr(psp.corpus$session, start=1, stop=4)
28 |
29 | ## Filter by session 1-7 (as in ParlSpeech), identified by the year each session started.
30 | ## %in% never returns NA, so rows with a missing start_year are dropped instead of
31 | ## being turned into all-NA rows (which is what chained == comparisons produce).
32 | session_start_years <- c(
33 |   "1992", "1996", "1998", "2002",
34 |   "2006", "2010", "2013"
35 | )
36 | cze_pol_1_7 <- cze_pol[cze_pol$start_year %in% session_start_years, ]
37 |
38 | ### PARTY NAMES
39 | ### Since we will use party name for matching, they should be similar in cze_pol and ParlSpeech
40 | ## Check party names in cze_pol (useNA is an argument of table(), so NAs are counted too)
41 | fr <- data.frame(table(cze_pol_1_7$party, useNA = "always"))
42 | fr
43 | ## Check party names in ParlSpeech
44 | fr_ParlSpeech <- data.frame(table(psp.corpus$party, useNA = "always"))
45 | fr_ParlSpeech
46 |
47 | ## Change party names in ParlSpeech to respective name from cze_pol
48 | # to change in cze_pol:
49 | # 1 ANO - ANO
50 | # 2 CSS - ?
51 | # 3 CSSD - CSSD
52 | # 4 DSP - ?
53 | # 5 HSD-SMS - HSD-SMS
54 | # 6 KDS - KDS
55 | # 7 KDU-CSL - KDU-CSL
56 | # 8 KSCM - KSCM
57 | # 9 LSU - LSU
58 | # 10 MNS - ?
59 | # 11 ODA - ODA
60 | # 12 ODS - ODS
61 | # 13 SPR-RSC - SPR-RSC
62 | # 14 SZ - SZ
63 | # 15 TOP 09 - TOP09
64 | # 16 US-DEU - US-DEU
65 | # 17 Usvit - Usvit
66 | # 18 VV - VV
67 |
68 | # to change in ParlSpeech:
69 | # 19 CMSS
70 | # 20 CMUS
71 | # 21 HSDMS
72 | # 22 LB
73 | # 23 LSNS
74 | # 24 Nez.-SZ
75 | # 25 Nezarazeni
76 | # 26 ONH
77 | # 27 other
78 | # 28 TOP 09 a Starostove
79 | # 29 US
80 |
81 | # Recode by exact label match instead of substring replacement: gsub("CSS", ...)
82 | # would also turn "CSSD" into "otherD", and gsub("US", ...) would corrupt any
83 | # label that merely contains "US", breaking the name + party matching below.
84 | cze_pol_1_7$party <- as.character(cze_pol_1_7$party)
85 | cze_other <- c("CSS", "DSP", "MNS")
86 | cze_pol_1_7$party[cze_pol_1_7$party %in% cze_other] <- "other"
87 | cze_pol_1_7$party <- gsub("TOP 09", "TOP09", cze_pol_1_7$party, fixed = TRUE)
88 |
89 | # ParlSpeech labels without a counterpart in cze_pol are pooled into "other".
90 | # All remaining labels are left untouched, so no temporary placeholder values
91 | # are needed to protect longer labels from the "US" replacement.
92 | psp.corpus$party <- as.character(psp.corpus$party)
93 | psp_other <- c(
94 |   "CMSS", "CMUS", "HSDMS", "LB", "LSNS",
95 |   "Nez.-SZ", "Nezarazeni", "ONH",
96 |   "TOP 09 a Starostove", "US"
97 | )
98 | psp.corpus$party[psp.corpus$party %in% psp_other] <- "other"
99 |
100 |
101 |
102 | ## Add wikidataids and names to cze_pol from cze
103 | cze_pol_1_7$wikidataid <- cze$wikidataid[match(cze_pol_1_7$pageid, cze$pageid)]
104 | cze_pol_1_7$name <- cze$name[match(cze_pol_1_7$pageid, cze$pageid)]
105 |
106 |
107 | ### NAMES
108 | ## Some names include middle names that are not mentioned in ParlSpeech. Let's remove them
109 | ## and add a new column 'name1' (first + last name token; NA if a name has no tokens)
110 | cze_pol_1_7$name_split <- strsplit(cze_pol_1_7$name, " ")
111 | cze_pol_1_7$firstname <- vapply(cze_pol_1_7$name_split, function(x) x[1], character(1))
112 | cze_pol_1_7$lastname <- vapply(cze_pol_1_7$name_split,
113 |                                function(x) x[max(length(x), 1)], character(1))
114 | cze_pol_1_7$name1 <- paste(cze_pol_1_7$firstname, cze_pol_1_7$lastname)
115 | ## Concatenate name and party to match both by name and party
116 | cze_pol_1_7$name_party <- paste(cze_pol_1_7$name, cze_pol_1_7$party)
117 | cze_pol_1_7$name1_party <- paste(cze_pol_1_7$name1, cze_pol_1_7$party)
118 |
119 |
120 |
121 | ################################### Session 1 ###################################
122 | ### Filter by 1st session
123 | cze_pol_1 <- cze_pol_1_7[cze_pol_1_7$start_year == 1992,]
124 | ParlSpeech_1 <- psp.corpus[psp.corpus$start_year == 1993,]
125 |
126 | ### NAMES
127 | ParlSpeech_1$name_party <- paste(ParlSpeech_1$speaker, ParlSpeech_1$party,
128 | sep = " ", collapse = NULL)
129 | ### FREQUENCY
130 | ## Name
131 | fr <- data.frame(table(as.matrix(cze_pol_1$name, useNA = "always")))
132 | cze_pol_1$uniquename <- fr$Freq[match(cze_pol_1$name, fr$Var1)]
133 | fr_ <- fr[fr$Freq != 1,]
134 | fr_
135 |
136 | ## Name1
137 | fr <- data.frame(table(as.matrix(cze_pol_1$name1, useNA = "always")))
138 | cze_pol_1$uniquename1 <- fr$Freq[match(cze_pol_1$name1, fr$Var1)]
139 | fr1_ <- fr[fr$Freq != 1,]
140 | fr1_
141 |
142 | ## Name + party
143 | fr <- data.frame(table(as.matrix(cze_pol_1$name_party, useNA = "always")))
144 | cze_pol_1$uniquename_party <- fr$Freq[match(cze_pol_1$name_party, fr$Var1)]
145 | fr_p <- fr[fr$Freq != 1,]
146 | fr_p
147 |
148 | ## Name1 + party
149 | fr <- data.frame(table(as.matrix(cze_pol_1$name1_party, useNA = "always")))
150 | cze_pol_1$uniquename1_party <- fr$Freq[match(cze_pol_1$name1_party, fr$Var1)]
151 | fr1_p <- fr[fr$Freq != 1,]
152 | fr1_p
153 |
154 | ### MATCHING
155 | ### Match only if:
156 | ### 1. the desired value is still missing
157 | ### 2. the pair name+party is unique in cze_pol (we assume that it's always true in ParlSpeech)
158 | ### 3. if possible, match manually
159 |
160 | ## Create empty columns
161 | cze_pol_1$speaker <- NA
162 |
163 | ## Match by name
164 | cze_pol_1$speaker <- ifelse(is.na(cze_pol_1$speaker) & cze_pol_1$uniquename == 1,
165 | ParlSpeech_1$speaker[match(cze_pol_1$name, ParlSpeech_1$speaker)],
166 | cze_pol_1$speaker)
167 | ## Match by name1
168 | cze_pol_1$speaker <- ifelse(is.na(cze_pol_1$speaker) & cze_pol_1$uniquename1 == 1,
169 | ParlSpeech_1$speaker[match(cze_pol_1$name1, ParlSpeech_1$speaker)],
170 | cze_pol_1$speaker)
171 | ## Match by name and party
172 | cze_pol_1$speaker <- ifelse(is.na(cze_pol_1$speaker) & cze_pol_1$uniquename_party == 1,
173 | ParlSpeech_1$speaker[match(cze_pol_1$name_party, ParlSpeech_1$name_party)],
174 | cze_pol_1$speaker)
175 | ## Match by name1 and party
176 | cze_pol_1$speaker <- ifelse(is.na(cze_pol_1$speaker) & cze_pol_1$uniquename1_party == 1,
177 | ParlSpeech_1$speaker[match(cze_pol_1$name1_party, ParlSpeech_1$name_party)],
178 | cze_pol_1$speaker)
179 | # Check the number of still missing data
180 | sum(is.na(cze_pol_1$speaker))
181 |
182 |
183 | ## Match manually (check which existing in ParlSpeech values are not in cze_pol)
184 | # Create csv with unmatched data for manual matching (the code is commented since it is
185 | # just preparation for manual matching)
186 | # ParlSpeech_1$unname <- cze_pol_1$uniquename_party[match(ParlSpeech_1$name_party,cze_pol_1$name_party)]
187 | # ParlSpeech_1$unname1 <- cze_pol_1$uniquename1_party[match(ParlSpeech_1$name_party,cze_pol_1$name1_party)]
188 | # ParlSpeech_1$match <- NA
189 | # ParlSpeech_1$match <- ifelse(is.na(ParlSpeech_1$match) & ParlSpeech_1$unname == 1,
190 | # "matched", ParlSpeech_1$match)
191 | # ParlSpeech_1$match <- ifelse(is.na(ParlSpeech_1$match) & ParlSpeech_1$unname1 == 1,
192 | # "matched", ParlSpeech_1$match)
193 | # unmatched_ParlSpeech_1 <- ParlSpeech_1[is.na(ParlSpeech_1$match),]
194 | # unmatched_cze_pol_1 <- cze_pol_1[is.na(cze_pol_1$speaker),]
195 | # unmatched_cze_pol_1 <- unmatched_cze_pol_1[, !(colnames(unmatched_cze_pol_1) %in% c("name_split"))]
196 | # write.xlsx(unmatched_ParlSpeech_1, file = "unmatched_ParlSpeech_1.xlsx", row.names=FALSE)
197 | # write.xlsx(unmatched_cze_pol_1, file = "unmatched_cze_pol_1.xlsx", row.names=FALSE)
198 |
199 | # Match manually
200 | # no matches
201 |
202 |
203 | ### OUTPUT
204 | ## Add ParlSpeech speaker's id to cze from cze_pol_1 by wikidataid
205 | cze$ParlSpeech_speaker <- cze_pol_1$speaker[match(cze$wikidataid, cze_pol_1$wikidataid)]
206 |
207 |
208 |
209 |
210 | ################################### Session 2 ###################################
211 | ### Filter by 2d session
212 | cze_pol_2 <- cze_pol_1_7[cze_pol_1_7$start_year == 1996,]
213 | ParlSpeech_2 <- psp.corpus[psp.corpus$start_year == 1996,]
214 |
215 | ### NAMES
216 | ParlSpeech_2$name_party <- paste(ParlSpeech_2$speaker, ParlSpeech_2$party,
217 | sep = " ", collapse = NULL)
### FREQUENCY
# For each candidate key, count how often every value occurs in cze_pol_2 and
# store that count on the row. Only keys that occur exactly once are used by
# the automatic matching below. The non-unique values are printed for review.
# NOTE: `useNA` has to be an argument of table(); when it is handed to
# as.matrix() it is silently swallowed by `...` and missing keys are never
# counted.
## Name
fr <- data.frame(table(as.matrix(cze_pol_2$name), useNA = "always"))
cze_pol_2$uniquename <- fr$Freq[match(cze_pol_2$name, fr$Var1)]
fr_ <- fr[fr$Freq != 1,]
fr_

## Name1
fr <- data.frame(table(as.matrix(cze_pol_2$name1), useNA = "always"))
cze_pol_2$uniquename1 <- fr$Freq[match(cze_pol_2$name1, fr$Var1)]
fr1_ <- fr[fr$Freq != 1,]
fr1_

## Name + party
fr <- data.frame(table(as.matrix(cze_pol_2$name_party), useNA = "always"))
cze_pol_2$uniquename_party <- fr$Freq[match(cze_pol_2$name_party, fr$Var1)]
fr_p <- fr[fr$Freq != 1,]
fr_p

## Name1 + party
fr <- data.frame(table(as.matrix(cze_pol_2$name1_party), useNA = "always"))
cze_pol_2$uniquename1_party <- fr$Freq[match(cze_pol_2$name1_party, fr$Var1)]
fr1_p <- fr[fr$Freq != 1,]
fr1_p

### MATCHING
# Each step only fills rows whose speaker is still missing and whose key is
# unique within cze_pol_2; rows matched by an earlier step are left untouched.
## Create empty column (character, so the type does not depend on the matches)
cze_pol_2$speaker <- NA_character_

## Match by name
cze_pol_2$speaker <- ifelse(is.na(cze_pol_2$speaker) & cze_pol_2$uniquename == 1,
                            ParlSpeech_2$speaker[match(cze_pol_2$name, ParlSpeech_2$speaker)],
                            cze_pol_2$speaker)
## Match by name1
cze_pol_2$speaker <- ifelse(is.na(cze_pol_2$speaker) & cze_pol_2$uniquename1 == 1,
                            ParlSpeech_2$speaker[match(cze_pol_2$name1, ParlSpeech_2$speaker)],
                            cze_pol_2$speaker)
## Match by name and party
cze_pol_2$speaker <- ifelse(is.na(cze_pol_2$speaker) & cze_pol_2$uniquename_party == 1,
                            ParlSpeech_2$speaker[match(cze_pol_2$name_party, ParlSpeech_2$name_party)],
                            cze_pol_2$speaker)
## Match by name1 and party
cze_pol_2$speaker <- ifelse(is.na(cze_pol_2$speaker) & cze_pol_2$uniquename1_party == 1,
                            ParlSpeech_2$speaker[match(cze_pol_2$name1_party, ParlSpeech_2$name_party)],
                            cze_pol_2$speaker)
# Check the number of still missing data
sum(is.na(cze_pol_2$speaker))


## Match manually (check which values existing in ParlSpeech are not in cze_pol)
# Export the unmatched rows for manual matching (the code is commented out since
# it is only preparation for the manual step)
# ParlSpeech_2$unname <- cze_pol_2$uniquename_party[match(ParlSpeech_2$name_party,cze_pol_2$name_party)]
# ParlSpeech_2$unname1 <- cze_pol_2$uniquename1_party[match(ParlSpeech_2$name_party,cze_pol_2$name1_party)]
# ParlSpeech_2$match <- NA
# ParlSpeech_2$match <- ifelse(is.na(ParlSpeech_2$match) & ParlSpeech_2$unname == 1,
#                              "matched", ParlSpeech_2$match)
# ParlSpeech_2$match <- ifelse(is.na(ParlSpeech_2$match) & ParlSpeech_2$unname1 == 1,
#                              "matched", ParlSpeech_2$match)
# unmatched_ParlSpeech_2 <- ParlSpeech_2[is.na(ParlSpeech_2$match),]
# cze_pol_2$speaker <- ifelse(is.na(cze_pol_2$speaker),
#                             cze$ParlSpeech_speaker[match(cze_pol_2$wikidataid, cze$wikidataid)],
#                             cze_pol_2$speaker)
# unmatched_cze_pol_2 <- cze_pol_2[is.na(cze_pol_2$speaker),]
# unmatched_cze_pol_2 <- unmatched_cze_pol_2[, !(colnames(unmatched_cze_pol_2) %in% c("name_split"))]
# write.xlsx(unmatched_ParlSpeech_2, file = "unmatched_ParlSpeech_2.xlsx", row.names=FALSE)
# write.xlsx(unmatched_cze_pol_2, file = "unmatched_cze_pol_2.xlsx", row.names=FALSE)

# Match manually (hand-assigned speaker name for one Wikidata id)
cze_pol_2$speaker[cze_pol_2$wikidataid == "Q10861579"] <- "Zuzka Bebarova Rujbrova"


### OUTPUT
## Add the ParlSpeech speaker name to cze from cze_pol_2 by wikidataid.
## Only rows of cze that are still missing a speaker are filled.
cze$ParlSpeech_speaker <- ifelse(is.na(cze$ParlSpeech_speaker),
                                 cze_pol_2$speaker[match(cze$wikidataid, cze_pol_2$wikidataid)],
                                 cze$ParlSpeech_speaker)
295 |
296 |
297 |
################################### Session 3 ###################################
### Filter by 3rd session (rows whose start_year is 1998)
cze_pol_3 <- cze_pol_1_7[cze_pol_1_7$start_year == 1998,]
ParlSpeech_3 <- psp.corpus[psp.corpus$start_year == 1998,]

### NAMES
## "speaker party" key on the ParlSpeech side
ParlSpeech_3$name_party <- paste(ParlSpeech_3$speaker, ParlSpeech_3$party,
                                 sep = " ", collapse = NULL)
### FREQUENCY
# For each candidate key, count how often every value occurs in cze_pol_3 and
# store that count on the row. Only keys that occur exactly once are used by
# the automatic matching below. The non-unique values are printed for review.
# NOTE: `useNA` has to be an argument of table(); when it is handed to
# as.matrix() it is silently swallowed by `...` and missing keys are never
# counted.
## Name
fr <- data.frame(table(as.matrix(cze_pol_3$name), useNA = "always"))
cze_pol_3$uniquename <- fr$Freq[match(cze_pol_3$name, fr$Var1)]
fr_ <- fr[fr$Freq != 1,]
fr_

## Name1
fr <- data.frame(table(as.matrix(cze_pol_3$name1), useNA = "always"))
cze_pol_3$uniquename1 <- fr$Freq[match(cze_pol_3$name1, fr$Var1)]
fr1_ <- fr[fr$Freq != 1,]
fr1_

## Name + party
fr <- data.frame(table(as.matrix(cze_pol_3$name_party), useNA = "always"))
cze_pol_3$uniquename_party <- fr$Freq[match(cze_pol_3$name_party, fr$Var1)]
fr_p <- fr[fr$Freq != 1,]
fr_p

## Name1 + party
fr <- data.frame(table(as.matrix(cze_pol_3$name1_party), useNA = "always"))
cze_pol_3$uniquename1_party <- fr$Freq[match(cze_pol_3$name1_party, fr$Var1)]
fr1_p <- fr[fr$Freq != 1,]
fr1_p

### MATCHING
# Each step only fills rows whose speaker is still missing and whose key is
# unique within cze_pol_3; rows matched by an earlier step are left untouched.
## Create empty column (character, so the type does not depend on the matches)
cze_pol_3$speaker <- NA_character_

## Match by name
cze_pol_3$speaker <- ifelse(is.na(cze_pol_3$speaker) & cze_pol_3$uniquename == 1,
                            ParlSpeech_3$speaker[match(cze_pol_3$name, ParlSpeech_3$speaker)],
                            cze_pol_3$speaker)
## Match by name1
cze_pol_3$speaker <- ifelse(is.na(cze_pol_3$speaker) & cze_pol_3$uniquename1 == 1,
                            ParlSpeech_3$speaker[match(cze_pol_3$name1, ParlSpeech_3$speaker)],
                            cze_pol_3$speaker)
## Match by name and party
cze_pol_3$speaker <- ifelse(is.na(cze_pol_3$speaker) & cze_pol_3$uniquename_party == 1,
                            ParlSpeech_3$speaker[match(cze_pol_3$name_party, ParlSpeech_3$name_party)],
                            cze_pol_3$speaker)
## Match by name1 and party
cze_pol_3$speaker <- ifelse(is.na(cze_pol_3$speaker) & cze_pol_3$uniquename1_party == 1,
                            ParlSpeech_3$speaker[match(cze_pol_3$name1_party, ParlSpeech_3$name_party)],
                            cze_pol_3$speaker)
# Check the number of still missing data
sum(is.na(cze_pol_3$speaker))


## Match manually (check which values existing in ParlSpeech are not in cze_pol)
# Export the unmatched rows for manual matching (the code is commented out since
# it is only preparation for the manual step)
# ParlSpeech_3$unname <- cze_pol_3$uniquename_party[match(ParlSpeech_3$name_party,cze_pol_3$name_party)]
# ParlSpeech_3$unname1 <- cze_pol_3$uniquename1_party[match(ParlSpeech_3$name_party,cze_pol_3$name1_party)]
# ParlSpeech_3$match <- NA
# ParlSpeech_3$match <- ifelse(is.na(ParlSpeech_3$match) & ParlSpeech_3$unname == 1,
#                              "matched", ParlSpeech_3$match)
# ParlSpeech_3$match <- ifelse(is.na(ParlSpeech_3$match) & ParlSpeech_3$unname1 == 1,
#                              "matched", ParlSpeech_3$match)
# unmatched_ParlSpeech_3 <- ParlSpeech_3[is.na(ParlSpeech_3$match),]
# cze_pol_3$speaker <- ifelse(is.na(cze_pol_3$speaker),
#                             cze$ParlSpeech_speaker[match(cze_pol_3$wikidataid, cze$wikidataid)],
#                             cze_pol_3$speaker)
# unmatched_cze_pol_3 <- cze_pol_3[is.na(cze_pol_3$speaker),]
# unmatched_cze_pol_3 <- unmatched_cze_pol_3[, !(colnames(unmatched_cze_pol_3) %in% c("name_split"))]
# write.csv(unmatched_ParlSpeech_3, file = "unmatched_ParlSpeech_3.csv", row.names=FALSE)
# write.xlsx(unmatched_cze_pol_3, file = "unmatched_cze_pol_3.xlsx", row.names=FALSE)

# Match manually (hand-assigned speaker name for one Wikidata id)
cze_pol_3$speaker[cze_pol_3$wikidataid == "Q12037465"] <- "Miloslav Kucera ml"
# Open question from the author: should the older namesake be matched to
# "Miloslav Kucera" or "Miloslav Kucera st"?
# Check the number of still missing data
sum(is.na(cze_pol_3$speaker))


### OUTPUT
## Add the ParlSpeech speaker name to cze from cze_pol_3 by wikidataid.
## Only rows of cze that are still missing a speaker are filled.
cze$ParlSpeech_speaker <- ifelse(is.na(cze$ParlSpeech_speaker),
                                 cze_pol_3$speaker[match(cze$wikidataid, cze_pol_3$wikidataid)],
                                 cze$ParlSpeech_speaker)
386 |
387 |
388 |
################################### Session 4 ###################################
### Filter by 4th session (rows whose start_year is 2002)
cze_pol_4 <- cze_pol_1_7[cze_pol_1_7$start_year == 2002,]
ParlSpeech_4 <- psp.corpus[psp.corpus$start_year == 2002,]

### NAMES
## "speaker party" key on the ParlSpeech side
ParlSpeech_4$name_party <- paste(ParlSpeech_4$speaker, ParlSpeech_4$party,
                                 sep = " ", collapse = NULL)
## Additional key for this session: "lastname firstname" (and the same + party)
cze_pol_4$name2 <- paste(cze_pol_4$lastname, cze_pol_4$firstname,
                         sep = " ", collapse = NULL)
cze_pol_4$name2_party <- paste(cze_pol_4$name2, cze_pol_4$party,
                               sep = " ", collapse = NULL)

### FREQUENCY
# For each candidate key, count how often every value occurs in cze_pol_4 and
# store that count on the row. Only keys that occur exactly once are used by
# the automatic matching below. The non-unique values are printed for review.
# NOTE: `useNA` has to be an argument of table(); when it is handed to
# as.matrix() it is silently swallowed by `...` and missing keys are never
# counted.
## Name
fr <- data.frame(table(as.matrix(cze_pol_4$name), useNA = "always"))
cze_pol_4$uniquename <- fr$Freq[match(cze_pol_4$name, fr$Var1)]
fr_ <- fr[fr$Freq != 1,]
fr_

## Name1
fr <- data.frame(table(as.matrix(cze_pol_4$name1), useNA = "always"))
cze_pol_4$uniquename1 <- fr$Freq[match(cze_pol_4$name1, fr$Var1)]
fr1_ <- fr[fr$Freq != 1,]
fr1_

## Name2
fr <- data.frame(table(as.matrix(cze_pol_4$name2), useNA = "always"))
cze_pol_4$uniquename2 <- fr$Freq[match(cze_pol_4$name2, fr$Var1)]
fr2_ <- fr[fr$Freq != 1,]
fr2_

## Name + party
fr <- data.frame(table(as.matrix(cze_pol_4$name_party), useNA = "always"))
cze_pol_4$uniquename_party <- fr$Freq[match(cze_pol_4$name_party, fr$Var1)]
fr_p <- fr[fr$Freq != 1,]
fr_p

## Name1 + party
fr <- data.frame(table(as.matrix(cze_pol_4$name1_party), useNA = "always"))
cze_pol_4$uniquename1_party <- fr$Freq[match(cze_pol_4$name1_party, fr$Var1)]
fr1_p <- fr[fr$Freq != 1,]
fr1_p

## Name2 + party
fr <- data.frame(table(as.matrix(cze_pol_4$name2_party), useNA = "always"))
cze_pol_4$uniquename2_party <- fr$Freq[match(cze_pol_4$name2_party, fr$Var1)]
fr2_p <- fr[fr$Freq != 1,]
fr2_p


### MATCHING
# Each step only fills rows whose speaker is still missing and whose key is
# unique within cze_pol_4; rows matched by an earlier step are left untouched.
# In this session the party-qualified keys are tried before the plain names.
## Create empty column (character, so the type does not depend on the matches)
cze_pol_4$speaker <- NA_character_

## Match by name and party
cze_pol_4$speaker <- ifelse(is.na(cze_pol_4$speaker) & cze_pol_4$uniquename_party == 1,
                            ParlSpeech_4$speaker[match(cze_pol_4$name_party, ParlSpeech_4$name_party)],
                            cze_pol_4$speaker)
## Match by name1 and party
cze_pol_4$speaker <- ifelse(is.na(cze_pol_4$speaker) & cze_pol_4$uniquename1_party == 1,
                            ParlSpeech_4$speaker[match(cze_pol_4$name1_party, ParlSpeech_4$name_party)],
                            cze_pol_4$speaker)
## Match by name
cze_pol_4$speaker <- ifelse(is.na(cze_pol_4$speaker) & cze_pol_4$uniquename == 1,
                            ParlSpeech_4$speaker[match(cze_pol_4$name, ParlSpeech_4$speaker)],
                            cze_pol_4$speaker)
## Match by name1
cze_pol_4$speaker <- ifelse(is.na(cze_pol_4$speaker) & cze_pol_4$uniquename1 == 1,
                            ParlSpeech_4$speaker[match(cze_pol_4$name1, ParlSpeech_4$speaker)],
                            cze_pol_4$speaker)
# Check the number of still missing data
sum(is.na(cze_pol_4$speaker))
## Match by name2 and party
cze_pol_4$speaker <- ifelse(is.na(cze_pol_4$speaker) & cze_pol_4$uniquename2_party == 1,
                            ParlSpeech_4$speaker[match(cze_pol_4$name2_party, ParlSpeech_4$name_party)],
                            cze_pol_4$speaker)
## Match by name2
cze_pol_4$speaker <- ifelse(is.na(cze_pol_4$speaker) & cze_pol_4$uniquename2 == 1,
                            ParlSpeech_4$speaker[match(cze_pol_4$name2, ParlSpeech_4$speaker)],
                            cze_pol_4$speaker)
# Check the number of still missing data
sum(is.na(cze_pol_4$speaker))


## Match manually (check which values existing in ParlSpeech are not in cze_pol)
# Collect the unmatched rows for manual matching (the code is commented out
# since it is only preparation for the manual step)
# ParlSpeech_4$unname <- cze_pol_4$uniquename_party[match(ParlSpeech_4$name_party,cze_pol_4$name_party)]
# ParlSpeech_4$unname1 <- cze_pol_4$uniquename1_party[match(ParlSpeech_4$name_party,cze_pol_4$name1_party)]
# ParlSpeech_4$match <- NA
# ParlSpeech_4$match <- ifelse(is.na(ParlSpeech_4$match) & ParlSpeech_4$unname == 1,
#                              "matched", ParlSpeech_4$match)
# ParlSpeech_4$match <- ifelse(is.na(ParlSpeech_4$match) & ParlSpeech_4$unname1 == 1,
#                              "matched", ParlSpeech_4$match)
# unmatched_ParlSpeech_4 <- ParlSpeech_4[is.na(ParlSpeech_4$match),]
# cze_pol_4$speaker <- ifelse(is.na(cze_pol_4$speaker),
#                             cze$ParlSpeech_speaker[match(cze_pol_4$wikidataid, cze$wikidataid)],
#                             cze_pol_4$speaker)
# unmatched_cze_pol_4 <- cze_pol_4[is.na(cze_pol_4$speaker),]
# unmatched_cze_pol_4 <- unmatched_cze_pol_4[, !(colnames(unmatched_cze_pol_4) %in% c("name_split"))]

# Match manually
# no matches

### OUTPUT
## Add the ParlSpeech speaker name to cze from cze_pol_4 by wikidataid.
## Only rows of cze that are still missing a speaker are filled.
cze$ParlSpeech_speaker <- ifelse(is.na(cze$ParlSpeech_speaker),
                                 cze_pol_4$speaker[match(cze$wikidataid, cze_pol_4$wikidataid)],
                                 cze$ParlSpeech_speaker)
499 |
500 |
501 |
################################### Session 5 ###################################
### Filter by 5th session (rows whose start_year is 2006)
cze_pol_5 <- cze_pol_1_7[cze_pol_1_7$start_year == 2006,]
ParlSpeech_5 <- psp.corpus[psp.corpus$start_year == 2006,]

### NAMES
## "speaker party" key on the ParlSpeech side
ParlSpeech_5$name_party <- paste(ParlSpeech_5$speaker, ParlSpeech_5$party,
                                 sep = " ", collapse = NULL)
## Additional key for this session: "lastname firstname" (and the same + party)
cze_pol_5$name2 <- paste(cze_pol_5$lastname, cze_pol_5$firstname,
                         sep = " ", collapse = NULL)
cze_pol_5$name2_party <- paste(cze_pol_5$name2, cze_pol_5$party,
                               sep = " ", collapse = NULL)

### FREQUENCY
# For each candidate key, count how often every value occurs in cze_pol_5 and
# store that count on the row. Only keys that occur exactly once are used by
# the automatic matching below. The non-unique values are printed for review.
# NOTE: `useNA` has to be an argument of table(); when it is handed to
# as.matrix() it is silently swallowed by `...` and missing keys are never
# counted.
## Name
fr <- data.frame(table(as.matrix(cze_pol_5$name), useNA = "always"))
cze_pol_5$uniquename <- fr$Freq[match(cze_pol_5$name, fr$Var1)]
fr_ <- fr[fr$Freq != 1,]
fr_

## Name1
fr <- data.frame(table(as.matrix(cze_pol_5$name1), useNA = "always"))
cze_pol_5$uniquename1 <- fr$Freq[match(cze_pol_5$name1, fr$Var1)]
fr1_ <- fr[fr$Freq != 1,]
fr1_

## Name + party
fr <- data.frame(table(as.matrix(cze_pol_5$name_party), useNA = "always"))
cze_pol_5$uniquename_party <- fr$Freq[match(cze_pol_5$name_party, fr$Var1)]
fr_p <- fr[fr$Freq != 1,]
fr_p

## Name1 + party
fr <- data.frame(table(as.matrix(cze_pol_5$name1_party), useNA = "always"))
cze_pol_5$uniquename1_party <- fr$Freq[match(cze_pol_5$name1_party, fr$Var1)]
fr1_p <- fr[fr$Freq != 1,]
fr1_p

## Name2
fr <- data.frame(table(as.matrix(cze_pol_5$name2), useNA = "always"))
cze_pol_5$uniquename2 <- fr$Freq[match(cze_pol_5$name2, fr$Var1)]
fr2_ <- fr[fr$Freq != 1,]
fr2_

## Name2 + party
fr <- data.frame(table(as.matrix(cze_pol_5$name2_party), useNA = "always"))
cze_pol_5$uniquename2_party <- fr$Freq[match(cze_pol_5$name2_party, fr$Var1)]
fr2_p <- fr[fr$Freq != 1,]
fr2_p


### MATCHING
# Each step only fills rows whose speaker is still missing and whose key is
# unique within cze_pol_5; rows matched by an earlier step are left untouched.
## Create empty column (character, so the type does not depend on the matches)
cze_pol_5$speaker <- NA_character_

## Match by name and party
cze_pol_5$speaker <- ifelse(is.na(cze_pol_5$speaker) & cze_pol_5$uniquename_party == 1,
                            ParlSpeech_5$speaker[match(cze_pol_5$name_party, ParlSpeech_5$name_party)],
                            cze_pol_5$speaker)
## Match by name
cze_pol_5$speaker <- ifelse(is.na(cze_pol_5$speaker) & cze_pol_5$uniquename == 1,
                            ParlSpeech_5$speaker[match(cze_pol_5$name, ParlSpeech_5$speaker)],
                            cze_pol_5$speaker)
## Match by name1 and party
cze_pol_5$speaker <- ifelse(is.na(cze_pol_5$speaker) & cze_pol_5$uniquename1_party == 1,
                            ParlSpeech_5$speaker[match(cze_pol_5$name1_party, ParlSpeech_5$name_party)],
                            cze_pol_5$speaker)
## Match by name1
cze_pol_5$speaker <- ifelse(is.na(cze_pol_5$speaker) & cze_pol_5$uniquename1 == 1,
                            ParlSpeech_5$speaker[match(cze_pol_5$name1, ParlSpeech_5$speaker)],
                            cze_pol_5$speaker)
# Check the number of still missing data
sum(is.na(cze_pol_5$speaker))
## Match by name2 and party
cze_pol_5$speaker <- ifelse(is.na(cze_pol_5$speaker) & cze_pol_5$uniquename2_party == 1,
                            ParlSpeech_5$speaker[match(cze_pol_5$name2_party, ParlSpeech_5$name_party)],
                            cze_pol_5$speaker)
## Match by name2
cze_pol_5$speaker <- ifelse(is.na(cze_pol_5$speaker) & cze_pol_5$uniquename2 == 1,
                            ParlSpeech_5$speaker[match(cze_pol_5$name2, ParlSpeech_5$speaker)],
                            cze_pol_5$speaker)
# Check the number of still missing data
sum(is.na(cze_pol_5$speaker))


## Match manually (check which values existing in ParlSpeech are not in cze_pol)
# Collect the unmatched rows on both sides for manual matching.
# NOTE: unlike in the other sessions, this preparation code is active here.
# Flag ParlSpeech rows whose name_party key corresponds to a unique
# name_party / name1_party key in cze_pol_5; everything else is "unmatched".
ParlSpeech_5$unname <- cze_pol_5$uniquename_party[match(ParlSpeech_5$name_party,cze_pol_5$name_party)]
ParlSpeech_5$unname1 <- cze_pol_5$uniquename1_party[match(ParlSpeech_5$name_party,cze_pol_5$name1_party)]
ParlSpeech_5$match <- NA
ParlSpeech_5$match <- ifelse(is.na(ParlSpeech_5$match) & ParlSpeech_5$unname == 1,
                             "matched", ParlSpeech_5$match)
ParlSpeech_5$match <- ifelse(is.na(ParlSpeech_5$match) & ParlSpeech_5$unname1 == 1,
                             "matched", ParlSpeech_5$match)
unmatched_ParlSpeech_5 <- ParlSpeech_5[is.na(ParlSpeech_5$match),]
# Rows still without a speaker take over the value already stored in cze for
# the same wikidataid, so that only rows never matched so far stay in the
# unmatched list.
cze_pol_5$speaker <- ifelse(is.na(cze_pol_5$speaker),
                            cze$ParlSpeech_speaker[match(cze_pol_5$wikidataid, cze$wikidataid)],
                            cze_pol_5$speaker)
unmatched_cze_pol_5 <- cze_pol_5[is.na(cze_pol_5$speaker),]
# Drop the name_split column from the review table
unmatched_cze_pol_5 <- unmatched_cze_pol_5[, !(colnames(unmatched_cze_pol_5) %in% c("name_split"))]

# Match manually
# no matches


### OUTPUT
## Add the ParlSpeech speaker name to cze from cze_pol_5 by wikidataid.
## Only rows of cze that are still missing a speaker are filled.
cze$ParlSpeech_speaker <- ifelse(is.na(cze$ParlSpeech_speaker),
                                 cze_pol_5$speaker[match(cze$wikidataid, cze_pol_5$wikidataid)],
                                 cze$ParlSpeech_speaker)
613 |
614 |
615 |
################################### Session 6 ###################################
### Filter by 6th session (rows whose start_year is 2010)
cze_pol_6 <- cze_pol_1_7[cze_pol_1_7$start_year == 2010,]
ParlSpeech_6 <- psp.corpus[psp.corpus$start_year == 2010,]

### NAMES
## "speaker party" key on the ParlSpeech side
ParlSpeech_6$name_party <- paste(ParlSpeech_6$speaker, ParlSpeech_6$party,
                                 sep = " ", collapse = NULL)
### FREQUENCY
# For each candidate key, count how often every value occurs in cze_pol_6 and
# store that count on the row. Only keys that occur exactly once are used by
# the automatic matching below. The non-unique values are printed for review.
# NOTE: `useNA` has to be an argument of table(); when it is handed to
# as.matrix() it is silently swallowed by `...` and missing keys are never
# counted.
## Name
fr <- data.frame(table(as.matrix(cze_pol_6$name), useNA = "always"))
cze_pol_6$uniquename <- fr$Freq[match(cze_pol_6$name, fr$Var1)]
fr_ <- fr[fr$Freq != 1,]
fr_

## Name1
fr <- data.frame(table(as.matrix(cze_pol_6$name1), useNA = "always"))
cze_pol_6$uniquename1 <- fr$Freq[match(cze_pol_6$name1, fr$Var1)]
fr1_ <- fr[fr$Freq != 1,]
fr1_

## Name + party
fr <- data.frame(table(as.matrix(cze_pol_6$name_party), useNA = "always"))
cze_pol_6$uniquename_party <- fr$Freq[match(cze_pol_6$name_party, fr$Var1)]
fr_p <- fr[fr$Freq != 1,]
fr_p

## Name1 + party
fr <- data.frame(table(as.matrix(cze_pol_6$name1_party), useNA = "always"))
cze_pol_6$uniquename1_party <- fr$Freq[match(cze_pol_6$name1_party, fr$Var1)]
fr1_p <- fr[fr$Freq != 1,]
fr1_p

### MATCHING
# Each step only fills rows whose speaker is still missing and whose key is
# unique within cze_pol_6; rows matched by an earlier step are left untouched.
## Create empty column (character, so the type does not depend on the matches)
cze_pol_6$speaker <- NA_character_

## Match by name
cze_pol_6$speaker <- ifelse(is.na(cze_pol_6$speaker) & cze_pol_6$uniquename == 1,
                            ParlSpeech_6$speaker[match(cze_pol_6$name, ParlSpeech_6$speaker)],
                            cze_pol_6$speaker)
## Match by name1
cze_pol_6$speaker <- ifelse(is.na(cze_pol_6$speaker) & cze_pol_6$uniquename1 == 1,
                            ParlSpeech_6$speaker[match(cze_pol_6$name1, ParlSpeech_6$speaker)],
                            cze_pol_6$speaker)
## Match by name and party
cze_pol_6$speaker <- ifelse(is.na(cze_pol_6$speaker) & cze_pol_6$uniquename_party == 1,
                            ParlSpeech_6$speaker[match(cze_pol_6$name_party, ParlSpeech_6$name_party)],
                            cze_pol_6$speaker)
## Match by name1 and party
cze_pol_6$speaker <- ifelse(is.na(cze_pol_6$speaker) & cze_pol_6$uniquename1_party == 1,
                            ParlSpeech_6$speaker[match(cze_pol_6$name1_party, ParlSpeech_6$name_party)],
                            cze_pol_6$speaker)
# Check the number of still missing data
sum(is.na(cze_pol_6$speaker))


## Match manually (check which values existing in ParlSpeech are not in cze_pol)
# Collect the unmatched rows for manual matching (the code is commented out
# since it is only preparation for the manual step)
# ParlSpeech_6$unname <- cze_pol_6$uniquename_party[match(ParlSpeech_6$name_party,cze_pol_6$name_party)]
# ParlSpeech_6$unname1 <- cze_pol_6$uniquename1_party[match(ParlSpeech_6$name_party,cze_pol_6$name1_party)]
# ParlSpeech_6$match <- NA
# ParlSpeech_6$match <- ifelse(is.na(ParlSpeech_6$match) & ParlSpeech_6$unname == 1,
#                              "matched", ParlSpeech_6$match)
# ParlSpeech_6$match <- ifelse(is.na(ParlSpeech_6$match) & ParlSpeech_6$unname1 == 1,
#                              "matched", ParlSpeech_6$match)
# unmatched_ParlSpeech_6 <- ParlSpeech_6[is.na(ParlSpeech_6$match),]
# cze_pol_6$speaker <- ifelse(is.na(cze_pol_6$speaker),
#                             cze$ParlSpeech_speaker[match(cze_pol_6$wikidataid, cze$wikidataid)],
#                             cze_pol_6$speaker)
# unmatched_cze_pol_6 <- cze_pol_6[is.na(cze_pol_6$speaker),]
# unmatched_cze_pol_6 <- unmatched_cze_pol_6[, !(colnames(unmatched_cze_pol_6) %in% c("name_split"))]

# Match manually (hand-assigned speaker names for two Wikidata ids that map to
# the "ml" / "st" variants of the same name)
cze_pol_6$speaker[cze_pol_6$wikidataid == "Q12026431"] <- "Josef Novotny ml"
cze_pol_6$speaker[cze_pol_6$wikidataid == "Q12026433"] <- "Josef Novotny st"


### OUTPUT
## Add the ParlSpeech speaker name to cze from cze_pol_6 by wikidataid.
## Only rows of cze that are still missing a speaker are filled.
cze$ParlSpeech_speaker <- ifelse(is.na(cze$ParlSpeech_speaker),
                                 cze_pol_6$speaker[match(cze$wikidataid, cze_pol_6$wikidataid)],
                                 cze$ParlSpeech_speaker)
700 |
701 |
702 |
################################### Session 7 ###################################
### Filter by 7th session (rows whose start_year is 2013)
cze_pol_7 <- cze_pol_1_7[cze_pol_1_7$start_year == 2013,]
ParlSpeech_7 <- psp.corpus[psp.corpus$start_year == 2013,]

### NAMES
## "speaker party" key on the ParlSpeech side
ParlSpeech_7$name_party <- paste(ParlSpeech_7$speaker, ParlSpeech_7$party,
                                 sep = " ", collapse = NULL)
### FREQUENCY
# For each candidate key, count how often every value occurs in cze_pol_7 and
# store that count on the row. Only keys that occur exactly once are used by
# the automatic matching below. The non-unique values are printed for review.
# NOTE: `useNA` has to be an argument of table(); when it is handed to
# as.matrix() it is silently swallowed by `...` and missing keys are never
# counted.
## Name
fr <- data.frame(table(as.matrix(cze_pol_7$name), useNA = "always"))
cze_pol_7$uniquename <- fr$Freq[match(cze_pol_7$name, fr$Var1)]
fr_ <- fr[fr$Freq != 1,]
fr_

## Name1
fr <- data.frame(table(as.matrix(cze_pol_7$name1), useNA = "always"))
cze_pol_7$uniquename1 <- fr$Freq[match(cze_pol_7$name1, fr$Var1)]
fr1_ <- fr[fr$Freq != 1,]
fr1_

## Name + party
fr <- data.frame(table(as.matrix(cze_pol_7$name_party), useNA = "always"))
cze_pol_7$uniquename_party <- fr$Freq[match(cze_pol_7$name_party, fr$Var1)]
fr_p <- fr[fr$Freq != 1,]
fr_p

## Name1 + party
fr <- data.frame(table(as.matrix(cze_pol_7$name1_party), useNA = "always"))
cze_pol_7$uniquename1_party <- fr$Freq[match(cze_pol_7$name1_party, fr$Var1)]
fr1_p <- fr[fr$Freq != 1,]
fr1_p

### MATCHING
# Each step only fills rows whose speaker is still missing and whose key is
# unique within cze_pol_7; rows matched by an earlier step are left untouched.
## Create empty column (character, so the type does not depend on the matches)
cze_pol_7$speaker <- NA_character_

## Match by name
cze_pol_7$speaker <- ifelse(is.na(cze_pol_7$speaker) & cze_pol_7$uniquename == 1,
                            ParlSpeech_7$speaker[match(cze_pol_7$name, ParlSpeech_7$speaker)],
                            cze_pol_7$speaker)
## Match by name1
cze_pol_7$speaker <- ifelse(is.na(cze_pol_7$speaker) & cze_pol_7$uniquename1 == 1,
                            ParlSpeech_7$speaker[match(cze_pol_7$name1, ParlSpeech_7$speaker)],
                            cze_pol_7$speaker)
## Match by name and party
cze_pol_7$speaker <- ifelse(is.na(cze_pol_7$speaker) & cze_pol_7$uniquename_party == 1,
                            ParlSpeech_7$speaker[match(cze_pol_7$name_party, ParlSpeech_7$name_party)],
                            cze_pol_7$speaker)
## Match by name1 and party
cze_pol_7$speaker <- ifelse(is.na(cze_pol_7$speaker) & cze_pol_7$uniquename1_party == 1,
                            ParlSpeech_7$speaker[match(cze_pol_7$name1_party, ParlSpeech_7$name_party)],
                            cze_pol_7$speaker)
# Check the number of still missing data
sum(is.na(cze_pol_7$speaker))


## Match manually (check which values existing in ParlSpeech are not in cze_pol)
# Collect the unmatched rows on both sides for manual matching.
# NOTE: unlike in the other sessions, this preparation code is active here.
# Flag ParlSpeech rows whose name_party key corresponds to a unique
# name_party / name1_party key in cze_pol_7; everything else is "unmatched".
ParlSpeech_7$unname <- cze_pol_7$uniquename_party[match(ParlSpeech_7$name_party,cze_pol_7$name_party)]
ParlSpeech_7$unname1 <- cze_pol_7$uniquename1_party[match(ParlSpeech_7$name_party,cze_pol_7$name1_party)]
ParlSpeech_7$match <- NA
ParlSpeech_7$match <- ifelse(is.na(ParlSpeech_7$match) & ParlSpeech_7$unname == 1,
                             "matched", ParlSpeech_7$match)
ParlSpeech_7$match <- ifelse(is.na(ParlSpeech_7$match) & ParlSpeech_7$unname1 == 1,
                             "matched", ParlSpeech_7$match)
unmatched_ParlSpeech_7 <- ParlSpeech_7[is.na(ParlSpeech_7$match),]
# Rows still without a speaker take over the value already stored in cze for
# the same wikidataid, so that only rows never matched so far stay in the
# unmatched list.
cze_pol_7$speaker <- ifelse(is.na(cze_pol_7$speaker),
                            cze$ParlSpeech_speaker[match(cze_pol_7$wikidataid, cze$wikidataid)],
                            cze_pol_7$speaker)
unmatched_cze_pol_7 <- cze_pol_7[is.na(cze_pol_7$speaker),]
# Drop the name_split column from the review table
unmatched_cze_pol_7 <- unmatched_cze_pol_7[, !(colnames(unmatched_cze_pol_7) %in% c("name_split"))]

# Match manually (hand-assigned speaker name for one Wikidata id)
cze_pol_7$speaker[cze_pol_7$wikidataid == "Q13721260"] <- "Tomas Jan Podivinsky"


### OUTPUT
## Add the ParlSpeech speaker name to cze from cze_pol_7 by wikidataid.
## Only rows of cze that are still missing a speaker are filled.
cze$ParlSpeech_speaker <- ifelse(is.na(cze$ParlSpeech_speaker),
                                 cze_pol_7$speaker[match(cze$wikidataid, cze_pol_7$wikidataid)],
                                 cze$ParlSpeech_speaker)
786 |
787 |
788 |
################################### THE OUTPUT DATAFRAME ###################################
# Reduce cze to the Wikidata id and the matched ParlSpeech speaker name
cze_output <- subset(cze, select = c("wikidataid", "ParlSpeech_speaker"))
# Number of rows that received a ParlSpeech speaker
sum(!is.na(cze_output[["ParlSpeech_speaker"]]))
# Author's note: 880 matches were found out of the 1096 speakers reported in
# the PDF documentation of the ParlSpeech data set
# Keep matched rows only and store the lookup table
cze_output <- subset(cze_output, !is.na(ParlSpeech_speaker))
saveRDS(cze_output, "./data/pol_sci_data/cze_output")
796 |
797 |
798 |
799 |
800 |
--------------------------------------------------------------------------------