├── R ├── .Rhistory ├── desktop.ini ├── translate.R ├── translate_vec.R └── translate_df.R ├── .Rproj.user ├── DC33445C │ ├── console06 │ │ └── INDEX001 │ ├── cpp-definition-cache │ ├── sources │ │ ├── prop │ │ │ ├── 30E3E6E4 │ │ │ ├── 78303AE1 │ │ │ ├── 876EA2C3 │ │ │ ├── 975E51C4 │ │ │ ├── B3C7D2B2 │ │ │ └── INDEX │ │ └── s-8812577A │ │ │ ├── lock_file │ │ │ ├── 35CFFFCE │ │ │ ├── F27A9EBB │ │ │ ├── 1D0D1931 │ │ │ ├── 17978B3F-contents │ │ │ ├── 35CFFFCE-contents │ │ │ ├── 1D0D1931-contents │ │ │ ├── F27A9EBB-contents │ │ │ └── 42984BE0-contents │ ├── rmd-outputs │ ├── saved_source_markers │ ├── pcs │ │ ├── source-pane.pper │ │ ├── workbench-pane.pper │ │ ├── files-pane.pper │ │ └── windowlayoutstate.pper │ ├── build_options │ └── persistent-state └── shared │ └── notebooks │ ├── patch-chunk-names │ ├── B7880FC2-README │ └── 1 │ │ ├── s │ │ └── chunks.json │ │ └── DC33445C8812577A │ │ └── chunks.json │ └── paths ├── .Rhistory ├── NAMESPACE ├── .Rbuildignore ├── .gitattributes ├── desktop.ini ├── man ├── desktop.ini ├── translate_vec.Rd ├── translate.Rd └── translate_df.Rd ├── DESCRIPTION ├── deeplr.Rproj ├── README.Rmd └── README.md /R/.Rhistory: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/console06/INDEX001: -------------------------------------------------------------------------------- 1 | [] -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/cpp-definition-cache: -------------------------------------------------------------------------------- 1 | [ 2 | ] -------------------------------------------------------------------------------- /.Rproj.user/shared/notebooks/patch-chunk-names: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.Rhistory: 
-------------------------------------------------------------------------------- 1 | library(deeplr) 2 | library(deeplr) 3 | -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/sources/prop/30E3E6E4: -------------------------------------------------------------------------------- 1 | { 2 | } -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/sources/prop/78303AE1: -------------------------------------------------------------------------------- 1 | { 2 | } -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/sources/s-8812577A/lock_file: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/rmd-outputs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/saved_source_markers: -------------------------------------------------------------------------------- 1 | {"active_set":"","sets":[]} -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/pcs/source-pane.pper: -------------------------------------------------------------------------------- 1 | { 2 | "activeTab" : 1 3 | } -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^README\.Rmd$ 4 | ^README-.*\.png$ 5 | 
-------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/sources/prop/876EA2C3: -------------------------------------------------------------------------------- 1 | { 2 | "cursorPosition" : "20,38", 3 | "scrollLine" : "38" 4 | } -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/sources/prop/975E51C4: -------------------------------------------------------------------------------- 1 | { 2 | "cursorPosition" : "18,220", 3 | "scrollLine" : "9" 4 | } -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/sources/prop/B3C7D2B2: -------------------------------------------------------------------------------- 1 | { 2 | "cursorPosition" : "122,0", 3 | "scrollLine" : "62" 4 | } -------------------------------------------------------------------------------- /.Rproj.user/shared/notebooks/B7880FC2-README/1/s/chunks.json: -------------------------------------------------------------------------------- 1 | {"chunk_definitions":[],"doc_write_time":1533230084} -------------------------------------------------------------------------------- /.Rproj.user/shared/notebooks/B7880FC2-README/1/DC33445C8812577A/chunks.json: -------------------------------------------------------------------------------- 1 | {"chunk_definitions":[],"doc_write_time":1533230084} -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/pcs/workbench-pane.pper: -------------------------------------------------------------------------------- 1 | { 2 | "TabSet1" : 3, 3 | "TabSet2" : 3, 4 | "TabZoom" : { 5 | } 6 | } 
-------------------------------------------------------------------------------- /R/desktop.ini: -------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | InfoTip=Dieser Ordner wird online freigegeben. 3 | IconFile=C:\PROGRA~2\Google\Drive\GOOGLE~1.EXE 4 | IconIndex=12 5 | -------------------------------------------------------------------------------- /desktop.ini: -------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | InfoTip=Dieser Ordner wird online freigegeben. 3 | IconFile=C:\PROGRA~2\Google\Drive\GOOGLE~1.EXE 4 | IconIndex=12 5 | -------------------------------------------------------------------------------- /man/desktop.ini: -------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | InfoTip=Dieser Ordner wird online freigegeben. 3 | IconFile=C:\PROGRA~2\Google\Drive\GOOGLE~1.EXE 4 | IconIndex=12 5 | -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/pcs/files-pane.pper: -------------------------------------------------------------------------------- 1 | { 2 | "path" : "C:/GoogleDrive/7-Software/deeplr/R", 3 | "sortOrder" : [ 4 | { 5 | "ascending" : true, 6 | "columnIndex" : 2 7 | } 8 | ] 9 | } -------------------------------------------------------------------------------- /.Rproj.user/shared/notebooks/paths: -------------------------------------------------------------------------------- 1 | C:/GoogleDrive/1-Research/2018_The_Polarization_of_Political_Trust/paper.Rmd="F405394C" 2 | C:/GoogleDrive/7-Software/deeplr/README.Rmd="B7880FC2" 3 | C:/GoogleDrive/7-Software/deeplr/README.md="9E60C0AF" 4 | -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/build_options: -------------------------------------------------------------------------------- 1 | auto_roxygenize_for_build_and_reload="1" 2 | 
auto_roxygenize_for_build_package="1" 3 | auto_roxygenize_for_check="1" 4 | live_preview_website="1" 5 | makefile_args="" 6 | preview_website="1" 7 | website_output_format="all" 8 | -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/pcs/windowlayoutstate.pper: -------------------------------------------------------------------------------- 1 | { 2 | "left" : { 3 | "panelheight" : 1301, 4 | "splitterpos" : 535, 5 | "topwindowstate" : "NORMAL", 6 | "windowheight" : 1339 7 | }, 8 | "right" : { 9 | "panelheight" : 1301, 10 | "splitterpos" : 708, 11 | "topwindowstate" : "NORMAL", 12 | "windowheight" : 1339 13 | } 14 | } -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/sources/prop/INDEX: -------------------------------------------------------------------------------- 1 | C%3A%2FGoogleDrive%2F1-Research%2F2018_The_Polarization_of_Political_Trust%2Fpaper.Rmd="30E3E6E4" 2 | C%3A%2FGoogleDrive%2F7-Software%2Fdeeplr%2FR%2Ftranslate_df.R="B3C7D2B2" 3 | C%3A%2FGoogleDrive%2F7-Software%2Fdeeplr%2FR%2Ftranslate_vec.R="876EA2C3" 4 | C%3A%2FGoogleDrive%2F7-Software%2Fdeeplr%2FREADME.Rmd="975E51C4" 5 | C%3A%2FGoogleDrive%2F7-Software%2Fdeeplr%2FREADME.md="78303AE1" 6 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: deeplr 2 | Type: Package 3 | Title: Translate with DeepL 4 | Version: 0.1.0 5 | Author: Paul C. Bauer 6 | Maintainer: Paul C. Bauer 7 | Description: Functions to translate relying on DeepL API (https://www.deepl.com/translator). 
8 | License: CC BY-NC-SA 4.0 9 | Encoding: UTF-8 10 | LazyData: true 11 | Depends: httr, jsonlite, dplyr, stringr, svMisc, purrr 12 | RoxygenNote: 6.1.0 13 | -------------------------------------------------------------------------------- /deeplr.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace,vignette 22 | -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/sources/s-8812577A/35CFFFCE: -------------------------------------------------------------------------------- 1 | { 2 | "collab_server" : "", 3 | "contents" : "", 4 | "created" : 1533230057241.000, 5 | "dirty" : false, 6 | "encoding" : "UTF-8", 7 | "folds" : "", 8 | "hash" : "1677552591", 9 | "id" : "35CFFFCE", 10 | "lastKnownWriteTime" : 1533231563, 11 | "last_content_update" : 1533231563098, 12 | "path" : "C:/GoogleDrive/7-Software/deeplr/README.Rmd", 13 | "project_path" : "README.Rmd", 14 | "properties" : { 15 | "cursorPosition" : "18,220", 16 | "scrollLine" : "9" 17 | }, 18 | "relative_order" : 3, 19 | "source_on_save" : false, 20 | "source_window" : "", 21 | "type" : "r_markdown" 22 | } -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/sources/s-8812577A/F27A9EBB: -------------------------------------------------------------------------------- 1 | { 2 | "collab_server" : "", 3 | "contents" : "", 4 | "created" : 1533228305728.000, 5 | "dirty" : false, 6 | "encoding" : "UTF-8", 7 | "folds" 
: "", 8 | "hash" : "0", 9 | "id" : "F27A9EBB", 10 | "lastKnownWriteTime" : 1533229752, 11 | "last_content_update" : 1533229752173, 12 | "path" : "C:/GoogleDrive/7-Software/deeplr/R/translate_df.R", 13 | "project_path" : "R/translate_df.R", 14 | "properties" : { 15 | "cursorPosition" : "122,0", 16 | "scrollLine" : "62" 17 | }, 18 | "relative_order" : 1, 19 | "source_on_save" : false, 20 | "source_window" : "", 21 | "type" : "r_source" 22 | } -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/sources/s-8812577A/1D0D1931: -------------------------------------------------------------------------------- 1 | { 2 | "collab_server" : "", 3 | "contents" : "", 4 | "created" : 1533228329894.000, 5 | "dirty" : false, 6 | "encoding" : "UTF-8", 7 | "folds" : "", 8 | "hash" : "0", 9 | "id" : "1D0D1931", 10 | "lastKnownWriteTime" : 1533229648, 11 | "last_content_update" : 1533229648387, 12 | "path" : "C:/GoogleDrive/7-Software/deeplr/R/translate_vec.R", 13 | "project_path" : "R/translate_vec.R", 14 | "properties" : { 15 | "cursorPosition" : "20,38", 16 | "scrollLine" : "38" 17 | }, 18 | "relative_order" : 2, 19 | "source_on_save" : false, 20 | "source_window" : "", 21 | "type" : "r_source" 22 | } -------------------------------------------------------------------------------- /man/translate_vec.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/translate_vec.R 3 | \name{translate_vec} 4 | \alias{translate_vec} 5 | \title{Translates a character vector.} 6 | \usage{ 7 | translate_vec(dataset = NULL, source.lang = "DE", target.lang = "EN", 8 | add.source.lang = FALSE, auth_key = NULL, 9 | url = "https://api.deepl.com/v2/translate?text=") 10 | } 11 | \arguments{ 12 | \item{dataset}{A character vector.} 13 | 14 | \item{source.lang}{Source language. 
Either "detect" or one of "EN", "FR", etc.} 15 | 16 | \item{target.lang}{Target language. One of "EN", "FR", etc.} 17 | 18 | \item{add.source.lang}{A logical. If TRUE adds the preset or detected language for each 19 | translation. FALSE (default) adds nothing.} 20 | 21 | \item{auth_key}{Your API key.} 22 | 23 | \item{url}{Url to DeepL API.} 24 | } 25 | \value{ 26 | A character vector. If add.source.lang = TRUE a dataframe with an additional 27 | column "source_lang". 28 | } 29 | \description{ 30 | Translates a character vector. 31 | } 32 | \examples{ 33 | \dontrun{ 34 | dat <- c("La seule facon de savoir ce qui se passe est de perturber le systeme.", 35 | "The whole problem with the world is that fools are always so certain of themselves") 36 | 37 | # Without language detection and without adding set/detected source language. 38 | translate_vec(dataset = dat, 39 | source.lang = "EN", 40 | target.lang = "DE", 41 | auth_key = "enter you auth key here") 42 | 43 | dat <- c("A dog.", 44 | "Un chien.", 45 | "Un perro.", 46 | "Un cane.", 47 | "Een hond.") 48 | 49 | # With language detection and with adding set/detected source language. 
50 | translate_vec(dataset = dat, 51 | source.lang = "detect", 52 | target.lang = "DE", 53 | add.source.lang = TRUE, 54 | auth_key = "enter you auth key here") 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /man/translate.Rd: -------------------------------------------------------------------------------- 1 | \name{translate} 2 | \alias{translate} 3 | \title{translate using DeepL} 4 | \usage{ 5 | translate(dataset = NULL, 6 | column.name = NULL, 7 | source.lang = "EN", 8 | target.lang = "DE", 9 | url = "https://www.deepl.com/jsonrpc") 10 | } 11 | \arguments{ 12 | 13 | \item{dataset}{A character vector or dataframe (with a column containing the text to be translated).} 14 | \item{column.name}{If a dataframe is passed to "dataset", the name of the column containing the text 15 | must be passed to "column.name".} 16 | \item{source.lang}{The language code that corresponds with the language in which the source text 17 | is written. One of "EN", "DE", "FR", "ES", "IT", "NL", "PL". Default is "EN".} 18 | \item{target.lang}{The language code that corresponds with the language into which the source 19 | text is to be translated. One of "EN", "DE", "FR", "ES", "IT", "NL", "PL". Default is "DE".} 20 | \item{url}{Url to the unofficial DeepL API.} 21 | } 22 | \description{ 23 | Translates relying on https://www.deepl.com/translator. 
24 | } 25 | \examples{ 26 | 27 | # Load the package 28 | library(deeplr) 29 | 30 | # Translate a single string 31 | dat <- "Essentially, all models are wrong, but some are useful" 32 | translate(dataset = dat) 33 | 34 | # Translate a character vector 35 | dat <- c("The only way to find out what happens is to disturb the system", 36 | "The whole problem with the world is that fools are always so certain of themselves") 37 | translate(dataset = dat) 38 | 39 | # Translate a column in a dataframe 40 | dat <- data.frame(text = c("The only way to find out what happens is to disturb the system", 41 | "The whole problem with the world is that fools are always so certain of themselves")) 42 | translate(dataset = dat, column.name = "text", source.lang = "EN", target.lang = "ES") 43 | 44 | 45 | swiss$country <- rownames(swiss) 46 | translate(dataset = swiss, column.name = "country", source.lang = "FR", target.lang = "EN") 47 | # well... 48 | 49 | } 50 | 51 | 52 | -------------------------------------------------------------------------------- /man/translate_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/translate_df.R 3 | \name{translate_df} 4 | \alias{translate_df} 5 | \title{Translates a column in a dataframe.} 6 | \usage{ 7 | translate_df(dataset = NULL, column.name = NULL, source.lang = "DE", 8 | target.lang = "EN", add.source.lang = FALSE, auth_key = NULL, 9 | url = "https://api.deepl.com/v2/translate?text=") 10 | } 11 | \arguments{ 12 | \item{dataset}{Dataframe with column of class character.} 13 | 14 | \item{column.name}{Name of column that should be translated.} 15 | 16 | \item{source.lang}{Source language. Either "detect" or one of "EN", "FR", etc.} 17 | 18 | \item{target.lang}{Target language. One of "EN", "FR", etc.} 19 | 20 | \item{add.source.lang}{A logical. If TRUE adds the preset or detected language for each 21 | translation. 
FALSE (default) adds nothing.} 22 | 23 | \item{auth_key}{Your API key.} 24 | 25 | \item{url}{Url to DeepL API.} 26 | } 27 | \value{ 28 | Dataframe with original columns and translated column. add.source.lang = TRUE adds 29 | additional column "source_lang". 30 | } 31 | \description{ 32 | Translates a column in a dataframe. 33 | } 34 | \examples{ 35 | \dontrun{ 36 | dat <- data.frame(text = c("La seule facon de savoir ce qui se passe est de perturber 37 | le systeme.", 38 | "The whole problem with the world is that fools are always so 39 | certain of themselves")) 40 | 41 | 42 | # Without language detection and without adding set/detected source language. 43 | translate_df(dataset = dat, 44 | column.name = "text", 45 | source.lang = "EN", 46 | target.lang = "DE", 47 | auth_key = "enter you auth key here") 48 | 49 | dat <- data.frame(text = c("A dog.", 50 | "Un chien.", 51 | "Un perro.", 52 | "Un cane.", 53 | "Een hond.")) 54 | 55 | # With language detection and with adding set/detected source language. 
56 | translate_df(dataset = dat, 57 | column.name = "text", 58 | source.lang = "detect", 59 | target.lang = "DE", 60 | add.source.lang = TRUE, 61 | auth_key = "enter you auth key here") 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/persistent-state: -------------------------------------------------------------------------------- 1 | build-last-errors="[]" 2 | build-last-errors-base-dir="C:/GoogleDrive/7-Software/deeplr/" 3 | build-last-outputs="[{\"output\":\"==> devtools::document(roclets=c('rd', 'collate', 'namespace', 'vignette'))\\n\\n\",\"type\":0},{\"output\":\"Updating deeplr documentation\\r\\n\",\"type\":2},{\"output\":\"Loading deeplr\\r\\n\",\"type\":2},{\"output\":\"Warnmeldung:\\r\\nPaket 'svMisc' wurde unter R Version 3.5.1 erstellt \\r\\n\",\"type\":2},{\"output\":\"Documentation completed\\n\\n\",\"type\":1},{\"output\":\"==> Rcmd.exe INSTALL --no-multiarch --with-keep.source deeplr\\n\\n\",\"type\":0},{\"output\":\"* installing to library 'U:/Eigene Dokumente/R/R-3.5.0/library'\\r\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"* installing *source* package 'deeplr' ...\\r\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** R\\r\\n\",\"type\":1},{\"output\":\"** byte-compile and prepare package for lazy loading\\r\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"Warnung: package 'svMisc' was built under R version 3.5.1\\r\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** help\\r\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** installing help indices\\r\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\" converting help for package 'deeplr'\\r\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\" finding HTML links ...\",\"type\":1},{\"output\":\" fertig\\r\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\" translate html \",\"type\":1},{\"output\":\"\\r\\n\",\"type\":1},{\"output\":\" 
translate_df html \\r\\n\",\"type\":1},{\"output\":\" translate_vec html \",\"type\":1},{\"output\":\"\\r\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** building package indices\\r\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package can be loaded\\r\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"Warnung: package 'svMisc' was built under R version 3.5.1\\r\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"* DONE (deeplr)\\r\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"In R CMD INSTALL\\r\\n\",\"type\":1},{\"output\":\"\",\"type\":1}]" 4 | compile_pdf_state="{\"errors\":[],\"output\":\"\",\"running\":false,\"tab_visible\":false,\"target_file\":\"\"}" 5 | files.monitored-path="" 6 | find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":true,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOff\":[],\"matchOn\":[]},\"running\":false}" 7 | imageDirtyState="1" 8 | saveActionState="-1" 9 | -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/sources/s-8812577A/17978B3F-contents: -------------------------------------------------------------------------------- 1 | 2 | deeplr 3 | ====== 4 | 5 | The DeepL Translator made headlines for providing better translations than Google etc. `deeplr` is a quick & dirty coded package that contains a singular function - `translate()` - that accesses the DeepL API. I was inspired by the [translateR package](https://github.com/ChristopherLucas/translateR) package. 6 | 7 | To access the API you need to get your own API key from deeplr: . 8 | 9 | Beware of the API request limits. See . 10 | 11 | - "The request size should not exceed 30kbytes. The maximum number of texts to be translated within one request should not exceed 50." 12 | - "Please ensure your client does not exceed the limits as specified in the quota documentation delivered to you." 
13 | 14 | Installation: How do I get the package? 15 | --------------------------------------- 16 | 17 | ``` r 18 | # Development version 19 | # install.packages("devtools") 20 | devtools::install_github("paulcbauer/deeplr") 21 | ``` 22 | 23 | Example: How do I use the package? 24 | ---------------------------------- 25 | 26 | You can feed `translate()` with a single string, a vector of strings (class "character") or a dataframe that contains a column of class "character". It will return the same format. 27 | 28 | ``` r 29 | # Load the package 30 | library(deeplr) 31 | 32 | # Translate a single string 33 | dat <- "Essentially, all models are wrong, but some are useful" 34 | translate(dataset = dat, 35 | source.lang = "EN", 36 | target.lang = "DE", 37 | auth_key = "enter your key here") 38 | 39 | # Translate a character vector 40 | dat <- c("The only way to find out what happens is to disturb the system", 41 | "The whole problem with the world is that fools are always so certain of themselves") 42 | translate(dataset = dat, 43 | source.lang = "EN", 44 | target.lang = "DE", 45 | auth_key = "enter your key here") 46 | 47 | # Translate a column in a dataframe 48 | dat <- data.frame(text = c("The only way to find out what happens is to disturb the system", 49 | "The whole problem with the world is that fools are always so certain of themselves")) 50 | translate(dataset = dat, 51 | source.lang = "EN", 52 | target.lang = "DE", 53 | column.name = "text", 54 | auth_key = "enter your key here") 55 | 56 | 57 | swiss$country <- rownames(swiss) 58 | translate(dataset = swiss, 59 | column.name = "country", 60 | source.lang = "FR", 61 | target.lang = "EN", 62 | auth_key = "enter your key here") 63 | # well... 64 | ``` 65 | 66 | Next steps 67 | ---------- 68 | 69 | - Increase efficiency of code 70 | - Add option to output multiple languages 71 | - Test robustness.. encoding issues etc. 
#' Translate text with the DeepL API (all-in-one interface)
#'
#' Sends every text to the DeepL API in its own GET request and returns the
#' translations. The input is either a character vector or a data frame
#' together with the name of the column that holds the text.
#'
#' @param dataset A character vector, or a data frame containing a column
#'   with the text to translate.
#' @param column.name Name of the text column. Required when `dataset` is a
#'   data frame; not used for character input.
#' @param source.lang Language code of the input text, e.g. "DE".
#' @param target.lang Language code to translate into, e.g. "EN".
#' @param auth_key Your DeepL API key.
#' @param url DeepL endpoint. The URL-encoded text is appended directly to
#'   it, followed by the `source_lang`, `target_lang` and `auth_key`
#'   parameters.
#'
#' @return For character input, a character vector with one translation per
#'   input element (`NA` where the input is `NA` or the request failed, the
#'   latter with a warning). For a data frame, the data frame with an added
#'   `translation` column. Invisible `NULL` when no API key is supplied, when
#'   a data frame is passed without `column.name` (a message is printed in
#'   both cases), or when `dataset` is of an unsupported type.
#'
#' @export
translate <- function(dataset = NULL,
                      column.name = NULL,
                      source.lang = "DE",
                      target.lang = "EN",
                      auth_key = NULL,
                      url = "https://api.deepl.com/v2/translate?text=") {
  if (is.null(auth_key)) {
    cat("You need an API key. See https://www.deepl.com/api-contact.html.")
    return(invisible(NULL))
  }

  # Translate a single string. Always returns exactly one element so that
  # results stay aligned with the inputs even when a request fails.
  translate_one <- function(text) {
    if (is.na(text)) {
      return(NA_character_)
    }
    # Collapse whitespace runs as before, then percent-encode the whole text.
    # Encoding only the blanks lets characters such as "&", "#" or "+" break
    # the query string; enc2utf8() makes non-ASCII text encode as UTF-8 bytes.
    query <- trimws(gsub("\\s+", " ", text))
    query <- utils::URLencode(enc2utf8(query), reserved = TRUE)
    response <- httr::GET(paste0(
      url, query,
      "&source_lang=", source.lang,
      "&target_lang=", target.lang,
      "&auth_key=", auth_key
    ))
    if (httr::http_error(response)) {
      warning(
        "DeepL request failed with HTTP status ",
        httr::status_code(response), "; returning NA.",
        call. = FALSE
      )
      return(NA_character_)
    }
    body <- httr::content(response, as = "text", encoding = "UTF-8")
    result <- jsonlite::fromJSON(body)$translations$text
    if (length(result) == 0L) NA_character_ else as.character(result[[1L]])
  }

  # Translate a character vector element by element into a preallocated
  # result, optionally reporting progress on the console.
  translate_all <- function(texts, show_progress) {
    n <- length(texts)
    out <- character(n)
    for (k in seq_len(n)) {
      if (show_progress) {
        svMisc::progress(k - 1, max.value = n)
      }
      out[k] <- translate_one(texts[[k]])
    }
    out
  }

  # INPUT: character vector ----
  if (inherits(dataset, "character")) {
    return(translate_all(dataset, show_progress = length(dataset) > 1L))
  }

  # INPUT: data frame with the text in one column ----
  if (inherits(dataset, "data.frame")) {
    if (is.null(column.name)) {
      cat("If input is a data.frame you have to specify a column name, e.g. translate(dataset = dat, column.name = 'text'.")
      return(invisible(NULL))
    }
    stopifnot(length(column.name) == 1L)
    if (is.character(column.name) && !(column.name %in% names(dataset))) {
      stop("Column '", column.name, "' not found in `dataset`.", call. = FALSE)
    }
    texts <- as.character(dataset[[column.name]])
    translations <- translate_all(texts, show_progress = TRUE)
    return(cbind(dataset, translation = translations, stringsAsFactors = FALSE))
  }

  # Unsupported input types yield NULL without a message.
  invisible(NULL)
}
The maximum number of texts to be translated within one request should not exceed 50." 26 | * "Please ensure your client does not exceed the limits as specified in the quota documentation delivered to you." 27 | 28 | ## Installation: How do I get the package? 29 | 30 | ```{r eval=FALSE, message=FALSE, warning=FALSE, include=TRUE, paged.print=FALSE} 31 | # Development version 32 | # install.packages("devtools") 33 | devtools::install_github("paulcbauer/deeplr") 34 | ``` 35 | 36 | ## Example: How do I use the package? 37 | 38 | You can feed ```translate_vec()``` with a single string or a vector of strings (class "character"). You can feed ```translate_df()``` with a dataframe that contains a column of class "character". 39 | 40 | ```{r eval=FALSE, message=FALSE, warning=FALSE, include=TRUE, paged.print=FALSE} 41 | # Load the package 42 | library(deeplr) 43 | 44 | # Translate a single string/character vector 45 | dat <- c("La seule facon de savoir ce qui se passe est de perturber le systeme.", 46 | "The whole problem with the world is that fools are always so certain of themselves") 47 | 48 | # Without language detection and without adding set/detected source language. 49 | translate_vec(dataset = dat, 50 | source.lang = "EN", 51 | target.lang = "DE", 52 | auth_key = "enter your auth key here") 53 | 54 | dat <- c("A dog.", 55 | "Un chien.", 56 | "Un perro.", 57 | "Un cane.", 58 | "Een hond.") 59 | 60 | # With language detection and with adding set/detected source language. 
#' Translates a character vector.
#'
#' Sends every element of `dataset` to the DeepL API in its own GET request.
#'
#' @return A character vector with one translation per input element (`NA`
#'   where the input is `NA` or the request failed, the latter with a
#'   warning). If add.source.lang = TRUE a dataframe with the columns
#'   "translation" and "source_lang". Invisible `NULL`, after printing a
#'   message, when no API key is given or `dataset` is not a character vector.
#'
#' @param dataset A character vector.
#' @param source.lang Source language. Either "detect" or one of "EN", "FR", etc.
#' @param target.lang Target language. One of "EN", "FR", etc.
#' @param add.source.lang A logical. If TRUE adds the source language reported
#'   by the API for each translation. FALSE (default) adds nothing.
#' @param auth_key Your API key.
#' @param url Url to DeepL API. The URL-encoded text is appended directly to it.
#'
#' @examples
#' \dontrun{
#' dat <- c("La seule facon de savoir ce qui se passe est de perturber le systeme.",
#'          "The whole problem with the world is that fools are always so certain of themselves")
#'
#' # Without language detection and without adding set/detected source language.
#' translate_vec(dataset = dat,
#'               source.lang = "EN",
#'               target.lang = "DE",
#'               auth_key = "enter your auth key here")
#'
#' dat <- c("A dog.",
#'          "Un chien.",
#'          "Un perro.",
#'          "Un cane.",
#'          "Een hond.")
#'
#' # With language detection and with adding set/detected source language.
#' translate_vec(dataset = dat,
#'               source.lang = "detect",
#'               target.lang = "DE",
#'               add.source.lang = TRUE,
#'               auth_key = "enter your auth key here")
#' }
#'
#' @export
translate_vec <- function(dataset = NULL,
                          source.lang = "DE",
                          target.lang = "EN",
                          add.source.lang = FALSE,
                          auth_key = NULL,
                          url = "https://api.deepl.com/v2/translate?text=") {
  if (is.null(auth_key)) {
    cat("You need an API key. See https://www.deepl.com/api-contact.html.")
    return(invisible(NULL))
  }

  if (!inherits(dataset, "character")) {
    cat("The input is not a character vector of length > 1.")
    return(invisible(NULL))
  }

  # With "detect" the source_lang parameter is omitted from the request;
  # otherwise the preset language is sent along.
  source_param <- if (source.lang == "detect") {
    ""
  } else {
    paste0("&source_lang=", source.lang)
  }

  # Request one translation. Always returns one text and one language so the
  # two result vectors stay aligned with the input even if a request fails.
  request_one <- function(text) {
    failed <- list(text = NA_character_, lang = NA_character_)
    if (is.na(text)) {
      return(failed)
    }
    # Collapse whitespace runs as before, then percent-encode the whole text.
    # Encoding only the blanks lets characters such as "&", "#" or "+" break
    # the query string; enc2utf8() makes non-ASCII text encode as UTF-8 bytes.
    query <- trimws(gsub("\\s+", " ", text))
    query <- utils::URLencode(enc2utf8(query), reserved = TRUE)
    response <- httr::GET(paste0(
      url, query,
      source_param,
      "&target_lang=", target.lang,
      "&auth_key=", auth_key
    ))
    if (httr::http_error(response)) {
      warning(
        "DeepL request failed with HTTP status ",
        httr::status_code(response), "; returning NA.",
        call. = FALSE
      )
      return(failed)
    }
    body <- httr::content(response, as = "text", encoding = "UTF-8")
    parsed <- jsonlite::fromJSON(body)$translations
    translated <- parsed$text
    detected <- parsed$detected_source_language
    list(
      text = if (length(translated) == 0L) NA_character_ else as.character(translated[[1L]]),
      lang = if (length(detected) == 0L) NA_character_ else as.character(detected[[1L]])
    )
  }

  n <- length(dataset)
  responses <- character(n)
  source_lang <- character(n)
  for (k in seq_len(n)) {
    svMisc::progress(k - 1, max.value = n)
    result <- request_one(dataset[[k]])
    responses[k] <- result$text
    source_lang[k] <- result$lang
  }

  # OUTPUT
  if (isTRUE(as.logical(add.source.lang))) {
    data.frame(
      translation = responses,
      source_lang = source_lang,
      stringsAsFactors = FALSE
    )
  } else {
    responses
  }
}
```deeplr``` is a quick & dirty coded package that contains functions - ```translate_vec()``` and ```translate_df()``` - that access the DeepL API. I was inspired by the [translateR package](https://github.com/ChristopherLucas/translateR) package. 20 | 21 | To access the API you need to get your own API key from deeplr: https://www.deepl.com/api-contact.html. 22 | 23 | Beware of the API request limits. See https://www.deepl.com/api-reference.html. 24 | 25 | * "The request size should not exceed 30kbytes. The maximum number of texts to be translated within one request should not exceed 50." 26 | * "Please ensure your client does not exceed the limits as specified in the quota documentation delivered to you." 27 | 28 | ## Installation: How do I get the package? 29 | 30 | ```{r eval=FALSE, message=FALSE, warning=FALSE, include=TRUE, paged.print=FALSE} 31 | # Development version 32 | # install.packages("devtools") 33 | devtools::install_github("paulcbauer/deeplr") 34 | ``` 35 | 36 | ## Example: How do I use the package? 37 | 38 | You can feed ```translate_vec()``` with a single string, a vector of strings (class "character"). You can feed ```translate_df()``` with a dataframe that contains a column of class "character". 39 | 40 | ```{r eval=FALSE, message=FALSE, warning=FALSE, include=TRUE, paged.print=FALSE} 41 | # Load the package 42 | library(deeplr) 43 | 44 | # Translate a single string/character vector 45 | dat <- c("La seule facon de savoir ce qui se passe est de perturber le systeme.", 46 | "The whole problem with the world is that fools are always so certain of themselves") 47 | 48 | # Without language detection and without adding set/detected source language. 
49 | translate_vec(dataset = dat, 50 | source.lang = "EN", 51 | target.lang = "DE", 52 | auth_key = "enter you auth key here") 53 | 54 | dat <- c("A dog.", 55 | "Un chien.", 56 | "Un perro.", 57 | "Un cane.", 58 | "Een hond.") 59 | 60 | # With language detection and with adding set/detected source language. 61 | translate_vec(dataset = dat, 62 | source.lang = "detect", 63 | target.lang = "DE", 64 | add.source.lang = TRUE, 65 | auth_key = "enter you auth key here") 66 | 67 | 68 | 69 | 70 | 71 | # Translate a column in a dataframe 72 | dat <- data.frame(text = c("La seule facon de savoir ce qui se passe est de perturber 73 | le systeme.", 74 | "The whole problem with the world is that fools are always so 75 | certain of themselves")) 76 | 77 | 78 | # Without language detection and without adding set/detected source language. 79 | translate_df(dataset = dat, 80 | column.name = "text", 81 | source.lang = "EN", 82 | target.lang = "DE", 83 | auth_key = "enter you auth key here") 84 | 85 | dat <- data.frame(text = c("A dog.", 86 | "Un chien.", 87 | "Un perro.", 88 | "Un cane.", 89 | "Een hond.")) 90 | 91 | # With language detection and with adding set/detected source language. 92 | translate_df(dataset = dat, 93 | column.name = "text", 94 | source.lang = "detect", 95 | target.lang = "DE", 96 | add.source.lang = TRUE, 97 | auth_key = "enter you auth key here") 98 | # well... 99 | ``` 100 | 101 | 102 | ## Next steps 103 | * Increase efficiency of code 104 | * Add option to output multiple languages 105 | * Test robustness.. encoding issues etc. 106 | 107 | ## License 108 | License: CC BY-NC-SA 4.0 109 | -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/sources/s-8812577A/1D0D1931-contents: -------------------------------------------------------------------------------- 1 | #' Translates a character vector. 2 | #' 3 | #' @return A character vector. 
If add.source.lang = TRUE a dataframe with an additional 4 | #' column "source_lang". 5 | #' 6 | #' @param dataset A character vector. 7 | #' @param source.lang Source language. Either "detect" or one of "EN", "FR", etc. 8 | #' @param target.lang Target language. One of "EN", "FR", etc. 9 | #' @param add.source.lang A logical. If TRUE adds the preset or detected language for each 10 | #' translation. FALSE (default) adds nothing. 11 | #' @param auth_key Your API key. 12 | #' @param url Url to DeepL API. 13 | #' 14 | #' @examples 15 | #' \dontrun{ 16 | #' dat <- c("La seule facon de savoir ce qui se passe est de perturber le systeme.", 17 | #' "The whole problem with the world is that fools are always so certain of themselves") 18 | #' 19 | #' # Without language detection and without adding set/detected source language. 20 | #' translate_vec(dataset = dat, 21 | #' source.lang = "EN", 22 | #' target.lang = "DE", 23 | #' auth_key = "enter you auth key here") 24 | #' 25 | #' dat <- c("A dog.", 26 | #' "Un chien.", 27 | #' "Un perro.", 28 | #' "Un cane.", 29 | #' "Een hond.") 30 | #' 31 | #' # With language detection and with adding set/detected source language. 32 | #' translate_vec(dataset = dat, 33 | #' source.lang = "detect", 34 | #' target.lang = "DE", 35 | #' add.source.lang = TRUE, 36 | #' auth_key = "enter you auth key here") 37 | #' } 38 | 39 | 40 | translate_vec <- function(dataset = NULL, 41 | source.lang = "DE", 42 | target.lang = "EN", 43 | add.source.lang = FALSE, 44 | auth_key = NULL, 45 | url = "https://api.deepl.com/v2/translate?text=" 46 | ) { 47 | 48 | 49 | 50 | if(is.null(auth_key)){cat("You need an API key. 
See https://www.deepl.com/api-contact.html.")}else{ 51 | 52 | 53 | if(inherits(dataset,"character")==TRUE){ 54 | responses <- NULL 55 | source_lang <- NULL 56 | z <- 0 57 | for(i in dataset){ 58 | svMisc::progress(z, max.value = length(dataset)) 59 | z <- z+1 60 | i <- stringr::str_replace(gsub("\\s+", "%20", stringr::str_trim(i)), "B", "b") 61 | 62 | 63 | # Source language: "detect" vs. "X" 64 | if(source.lang=="detect"){ 65 | 66 | 67 | response.i <- httr::GET(paste(url, 68 | i, 69 | "&target_lang=", target.lang, 70 | "&auth_key=", auth_key 71 | , sep = "")) 72 | }else{ 73 | response.i <- httr::GET(paste(url, 74 | i, 75 | "&source_lang=", source.lang, 76 | "&target_lang=", target.lang, 77 | "&auth_key=", auth_key 78 | , sep = "")) 79 | } 80 | 81 | 82 | respcontent.i <- httr::content(response.i, as="text", encoding = "UTF-8") 83 | result.i <- jsonlite::fromJSON(respcontent.i)$translations$text 84 | responses <- c(responses, result.i) 85 | 86 | 87 | # Source language: "detect" vs. "X" 88 | if(source.lang == "detect"){ 89 | source.lang.i <- jsonlite::fromJSON(respcontent.i)$translations$detected_source_language 90 | }else{ 91 | source.lang.i <- jsonlite::fromJSON(respcontent.i)$translations$detected_source_language 92 | } 93 | source_lang <- c(source_lang, source.lang.i) 94 | 95 | 96 | 97 | } 98 | 99 | # OUPUT 100 | 101 | if(add.source.lang == TRUE){ 102 | return(data.frame(cbind(translation = responses, source_lang))) 103 | }else{ 104 | return(responses) 105 | } 106 | 107 | 108 | 109 | 110 | }else{ 111 | cat("The input is not a character vector of length > 1.") 112 | } 113 | 114 | 115 | } # API KEY 116 | 117 | } # End of function 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | deeplr 3 | ====== 4 | 5 | This repository is currently not regularly maintained. 
Please use the following package instead, which is officially published on CRAN and regularly maintained: see https://github.com/zumbov2/deeplr and https://cran.r-project.org/web/packages/deeplr/index.html. If you want to contribute, please do so by creating pull requests in the latter repository. 6 | 7 | 8 | The DeepL Translator made headlines for providing better translations than Google etc. `deeplr` is a quick & dirty coded package that contains functions - `translate_vec()` and `translate_df()` - that access the DeepL API. I was inspired by the [translateR package](https://github.com/ChristopherLucas/translateR). 9 | 10 | To access the API you need to get your own API key from DeepL: <https://www.deepl.com/api-contact.html>. 11 | 12 | Beware of the API request limits. See <https://www.deepl.com/api-reference.html>. 13 | 14 | - "The request size should not exceed 30kbytes. The maximum number of texts to be translated within one request should not exceed 50." 15 | - "Please ensure your client does not exceed the limits as specified in the quota documentation delivered to you." 16 | 17 | Installation: How do I get the package? 18 | --------------------------------------- 19 | 20 | ``` r 21 | # Development version 22 | # install.packages("devtools") 23 | devtools::install_github("paulcbauer/deeplr") 24 | ``` 25 | 26 | Example: How do I use the package? 27 | ---------------------------------- 28 | 29 | You can feed `translate_vec()` with a single string or a vector of strings (class "character"). You can feed `translate_df()` with a dataframe that contains a column of class "character". 30 | 31 | ``` r 32 | # Load the package 33 | library(deeplr) 34 | 35 | # Translate a single string/character vector 36 | dat <- c("La seule facon de savoir ce qui se passe est de perturber le systeme.", 37 | "The whole problem with the world is that fools are always so certain of themselves") 38 | 39 | # Without language detection and without adding set/detected source language.
40 | translate_vec(dataset = dat, 41 | source.lang = "EN", 42 | target.lang = "DE", 43 | auth_key = "enter you auth key here") 44 | 45 | dat <- c("A dog.", 46 | "Un chien.", 47 | "Un perro.", 48 | "Un cane.", 49 | "Een hond.") 50 | 51 | # With language detection and with adding set/detected source language. 52 | translate_vec(dataset = dat, 53 | source.lang = "detect", 54 | target.lang = "DE", 55 | add.source.lang = TRUE, 56 | auth_key = "enter you auth key here") 57 | 58 | 59 | 60 | 61 | 62 | # Translate a column in a dataframe 63 | dat <- data.frame(text = c("La seule facon de savoir ce qui se passe est de perturber 64 | le systeme.", 65 | "The whole problem with the world is that fools are always so 66 | certain of themselves")) 67 | 68 | 69 | # Without language detection and without adding set/detected source language. 70 | translate_df(dataset = dat, 71 | column.name = "text", 72 | source.lang = "EN", 73 | target.lang = "DE", 74 | auth_key = "enter you auth key here") 75 | 76 | dat <- data.frame(text = c("A dog.", 77 | "Un chien.", 78 | "Un perro.", 79 | "Un cane.", 80 | "Een hond.")) 81 | 82 | # With language detection and with adding set/detected source language. 83 | translate_df(dataset = dat, 84 | column.name = "text", 85 | source.lang = "detect", 86 | target.lang = "DE", 87 | add.source.lang = TRUE, 88 | auth_key = "enter you auth key here") 89 | # well... 90 | ``` 91 | 92 | Next steps 93 | ---------- 94 | 95 | - Increase efficiency of code 96 | - Add option to output multiple languages 97 | - Test robustness.. encoding issues etc. 98 | 99 | License 100 | ------- 101 | 102 | License: CC BY-NC-SA 4.0 103 | -------------------------------------------------------------------------------- /R/translate_df.R: -------------------------------------------------------------------------------- 1 | #' Translates a column in a dataframe. 2 | #' 3 | #' @return Dataframe with original columns and translated column. 
add.source.lang = TRUE adds 4 | #' additional column "source_lang". 5 | #' 6 | #' @param dataset Dataframe with column of class character. 7 | #' @param column.name Name of column that should be translated. 8 | #' @param source.lang Source language. Either "detect" or one of "EN", "FR", etc. 9 | #' @param target.lang Target language. One of "EN", "FR", etc. 10 | #' @param add.source.lang A logical. If TRUE adds the preset or detected language for each 11 | #' translation. FALSE (default) adds nothing. 12 | #' @param auth_key Your API key. 13 | #' @param url Url to DeepL API. 14 | #' 15 | #' @examples 16 | #' \dontrun{ 17 | #' dat <- data.frame(text = c("La seule facon de savoir ce qui se passe est de perturber 18 | #' le systeme.", 19 | #' "The whole problem with the world is that fools are always so 20 | #' certain of themselves")) 21 | #' 22 | #' 23 | #' # Without language detection and without adding set/detected source language. 24 | #' translate_df(dataset = dat, 25 | #' column.name = "text", 26 | #' source.lang = "EN", 27 | #' target.lang = "DE", 28 | #' auth_key = "enter you auth key here") 29 | #' 30 | #' dat <- data.frame(text = c("A dog.", 31 | #' "Un chien.", 32 | #' "Un perro.", 33 | #' "Un cane.", 34 | #' "Een hond.")) 35 | #' 36 | #' # With language detection and with adding set/detected source language. 37 | #' translate_df(dataset = dat, 38 | #' column.name = "text", 39 | #' source.lang = "detect", 40 | #' target.lang = "DE", 41 | #' add.source.lang = TRUE, 42 | #' auth_key = "enter you auth key here") 43 | #' } 44 | 45 | 46 | translate_df <- function(dataset = NULL, 47 | column.name = NULL, 48 | source.lang = "DE", 49 | target.lang = "EN", 50 | add.source.lang = FALSE, 51 | auth_key = NULL, 52 | url = "https://api.deepl.com/v2/translate?text=" 53 | ) { 54 | 55 | 56 | 57 | if(is.null(auth_key)){cat("You need an API key. 
See https://www.deepl.com/api-contact.html.")}else{ 58 | 59 | # INPUT: Dataframe with text in column #### 60 | if(inherits(dataset,"data.frame")==TRUE&!is.null(column.name)){ 61 | 62 | dataset2 <- dataset %>% dplyr::pull(column.name) %>% as.character() 63 | 64 | responses <- NULL 65 | source_lang <- NULL 66 | z <- 0 67 | 68 | 69 | 70 | for(i in dataset2){ 71 | svMisc::progress(z, max.value = length(dataset2)) 72 | z <- z+1 73 | i <- stringr::str_replace(gsub("\\s+", "%20", stringr::str_trim(i)), "B", "b") 74 | 75 | # Source language: "detect" vs. "X" 76 | if(source.lang=="detect"){ 77 | 78 | 79 | response.i <- httr::GET(paste(url, 80 | i, 81 | "&target_lang=", target.lang, 82 | "&auth_key=", auth_key 83 | , sep = "")) 84 | }else{ 85 | response.i <- httr::GET(paste(url, 86 | i, 87 | "&source_lang=", source.lang, 88 | "&target_lang=", target.lang, 89 | "&auth_key=", auth_key 90 | , sep = "")) 91 | } 92 | 93 | 94 | respcontent.i <- httr::content(response.i, as="text", encoding = "UTF-8") 95 | #print(respcontent.i) 96 | result.i <- jsonlite::fromJSON(respcontent.i)$translations$text 97 | 98 | # Source language: "detect" vs. 
"X" 99 | if(source.lang == "detect"){ 100 | source.lang.i <- jsonlite::fromJSON(respcontent.i)$translations$detected_source_language 101 | }else{ 102 | source.lang.i <- jsonlite::fromJSON(respcontent.i)$translations$detected_source_language 103 | } 104 | 105 | #print(result.i) 106 | responses <- c(responses, result.i) 107 | #print(responses) 108 | source_lang <- c(source_lang, source.lang.i) 109 | } 110 | column.name.new <- paste0(column.name, "_", target.lang) 111 | dataset <- dplyr::bind_cols(dataset, newtranslation = responses) 112 | names(dataset)[names(dataset)=="newtranslation"] <- column.name.new 113 | 114 | 115 | # OUPUT 116 | 117 | if(add.source.lang == TRUE){ 118 | return(data.frame(cbind(dataset, source_lang))) 119 | }else{ 120 | return(dataset) 121 | } 122 | 123 | 124 | }else{ 125 | cat("The input is not of class() dataframe or you forgot to specify the name of the column that shall be translated.") 126 | } 127 | 128 | 129 | 130 | } # API KEY 131 | 132 | } # End of function 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/sources/s-8812577A/F27A9EBB-contents: -------------------------------------------------------------------------------- 1 | #' Translates a column in a dataframe. 2 | #' 3 | #' @return Dataframe with original columns and translated column. add.source.lang = TRUE adds 4 | #' additional column "source_lang". 5 | #' 6 | #' @param dataset Dataframe with column of class character. 7 | #' @param column.name Name of column that should be translated. 8 | #' @param source.lang Source language. Either "detect" or one of "EN", "FR", etc. 9 | #' @param target.lang Target language. One of "EN", "FR", etc. 10 | #' @param add.source.lang A logical. If TRUE adds the preset or detected language for each 11 | #' translation. FALSE (default) adds nothing. 12 | #' @param auth_key Your API key. 13 | #' @param url Url to DeepL API. 
14 | #' 15 | #' @examples 16 | #' \dontrun{ 17 | #' dat <- data.frame(text = c("La seule facon de savoir ce qui se passe est de perturber 18 | #' le systeme.", 19 | #' "The whole problem with the world is that fools are always so 20 | #' certain of themselves")) 21 | #' 22 | #' 23 | #' # Without language detection and without adding set/detected source language. 24 | #' translate_df(dataset = dat, 25 | #' column.name = "text", 26 | #' source.lang = "EN", 27 | #' target.lang = "DE", 28 | #' auth_key = "enter you auth key here") 29 | #' 30 | #' dat <- data.frame(text = c("A dog.", 31 | #' "Un chien.", 32 | #' "Un perro.", 33 | #' "Un cane.", 34 | #' "Een hond.")) 35 | #' 36 | #' # With language detection and with adding set/detected source language. 37 | #' translate_df(dataset = dat, 38 | #' column.name = "text", 39 | #' source.lang = "detect", 40 | #' target.lang = "DE", 41 | #' add.source.lang = TRUE, 42 | #' auth_key = "enter you auth key here") 43 | #' } 44 | 45 | 46 | translate_df <- function(dataset = NULL, 47 | column.name = NULL, 48 | source.lang = "DE", 49 | target.lang = "EN", 50 | add.source.lang = FALSE, 51 | auth_key = NULL, 52 | url = "https://api.deepl.com/v2/translate?text=" 53 | ) { 54 | 55 | 56 | 57 | if(is.null(auth_key)){cat("You need an API key. See https://www.deepl.com/api-contact.html.")}else{ 58 | 59 | # INPUT: Dataframe with text in column #### 60 | if(inherits(dataset,"data.frame")==TRUE&!is.null(column.name)){ 61 | 62 | dataset2 <- dataset %>% dplyr::pull(column.name) %>% as.character() 63 | 64 | responses <- NULL 65 | source_lang <- NULL 66 | z <- 0 67 | 68 | 69 | 70 | for(i in dataset2){ 71 | svMisc::progress(z, max.value = length(dataset2)) 72 | z <- z+1 73 | i <- stringr::str_replace(gsub("\\s+", "%20", stringr::str_trim(i)), "B", "b") 74 | 75 | # Source language: "detect" vs. 
"X" 76 | if(source.lang=="detect"){ 77 | 78 | 79 | response.i <- httr::GET(paste(url, 80 | i, 81 | "&target_lang=", target.lang, 82 | "&auth_key=", auth_key 83 | , sep = "")) 84 | }else{ 85 | response.i <- httr::GET(paste(url, 86 | i, 87 | "&source_lang=", source.lang, 88 | "&target_lang=", target.lang, 89 | "&auth_key=", auth_key 90 | , sep = "")) 91 | } 92 | 93 | 94 | respcontent.i <- httr::content(response.i, as="text", encoding = "UTF-8") 95 | #print(respcontent.i) 96 | result.i <- jsonlite::fromJSON(respcontent.i)$translations$text 97 | 98 | # Source language: "detect" vs. "X" 99 | if(source.lang == "detect"){ 100 | source.lang.i <- jsonlite::fromJSON(respcontent.i)$translations$detected_source_language 101 | }else{ 102 | source.lang.i <- jsonlite::fromJSON(respcontent.i)$translations$detected_source_language 103 | } 104 | 105 | #print(result.i) 106 | responses <- c(responses, result.i) 107 | #print(responses) 108 | source_lang <- c(source_lang, source.lang.i) 109 | } 110 | column.name.new <- paste0(column.name, "_", target.lang) 111 | dataset <- dplyr::bind_cols(dataset, newtranslation = responses) 112 | names(dataset)[names(dataset)=="newtranslation"] <- column.name.new 113 | 114 | 115 | # OUPUT 116 | 117 | if(add.source.lang == TRUE){ 118 | return(data.frame(cbind(dataset, source_lang))) 119 | }else{ 120 | return(dataset) 121 | } 122 | 123 | 124 | }else{ 125 | cat("The input is not of class() dataframe or you forgot to specify the name of the column that shall be translated.") 126 | } 127 | 128 | 129 | 130 | } # API KEY 131 | 132 | } # End of function 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | -------------------------------------------------------------------------------- /.Rproj.user/DC33445C/sources/s-8812577A/42984BE0-contents: -------------------------------------------------------------------------------- 1 | --- 2 | title: | 3 | | The Polarization of Trust in the EU*^[ Acknowledgements: We thank.... 
Contact: mail@paulcbauer.eu, davide.morisi@univie.ac.at] 4 | author: | 5 | | Paul C. Bauer (MZES, Mannheim) 6 | | Davide Morisi (University of Wien) 7 | date: '' 8 | linestretch: 1.2 9 | colorlinks: true 10 | abstract: \noindent\setstretch{1}Scholars usually investigate how average levels of trust in EU institutions vary across countries and across time. In the present study we change the perspective. Instead of focusing on trust averages, we investigate trust polarization across European populations, relying on different measures of polarization. We base our analysis on data from the European Social Survey that span the years 2002 to 2016, which allows for a comparative perspective both across countries and time. Our evidence suggests that European populations strongly differ in their level of polarization. Moreover, there are also strong differences in time trends of polarization. An investigation of between group polarization of exogenously defined groups remains inconclusive in what regards the drivers of this polarization.\vspace{.8cm} 11 | bibliography: Paperpile.bib 12 | output: 13 | bookdown::pdf_document2: 14 | includes: 15 | in_header: header.tex 16 | toc: no 17 | keep_tex: true 18 | mainfont: Wingdings 19 | sansfont: Wingdings 20 | fontsize: 12pt 21 | documentclass: article 22 | geometry: margin=1in 23 | always_allow_html: yes 24 | header-includes: 25 | - \usepackage{dcolumn} 26 | - \usepackage{color} 27 | --- 28 | 29 | \newcommand*{\secref}[1]{Section~\ref{#1}} 30 | 31 | ```{r setup, include = FALSE, cache = FALSE} 32 | set.seed(12345) 33 | rm(list=ls()) 34 | library(knitr) 35 | library(dplyr) 36 | library(stringr) 37 | # DATA 38 | # European Social Survey: http://www.europeansocialsurvey.org/ 39 | 40 | # cat(paste("#", capture.output(sessionInfo()), "\n", collapse ="")) 41 | ``` 42 | 43 | 44 | 45 | ```{r import-data, message=FALSE, warning=FALSE, include=FALSE, cache=TRUE} 46 | 47 | # education 48 | # (lower education [i.e., International Standard 
Classification of Education (ISCED) 0, 1 and 2] contrasted with higher education [i.e., ISCED 3–6] used as reference) 49 | 50 | library(plyr) 51 | library(dplyr) 52 | library(stringr) 53 | library(tidyr) 54 | library(haven) 55 | library(stargazer) 56 | 57 | rm(list=ls()) 58 | 59 | # Load datasets, subset and generate objects 60 | datasets <- dir(".", pattern = "ESS[0-9]e[0-9][0-9]_[0-9].dta", full.names = TRUE) 61 | years <- 2002+seq(0,14,2) 62 | for(i in 1:8){ 63 | data <- read_dta(datasets[i]) 64 | data <- data %>% select(essround, idno, cntry, trstprl, trstlgl, trstplc, trstplt, trstep, trstun, agea, eduyrs, hincfel, mainact, dweight, pweight) 65 | 66 | # Education, income, employment, age 67 | 68 | data <- data %>% dplyr::rename(trust_parliament = trstprl, 69 | trust_legalsystem = trstlgl, 70 | trust_police = trstplc, 71 | trust_politicians = trstplt, 72 | trust_euparliament = trstep, 73 | trust_un = trstun, 74 | age = agea, 75 | hincome_feeling = hincfel) 76 | # names(data) <- paste(names(data), "_", str_extract(str_replace(i, "\\./", ""), "^.{4}"), sep="") 77 | assign(paste(str_extract(str_replace(datasets[i], "\\./", ""), "^.{4}"), "_", years[i], sep=""), data.frame(data)) 78 | print(i) 79 | } 80 | rm(data, datasets, i, years) 81 | 82 | 83 | # Create longformat 84 | liste <- list(ESS1_2002, ESS2_2004, ESS3_2006, ESS4_2008, ESS5_2010, ESS6_2012, ESS7_2014, ESS8_2016) 85 | # GENERATE LONG FORMAT DATASET 86 | liste2 <- liste 87 | years <- c("2002", "2004", "2006", "2008", "2010", "2012", "2014", "2016") 88 | for (i in 1:8){ 89 | liste2[[i]] <- liste2[[i]] %>% mutate(year = years[i]) 90 | } 91 | data <- bind_rows(liste2[[1]],liste2[[2]],liste2[[3]],liste2[[4]], 92 | liste2[[5]],liste2[[6]],liste2[[7]],liste2[[8]]) 93 | ``` 94 | 95 | 96 | 97 | ```{r recode-vars, message=FALSE, warning=FALSE, include=FALSE, cache=TRUE} 98 | 99 | # Recode trust values higher 10 to NA 100 | data <- data %>% mutate_at(vars(trust_parliament:trust_un), # recode 101 | function(x) 
ifelse(x > 10, NA, x)) 102 | table(data$trust_euparliament) 103 | 104 | # Age 105 | data <- data %>% 106 | mutate(age = ifelse(age > 123, NA, age)) %>% 107 | mutate(age = ifelse(age < 18, NA, age)) %>% 108 | mutate(age_cat = as.numeric(as.character(cut(age, breaks = c(-1, 34, 65, 100.5), labels = c("0", "NA", "1"))))) 109 | #table(data$age, data$age_cat) 110 | 111 | # Recode: Respondent’s feelings about the household’s current income 112 | # (perceived higher income [i.e., the respondent reports coping on his or her present income or living comfortably on it] used as reference category versus perceived low income [i.e., the respondent reports finding it difficult or very difficult to cope on his or her present income]); 113 | 114 | # 1 Living comfortably on present income 115 | # 2 Coping on present income 116 | # 3 Difficult on present income 117 | # 4 Very difficult on present income 118 | data <- data %>% 119 | mutate(hincome_feeling = ifelse(hincome_feeling > 4, NA, hincome_feeling)) %>% 120 | mutate(hinc_good = ifelse(hincome_feeling > 2, 0, 1)) 121 | # table(data$hinc_good, data$hincome_feeling) 122 | 123 | 124 | # Employment status 125 | # the respondent’s employment status (in paid work or in education as reference 126 | # category versus unemployed, retired or other non-employed); 127 | data <- data %>% 128 | mutate(paid_work = ifelse(mainact > 9, NA, mainact)) 129 | data$paid_work <- as.numeric(dplyr::recode(data$paid_work, 130 | 1 == 1, 131 | 2 == 1, 132 | 3 == 0, 133 | 4 == 0, 134 | 5 == NA, 135 | 6 == 0, 136 | 7 == NA, 137 | 8 == NA, 138 | 9 == NA)) 139 | #table(data$mainact, data$paid_work) 140 | # 1 Paid work 141 | # 2 Education 142 | # 3 Unemployed, looking for job 143 | # 4 Unemployed, not looking for job 144 | # 5 Permanently sick or disabled 145 | # 6 Retired 146 | # 7 Community or military service 147 | # 8 Housework, looking after children, others 148 | # 9 Other 149 | # 66 Not applicable 150 | # 77 Refusal 151 | # 88 Don't know 152 | # 99 No 
answer 153 | 154 | 155 | # Education years 156 | # 77 Refusal 157 | # 88 Don't know 158 | # 99 No answer 159 | data <- data %>% 160 | mutate(education_years = ifelse(eduyrs > 56, NA, eduyrs)) %>% 161 | mutate(education_cat = as.numeric(as.character(cut(education_years, breaks = c(-1, 14.1, 57), labels = c("0", "1"))))) 162 | table(data$education_years) 163 | table(data$education_cat) 164 | 165 | data <- data %>% select(cntry, contains("trust"), age, age_cat, hinc_good, paid_work, education_years, education_cat, year, dweight, pweight) 166 | 167 | 168 | 169 | 170 | 171 | ``` 172 | 173 | 174 | ```{r add-country-names, message=FALSE, warning=FALSE, include=FALSE, cache=TRUE} 175 | 176 | # SCRAPE COUNTRY CODES FROM WIKIPEDIA 177 | # library(rvest) 178 | # countrycodes <- html("http://en.wikipedia.org/wiki/ISO_3166-1", encoding = "UTF-8") %>% 179 | # html_nodes(xpath='//*[@id="mw-content-text"]/div/table[2]') %>% 180 | # html_table() 181 | # countrycodes <- countrycodes[[1]] 182 | # names(countrycodes)[1:2] <- c("country", "cntry") 183 | # write.table(countrycodes, "data_countrycodes.csv", sep=",") 184 | countrycodes <- read.csv("data_countrycodes.csv", sep=",") 185 | data <- left_join(data, countrycodes, by="cntry", match="all") 186 | data$country <- as.character(data$country) 187 | data[data$cntry=="XK","country"] <- "Kosovo" 188 | data$country[data$country=="United Kingdom of Great Britain and Northern Ireland"] <- "United Kingdom" 189 | names(data) <- str_replace_all(names(data), "\\.", "_") 190 | # data <- data %>% rename(links = Link_to_ISO_3166_2_subdivision_codes) 191 | # data <- data %>% select(year, cntry, country, contains("trust_eu")) 192 | 193 | ``` 194 | 195 | 196 | 197 | ```{r subset-countries, message=FALSE, warning=FALSE, include=FALSE, cache=TRUE} 198 | data <- data %>% filter(country!="Albania", 199 | country!="Iceland", 200 | country!="Israel", 201 | country!="Kosovo", 202 | country!="Norway", 203 | country!="Russian Federation", 204 | 
country!="Turkey", 205 | country!="Ukraine") 206 | ``` 207 | 208 | 209 | 210 | 211 | 212 | ```{r polarization-overall, message=FALSE, warning=FALSE, include=FALSE, cache=TRUE} 213 | library(dplyr) 214 | 215 | data.agg <- data %>% select(-dweight, -pweight) %>% group_by(year, country) %>% 216 | summarise_at(vars(trust_parliament, trust_legalsystem, trust_police, trust_politicians, trust_euparliament, trust_un), funs(mean, sd, IQR), na.rm = TRUE) %>% # Aggregate 217 | mutate_at(c("trust_parliament_sd", "trust_legalsystem_sd", "trust_police_sd", "trust_politicians_sd", "trust_euparliament_sd", "trust_un_sd"), .funs = funs("2" = . * 2)) %>% # SD * 2 218 | mutate(trust_parliament_pol = (trust_parliament_sd_2*trust_parliament_IQR)/10) %>% # Polarization index 219 | mutate(trust_legalsystem_pol = (trust_legalsystem_sd_2*trust_legalsystem_IQR)/10) %>% # Polarization index 220 | mutate(trust_police_pol = (trust_police_sd_2*trust_police_IQR)/10) %>% # Polarization index 221 | mutate(trust_politicians_pol = (trust_politicians_sd_2*trust_politicians_IQR)/10) %>% # Polarization index 222 | mutate(trust_euparliament_pol = (trust_euparliament_sd_2*trust_euparliament_IQR)/10) %>% # Polarization index 223 | mutate(trust_un_pol = (trust_un_sd_2*trust_un_IQR)/10) # Polarization index 224 | 225 | 226 | 227 | # Write table 228 | write.table(data.agg, "ESS_aggregated_long.csv", row.names=FALSE, sep=",") 229 | write_dta(data.agg, "ESS_aggregated_long.dta") 230 | 231 | 232 | # Create wide format 233 | esswide <- data.agg %>% gather(key = variable, 234 | value = values, 235 | -year, 236 | -country) %>% 237 | unite(variable_time, variable, year, sep = "_") %>% 238 | spread(key = variable_time, value = values) 239 | 240 | # store wideformat 241 | write.table(esswide, "ESS_aggregated_wide.csv", row.names=FALSE, sep=",") 242 | write_dta(esswide, "ESS_aggregated_wide.dta") 243 | ``` 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | \setlength{\tabcolsep}{2pt} 256 | 257 | # 
To-do 258 | * Make a map showing all European countries (see pol. of human values) 259 | 260 | # Introduction 261 | A healthy and stable democracy requires a basic level of trust in its political institutions (Warren 1999). Political trust is not only "essential to a democratic community's well-being" (Mara 2001, 820) and for individuals' willingness to engage in collective actions (Hardin 1999), but also for the implementation of public policies, especially "when government programs require people to make sacrifices" (Hetherington 2005: 4). Given the central role of trust in assuring the legitimacy of democratic systems (van der Meer 2010, 518), it is therefore not surprising that scholars have raised concerns about a generalized decline of trust in democratic institutions across the Western world in recent decades (Dalton 2004; Norris 1999; Pharr & Putnam 2000). 262 | Trust may be particularly crucial to guarantee the legitimacy of relatively "young institutions" such as the European Union (in comparison to most nation states), especially if they need to challenge a supposed democratic deficit, as some scholars have argued in relation to the EU (see for example Follesdal & Hix 2006). Although support for the EU has remained relatively stable since the mid-1990s (Hix 2008), recent studies have shown that citizens' confidence in the functioning of different European institutions has declined as a consequence of the 2008 economic crisis (Dotti Sani & Magistro 2016; Ehrmann et al. 2013; Roth et al. 2013; Armingeon & Ceka 2014; Armingeon & Guthmann 2014). Support for the EU has decreased both as a direct consequence of a series of anti-crisis policy measures adopted by European institutions, and as an indirect consequence of a general decline in support for national government (Armingeon & Ceka 2014). 
263 | Yet, despite growing evidence of a decline of support for European institutions, current research has not considered whether heterogeneous changes in trust in the EU might have occurred across individuals and countries over time. Since the literature so far has focused only on shifts in average levels of trust, we do not know whether the distribution of trust levels across countries and individuals has also significantly changed in response to the economic crisis. An important exception in this sense is a recent study by Dotti Sani and Magistro (2016), in which they investigate whether the economic crisis led to "a growing gap" in trust in the European Parliament (EP). Their findings indicate that support for the EP declined mostly in the countries that were hit the most by the crisis, and that "subjects from the lower positions in the social ladder ... lost more trust in the EP compared to those who are better off" (Dotti Sani & Magistro 2016: 260). 264 | Building on this background, we pursue the following question: How polarized is political trust across European member states? 265 | 266 | 267 | In this study we aim to shed light on how trust in the European Parliament has changed over time across individuals of different European member states. In addition to considering average changes over time, we analyze changes in the distribution of trust levels in the population, in order to identify whether and how trust in the EP has polarized as a consequence of the economic crisis. Since the crisis has not equally hit citizens from different European countries and from different social strata, we expect that trust in the EP has not only declined after the economic crisis, but has also polarized, especially in the countries that suffered a stronger economic downturn. 
We argue that focusing on the distribution of trust is as important as focusing on average levels, especially if a decline in trust is accompanied by increasing polarization, which, in turn, might lead to increasing conflict and political instability (Esteban and Schneider 2006). This promising change of focus from averages to variances or other measures of spread is rarely made within quantitative research (??). Specifically, this gap has also been pointed out within research on trust and social capital (?: 91,122). 268 | Besides providing novel evidence regarding political trust and support for the European Union, we contribute to the theoretical debate on how to define and measure polarization. We broadly define a process of polarization as a situation in which individuals' positions on one or several scales are increasingly separated over time. The nature of this separation, i.e. to what extent different characteristics of a distribution reflect the idea of polarization, is still subject to debate [@Bauer2018-ti, 1]. Likewise, scholars are still somewhat unsure about the extent of polarization with regard to different phenomena, among other things also political trust [e.g. @Hetherington2014-dg; @Hetherington2015-cd]. 269 | To our knowledge there is no empirical study that takes stock of the polarization of trust across countries across time. We do so by analyzing eight waves of the European Social Survey. Moreover, analyzing polarization challenges existing theory to explain not only average differences between countries but also differences in the within-heterogeneity of countries. Thus, besides a purely descriptive endeavor we make a theoretical contribution by developing explanations of why some countries are more polarized than others. Subsequently, we test these various explanations empirically. 270 | Our findings show that. 
271 | We proceed as follows: Section [2](#sec:theory) discusses how one may conceptualize and measure the polarization of political trust. Section [3](#sec:data) presents the design, data and measures. Section [4](#sec:results) summarizes the results. Section [5](#sec:conclusion) provides a summary, discusses limitations and provides rationales for future research. 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | Investigating this question contributes to a vibrant area of research within comparative politics that investigates political trust across countries (Sources?). We broadly define a process of polarization as a situation in which individuals’ positions on one or several scales are increasingly separated over time. The nature of this separation, i.e. to what extent different characteristics of a distribution reflect the idea of polarization, is still subject to debate [@Bauer2018-ti, 1]. Likewise, scholars are still somewhat unsure about the extent of polarization with regard to different phenomena, among other things also political trust [e.g. @Hetherington2014-dg; @Hetherington2015-cd]. We contribute to current scholarship in different ways. 291 | First, we provide the first descriptive overview of the polarization of political trust in Europe. In general, comparative research to this date has overlooked the fact that trust does not spread uniformly across societies. Rather, trust is unevenly distributed between segments of societies (?: 171). This promising change of focus from averages to variances or other measures of spread is rarely made within quantitative research (??). Specifically, this gap has also been pointed out within research on trust and social capital (?: 91,122). Focusing on polarization informs us about differences between populations that are generally overlooked when studying means. 
To our knowledge there is no empirical study that takes stock of the polarization of trust across countries across time. We do so by analyzing eight waves of the European Social Survey. Moreover, analyzing polarization challenges existing theory to explain not only average differences between countries but also differences in the within-heterogeneity of countries. Thus, besides a purely descriptive endeavor, we make a theoretical contribution by developing explanations of why some countries are more polarized than others. Subsequently, we test these various explanations empirically. 292 | Second, we discuss some of the challenges when it comes to measuring the polarization of trust. Thereby, we also discuss what those results actually mean... 293 | 294 | 295 | We proceed as follows: Section [2](#sec:theory) discusses how one may conceptualize and measure the polarization of political trust. Section [3](#sec:data) presents the design, data and measures. Section [4](#sec:results) summarizes the results. Section [5](#sec:conclusion) provides a summary, discusses limitations and provides rationales for future research. 296 | 297 | 298 | 299 | 300 | 301 | # Conceptualizing and measuring the polarization of trust {#sec:concept} 302 | "Broadly speaking, the level of polarization describes to what degree individuals' positions on one or several scales are separated. A process or trend of (de-)polarization describes how the level of polarization changes over time. However, the nature of this separation, i.e. to what extent different characteristics of a distribution reflect the idea of polarization is still subject to debate, a debate that is linked to the scale(s) that underly one's concept of polarization. Scholars arrive at different conclusions regarding the levels and trends of polarization of different phenomena and polarization's impact on other phenomena (e.g. violence) and are also critical of certain measures [see e.g. @Downey2001-dg]. 
In part, such differing conclusions are caused by the use of different conceptualizations or measures of polarization." [@Bauer2018-ti] 303 | In this paper we pursue two notions and corresponding measures of polarization. The first notion is that polarization is higher as more individuals have diverging, more extreme positions. From a distributional perspective individuals would be more dispersed on a scale. We use this notion to gauge the overall polarization across countries and use the variance as a concrete measure [cf. @DiMaggio1996-pr]: 304 | 305 | $${\displaystyle v_{ct}={\frac {1}{n_{ct}}}\sum \limits _{i=1}^{n_{ct}}\left(x_{cit}-{\overline{x_{ct}}}\right)^{2}}$$ 306 | 307 | where $v_{ct}$ is the country-year value of the variance of trust in the European Parliament, $x_{cit}$ is an individual $i$'s level of trust in country $c$ at time $t$, $\overline{x_{ct}}$ is the respective country average at time $t$, and $n_{ct}$ is the sample size (number of individuals) we have for country $c$ at time $t$. 308 | While a crude measure, the variance convinces through its interpretational simplicity. The more individuals have positions far from a sample's mean, the higher the level of polarization. Since the distance is squared, those far from the mean $\overline{x_{ct}}$ carry more weight. 309 | The second notion is linked to the idea of clusters in a distribution. Generally, polarization increases as the within-cluster distance decreases while the between-cluster distance increases. We apply this notion to test whether the overall polarization is potentially linked to more extreme positions of certain subgroups. To do so, we employ a measure of between-group polarization introduced by @Esteban1994-ai, to which we refer as the *er index*. 
@Esteban1994-ai formally derive a polarization measure for discrete distributions: 310 | 311 | $$P(\pi,\mathbf{y}) = \sum_{i = 1}^{n} \sum_{j = 1}^{n}\pi_{i}\pi_{j}\mathbf{T}(\mathbf{I}(\pi_{i}),a(\delta(y_{i}, y_{j})))$$ 312 | 313 | Conceptually, $P$ depends on *identification* $\mathbf{I}$, which is a function of individual $i$'s group size $\pi_{i}$. Identification reflects the idea that an individual feels a sense of identification with others in the group, a sense that increases with the number of individuals in this group (reflected by $\pi_{i}$). $P$ also depends on *alienation* $a(\delta(y_{i}, y_{j}))$ where $\delta(y_{i}, y_{j})$ simply describes the absolute distance between two individuals $i$ and $j$. The *effective antagonism* is given by a cardinal function $\mathbf{T}(\mathbf{I},a)$ that combines both identification and alienation. 314 | The choice of $\mathbf{I}(\cdot)$, $a(\cdot)$ and $\mathbf{T}(\cdot)$ will yield a particular measure of polarization: 315 | 316 | $$P^{*}(\pi,\mathbf{y}) = K \sum_{i = 1}^{n} \sum_{j = 1}^{n}\pi_{i}^{1+\alpha}\pi_{j}|y_{i} - y_{j}|$$ 317 | where $K$ is a multiplicative constant that is used for normalization, and $\alpha$ restricts the identification function to the form $\pi^{\alpha}$ where $\alpha > 0$. As $\alpha$ goes to zero $P^{*}$ approximates the Gini coefficient (defined on log incomes). $\pi$ is a vector of shares and $\mathbf{y}$ is a vector of means. $y_{i}$ and $y_{j}$ are the means in the respective groups and $\pi_{i}$ and $\pi_{j}$ are the proportions of observations in those groups. The choice of groups is an exogenous process and we'll discuss which subgroups we investigate in the next section. 318 | 319 | 320 | 321 | 322 | 323 | 324 | * Check out recent papers/measures by 325 | + @Kleiner2018-uz 326 | + @Kleiner2016-dk 327 | 328 | Various measures of polarization have been proposed during recent years. Importantly, some are more, some are less amenable to our application. 
In the present study trust is measured on 11-point scales going from "0 - No trust at all" to "10 - complete trust". For those scales we can assume an interval measurement level. 329 | 330 | 331 | Although the theoretical definition of polarisation provided so far seems straightforward, its practical application proves complicated. The key problem lies in the fact that there is no hierarchical relation between the two parameters of dispersion and bimodality. Thus, in the case of a distribution that is more dispersed but less bimodal than another distribution, it is hard to define which distribution is more polarised. A technical solution could be developing a single index of polarisation, by combining the statistical measures of variance and kurtosis, which, according to the literature (DiMaggio et al., 1996; Baldassarri and Bearman, 2007), respectively capture the dimensions of dispersion and bimodality. However, the indexes proposed so far do not seem satisfactory or suitable for the field of public opinion. In addition, the parameter of kurtosis suffers from severe limitations especially in the case of small samples (DeCarlo, 1997). 332 | A concrete example concerning the traditionally polarised issue of abortion can help clarify these problems. As a starting point, we can think of a society that is sharply divided into two groups with opposing but at the same time moderate views on the issue of abortion. At Time 1, therefore, both groups agree that in certain cases abortion could be either allowed or forbidden. Imagine that subsequently the issue of abortion becomes salient in the public debate because, for example, a new law is about to be approved. The result can be that at Time 2 a number of individuals shift towards more radical opinions both in favour of and against abortion (thus opinions become more dispersed), but at the same time individuals might cluster into several smaller groups (thus the distribution of opinions becomes less bimodal). 
Unless a single index is developed, we can hardly define whether the distribution of opinions at Time 2 is more or less polarised than the distribution at Time 1. 333 | 334 | 335 | 336 | # Theory and hypotheses: Winners and losers {#sec:theory} 337 | We investigate both overall polarization and between-subgroup polarization with regard to trust in the EU. There is ample evidence showing that trust in EU institutions has declined in recent years. Various accounts that differ in terms of complexity theorize this development. While the decline of overall levels of trust in the EU is at least to some extent a result of the politics of EU institutions, e.g. the economic crisis and the austerity policies that the EU imposed—in tandem with the IMF—on several member countries [@Armingeon2014-ut], one of the most important determinants of trust in EU institutions is the evaluation of national-level institutions. 338 | 339 | 340 | 341 | 342 | @Armingeon2014-ut 343 | 344 | 345 | 346 | How can we explain the decline in support for the European Union (EU) and the idea of European integration after the onset of the great recession in the fall of 2007? Did the economic crisis and the austerity policies that the EU imposed—in tandem with the IMF—on several member countries help cause this drop? While there is some evidence for this direct effect of EU policies, we find that the most significant determinant of trust and support for the EU remains the level of trust in national governments. Based on cue theory and using concepts of diffuse and specific support, we find that support for the EU is derived from evaluations of national politics and policy, which Europeans know far better than the remote political system of the EU. This effect, however, is somewhat muted for those sophisticated Europeans that are more knowledgeable about the EU and are able to form opinions about it independently of the national contexts in which they live. 
We also find that the recent economic crisis has led to a discernible increase in the number of those who are disillusioned with politics both at the national and the supranational level. We analyze 133 national surveys from 27 EU countries by estimating a series of cross-classified multilevel logistic regression models. 347 | 348 | To start we *hypothesize that the overall level of polarization of trust in the EU parliament has increased, i.e. there is a positive trend in the level of polarization (H1)*. This overall increase in polarization will most certainly be due to heavy increases in particular countries and underlying heterogeneity that we attempt to show and visualize. 349 | With regard to within-country, between-subgroup polarization, the question is between which societal subgroups polarization increased the most. 350 | 351 | 352 | 353 | among which subgroups polarization it is harder to make clear theoretical predictions and hypotheses. 354 | 355 | 356 | 357 | 358 | 359 | 360 | across expect to find that the overall level of polarization has increased across 361 | 362 | 363 | with differing complexity theorize this development of trust decline. Some 364 | 365 | 366 | and this development has been convincingly theorized. 367 | 368 | 369 | Not much theorizing is required when it comes to overall levels and trends of EU trust. It is to be assumed. 370 | 371 | 372 | 373 | 374 | There is ample evidence 375 | * Explanation of overall trend 376 | 377 | 378 | 379 | 380 | 381 | In our analysis we contrast the following groups: Low education (<12 years) vs. high education (>12 years), struggling with income (Difficult on present income + Very difficult on present income) vs. not struggling with income (Living comfortably on present income + Coping on present income), unemployed/retired/non-employed vs. in education/paid work, younger (<34) vs. older (>65). 
382 | 383 | 384 | 385 | 386 | # Data, models and further measures {#sec:data} 387 | We rely on data from 8 waves of the European Social Survey for the years `r seq(2002, 2016, 2)`. Not all EU member states were observed across the full time span. The question we focus on is the following: 388 | 389 | *Using this card, please tell me on a score of 0-10 how much you personally trust each of the institutions I read out. 0 means you do not trust an institution at all, and 10 means you have complete trust. Firstly...* And we are interested in respondents' judgments of *the European Parliament*. 390 | 391 | We calculate both overall polarization and between-group polarization across countries across time as outlined in Section \@ref(sec:concept). Hence, we are able to estimate time trends for both. Corresponding to those two measures we have several outcome variables, one for the overall polarization levels and several more corresponding to between-subgroup polarization of different subgroups. The variables contain year-country specific values of both overall polarization and between-group polarization. 392 | The groups we define for between-subgroup polarization are age, the extent to which they can live comfortably with their income, employment status, education years and party id (can we compare countries with different numbers of parties?). 393 | To estimate trends, we use time as an explanatory variable (going from 2002 - 2016) and rescale this variable so that it centers at 2009. We start with Model A that is specified as follows: 394 | 395 | $$y_{it} = \alpha + \beta \times time_{t} + \epsilon_{it}$$ 396 | 397 | where $y_{it}$ represents the level of polarization in a particular country $i$ at time $t$, $\beta$ the coefficient of time, i.e. by how much trust or the polarization of trust increases per year, and $\epsilon_{it}$ the errors (deviations from our model) that are assumed to follow a normal distribution. 
398 | 399 | Implementing a multilevel approach serves at least two purposes. First, the estimation of some trends may be unstable, especially when a specific pair of issues is only observed for a few years. The varying-intercept, varying-slope approach allows us to explicitly capture this uncertainty in the estimates. Secondly, simply estimating an overall trend is only the first step of our analysis. In order to obtain a finer-grained picture of POP trends, we are also interested in looking at issue-dimension-specific correlation trends. The estimated variation in the multilevel models gives us exactly this kind of information. While Model A provides an estimate for the overall trend of POP (the average of $b_p$ across all pairs, bp!b <!-- symbol garbled in extraction; restore from the original source -->) and issue-pair-specific trends, we respecify the basic model in several ways. In Model B, we estimate trends of attitude scale pairs that are located within or between issue dimensions. More precisely, it allows us to evaluate separate trends for within-dimension attitude scale pairs and between-dimension attitude scale pairs. Here, the time trend variable is interacted with a dummy variable that indicates whether the two attitude scales belong to the same or different issue dimensions. Thus we can determine whether attitude alignment between attitude scales belonging to a certain dimension has risen more than alignment between attitude scales that belong to two different dimensions. Model C introduces four groups that correspond to the three issue dimensions and a group for cross-dimension pairs (that is, the group of attitude scale pairs that do not belong to the same dimension). As ever, $r_{pt}$ is the correlation of a pair of attitude scales at point $t$ in time and the group level is a specific pair. 
The trend term is now interacted with a variable that indicates the attitude scale pairs belonging to a certain issue dimension. When moving to the sub-group analyses, we simply divide the sample of respondents according to certain characteristics (gender, education level, income, political interest, religious denomination and East versus West). Statistically, the models follow the known specification. 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | # Empirical results {#sec:results} 412 | 413 | ## Averages and polarization of trust: Levels and trends 414 | 415 | Figure \ref{fig:fig-alldata} visualizes all our data points, namely trust averages across countries (mean) and trust polarization levels across countries. As is visible in Figure \ref{fig:fig-alldata}, trust means range from `r paste(data.agg %>% ungroup() %>% dplyr::select(trust_euparliament_mean, country, year) %>% arrange(trust_euparliament_mean) %>% mutate_at(1, round, 2) %>% slice(1) %>% as.character(), collapse = ", ")` to `r paste(data.agg %>% ungroup() %>% dplyr::select(trust_euparliament_mean, country, year) %>% arrange(desc(trust_euparliament_mean)) %>% mutate_at(1, round, 2) %>% slice(1) %>% as.character(), collapse = ", ")`. Trust polarization ranges from `r paste(data.agg %>% ungroup() %>% dplyr::select(trust_euparliament_sd, country, year) %>% arrange(trust_euparliament_sd) %>% mutate_at(1, round, 2) %>% slice(1) %>% as.character(), collapse = ", ")` to `r paste(data.agg %>% ungroup() %>% dplyr::select(trust_euparliament_sd, country, year) %>% arrange(desc(trust_euparliament_sd))%>% mutate_at(1, round, 2) %>% slice(1) %>% as.character(), collapse = ", ")`. Figure \ref{fig:fig-alldata} illustrates that there is not only variation across countries (both in terms of average trust and polarization) but also across time. 
```{r fig-alldata, echo=FALSE, message=FALSE, warning=FALSE, fig.width=7, fig.height=9, fig.cap="European parliament: Level and polarization of trust across time\\label{fig:fig-alldata}", fig.align="H"}
# Two-panel dot plot with one row per country. Within each row the country's
# yearly values are drawn as a small connected trajectory (earlier years lower,
# later years higher). Left panel: mean trust; right panel: SD of trust.
# NOTE(review): fig.align="H" is not one of knitr's documented alignment values;
# fig.pos="H" may have been intended -- confirm before changing the header.
library(dplyr)

vars <- "trust_euparliament_mean" # column drawn in the left panel
vars2 <- "trust_euparliament_sd"  # column drawn in the right panel

# VERY STRANGE GRAPH FOR trust_un_pol

data.agg.tmp <- data.agg %>% ungroup() %>% arrange(!!rlang::sym(vars))

# Rank countries by order of first appearance after sorting on the mean, i.e.
# by their lowest observed mean. The rank vector is derived from the data
# instead of a hard-coded 1:28 so the chunk works for any number of countries.
country_order <- unique(data.agg.tmp$country)
data.agg.tmp$rank <- match(data.agg.tmp$country, country_order)
# Order by rank, then year, so type = "b" connects the points chronologically.
data.agg.tmp <- arrange(data.agg.tmp, rank, year)

countries <- unique(data.agg.tmp$country)
n_countries <- length(countries)
yrange <- c(1, n_countries)
colors <- rep(c("#ca0020", "#0571b0", "#a6611a", "#404040"),
              length.out = n_countries) # "#d7191c", "#fdae61", "#a6d96a", "#1a9641"

# Draw one connected trajectory per country for the given column of
# data.agg.tmp. The year is mapped to a small vertical offset around the
# country's row.
draw_trajectories <- function(column) {
  for (z in seq_along(countries)) {
    rows <- data.agg.tmp[data.agg.tmp$country == countries[z], ]
    yrs <- as.numeric(as.character(rows$year))
    points(rows[[column]], (z + (yrs - 2000) / 20) - .3,
           type = "b", lwd = 1.5, lty = 1, col = colors[z], pch = 1, cex = .6)
  }
}

#pdf(paste("distributions", "_descriptive.pdf", sep=""), width=7,height=9)
par(mfrow = c(1, 2), mar = c(3, 8, 1, 0), oma = c(0, 0, 0, 2))

# Left panel: mean
xrange <- c(1, 7)
plot(xrange, yrange, type = "n", xlab = "", ylab = "", xlim = xrange,
     xaxt = "n", yaxt = "n")
mtext(paste0("Mean [mean]: \n", vars), side = 1, line = 2, cex = .8)
axis(1, at = seq(0, 10, 1), cex.axis = .8, tck = -0.01, labels = NA)
axis(side = 1, at = seq(0, 10, 1), lwd = 0, line = -.7, cex.axis = .8)
axis(3, at = seq(0, 10, 1), cex.axis = .8)
axis(2, at = seq_len(n_countries), labels = FALSE, cex.axis = .8, las = 1,
     col.axis = "black")
abline(h = seq_len(n_countries), lty = 2, col = "gray")
abline(v = 1:10, lty = 2, col = "gray")
draw_trajectories(vars)
# Small legend showing the direction of time within a row
arrows(6, 0.5, 6, 1.5, length = .07)
text(6.5, 0.5, "2002", cex = .5)
text(6.5, 1.5, "2016", cex = .5)
text(labels = countries, col = colors, x = rep(-0.5, n_countries),
     y = seq_len(n_countries), srt = 0, pos = 2, xpd = TRUE, cex = .8)

# Right panel: polarization (SD). The plotted column now matches the axis
# label; previously trust_euparliament_pol was drawn under an SD label.
xrange <- c(0, 4)
par(mar = c(3, 1, 1, 4))
plot(xrange, yrange, type = "n", xlab = "", ylab = "", xlim = xrange,
     xaxt = "n", yaxt = "n")
axis(1, at = seq(0, 10, 1), cex.axis = .8, tck = -0.01, labels = NA)
axis(side = 1, at = seq(0, 10, 1), lwd = 0, line = -.7, cex.axis = .8)
axis(3, at = 0:10, labels = 0:10, cex.axis = .8)
# axis(2, at=c(1:length(countries)),labels=FALSE, cex.axis=.8, las=1, col.axis="black")
abline(h = seq_len(n_countries), lty = 2, col = "gray")
abline(v = seq(1, 10, 1), lty = 2, col = "gray")
draw_trajectories(vars2)
mtext(paste0("Polarization [SD]: \n", vars2), side = 1, line = 2, cex = .8)

# text(labels=unique(data.agg$country), col=colors, x=rep(-.5,length(unique(data.agg$country))),y=1:length(unique(data.agg$country)), srt = 0, pos = 2, xpd = TRUE)
# savePlot(filename="ess_descriptive.pdf", type="pdf")
#dev.off()
```



Table \@ref(tab:trust-trends) displays the estimates of a very simple linear regression model to illustrate the time trend. It shows that there is both a decrease in the overall average of trust and an increase in the overall polarization of trust across countries.


```{r model-time-trend, echo=FALSE, message=FALSE, warning=FALSE, results="asis"}
# Linear time-trend models with country fixed effects for the country-year
# mean (fit1, fit2) and SD (fit3, fit4) of trust in the EU parliament,
# reported in one stargazer table.
library(stargazer)

# Convert via as.character() so that a factor `year` yields the calendar year
# (2002, 2004, ...) rather than its integer level codes; this mirrors the
# conversion used in the figure chunk above and is a no-op for numeric or
# character input.
data.agg$year <- as.numeric(as.character(data.agg$year))

# NOTE(review): fit2 duplicates fit1 and fit4 duplicates fit3 -- presumably
# placeholders for richer specifications; confirm.
fit1 <- lm(trust_euparliament_mean ~ year + as.factor(country), data = data.agg)
fit2 <- lm(trust_euparliament_mean ~ year + as.factor(country), data = data.agg)

fit3 <- lm(trust_euparliament_sd ~ year + as.factor(country), data = data.agg)
fit4 <- lm(trust_euparliament_sd ~ year + as.factor(country), data = data.agg)


stargazer(fit1, fit2, fit3, fit4,
          type = "latex",
          title = "Time trend: Trust EU Parliament",
          omit.stat = c("LL", "ser", "f", "adj.rsq"),
          omit = c("country"), # hide the country fixed-effect dummies
          dep.var.caption = "Outcome: Trust EU Parliament",
          dep.var.labels = c("Mean", "Sd."),
          ci = FALSE, digits = 2,
          ci.level = 0.95,
          single.row = FALSE,
          label = "tab:trust-trends",
          table.placement = "H",
          column.sep.width = "-7pt",
          align = TRUE,
          column.labels = c("M1", "M2", "M3", "M4"), # one label per model
          model.names = FALSE,
          model.numbers = FALSE,
          star.cutoffs = c(0.05, 0.01, 0.001),
          notes = "* Data: European Social Survey; Country fixed-effects ",
          header = FALSE,
          no.space = TRUE,
          font.size = "footnotesize"
          )
```


Figure \ref{fig:fig-timetrends} visualizes time trends in the data. Estimate a multilevel model...

```{r fig-time-trends, eval=FALSE, fig.align="H", fig.cap="Time trends of polarization\\label{fig:fig-timetrends}", message=FALSE, warning=FALSE, include=FALSE}
# Interactive line plots of the country-level mean and polarization of trust
# in the EU parliament over time, plus per-country summary statistics that
# are used to bin countries by their first-to-last change.
library(plotly)

# Per-country summaries (constant within a country).
data.lineplot <- data.agg %>%
  group_by(country) %>%
  arrange(country, year) %>%
  mutate(max_mean = max(trust_euparliament_mean),
         min_mean = min(trust_euparliament_mean),
         max_sd = max(trust_euparliament_sd),
         min_sd = min(trust_euparliament_sd),
         first_mean = first(trust_euparliament_mean),
         last_mean = last(trust_euparliament_mean),
         first_sd = first(trust_euparliament_sd),
         last_sd = last(trust_euparliament_sd),
         diff_mean = first_mean - last_mean,
         diff_sd = first_sd - last_sd) %>%
  ungroup()

# Quantile breaks must be computed ACROSS countries. Inside group_by(country)
# diff_mean is constant, so quantile() would return identical breaks and cut()
# would fail with "'breaks' are not unique". One value per country is used so
# that countries observed in more waves do not weigh more.
country.diff <- distinct(data.lineplot, country, diff_mean)
diff.mean.breaks <- quantile(country.diff$diff_mean,
                             c(0, 0.05, 0.2, 0.4, 0.6, 0.8, 0.95, 1),
                             na.rm = TRUE)

data.lineplot <- data.lineplot %>%
  mutate(diff_mean_groups = cut(diff_mean,
                                breaks = diff.mean.breaks,
                                labels = as.character(0:6),
                                include.lowest = TRUE), # keep the minimum
         diff_sd_groups = cut(diff_sd,
                              breaks = c(-1, -0.14956, -0.07142, 0.07580, 5),
                              labels = c("0", "1", "2", "3")))


p1 <- plot_ly(data.lineplot,
              x = ~year,
              y = ~trust_euparliament_mean,
              type = 'scatter',
              mode = 'lines',
              color = ~country) %>%
  layout(yaxis = list(range = c(0, 10), title = "Trust: EU parliament"),
         xaxis = list(title = "year"))

p2 <- plot_ly(data.lineplot,
              x = ~year,
              y = ~trust_euparliament_pol,
              type = 'scatter',
              mode = 'lines',
              color = ~country) %>%
  layout(yaxis = list(range = c(0, 10), title = "Trust polarization: EU parliament"),
         xaxis = list(title = "year"))

p.sm <- subplot(p1, p2, nrows = 1, shareX = FALSE, shareY = FALSE,
                titleX = TRUE, titleY = TRUE, margin = 0.05) %>%
  layout(showlegend = FALSE, autosize = TRUE, height = 400, width = 800)
p.sm
```

```{r eval=FALSE, message=FALSE, warning=FALSE, include=FALSE}
# Export the combined plot to PDF and PNG.
library(plotly)
export(p.sm, file = "fig-time-trends.pdf",
       zoom = 0.6
       )
export(p.sm, file = "fig-time-trends.png",
       cliprect = c(60, 60, 1000, 1000)
       #, zoom = 2
       )
```

```{r fig-time-trends2, eval=FALSE, fig.align="H", fig.cap="Time trends of polarization\\label{fig:fig-timetrends-groups}", message=FALSE, warning=FALSE, include=FALSE}
# Same plots, but averaged within the groups of countries defined by their
# first-to-last change in mean trust (diff_mean_groups).

# A named list replaces funs(), which is deprecated since dplyr 0.8.0; with a
# single input column the output columns are still called `mean` and `sd`.
data.lineplot.agg <- data.lineplot %>%
  group_by(year, diff_mean_groups) %>%
  summarise_at(vars(trust_euparliament_mean), list(mean = mean, sd = sd), na.rm = TRUE) %>%
  arrange(diff_mean_groups, year) %>%
  ungroup()


p1 <- plot_ly(data.lineplot.agg,
              x = ~year,
              y = ~mean,
              type = 'scatter',
              mode = 'lines',
              color = ~diff_mean_groups,
              linetype = ~diff_mean_groups) %>%
  layout(yaxis = list(range = c(2, 8), title = "Trust mean: EU parliament"),
         xaxis = list(title = "year"))

p2 <- plot_ly(data.lineplot.agg,
              x = ~year,
              y = ~sd,
              type = 'scatter',
              mode = 'lines',
              color = ~diff_mean_groups,
              linetype = ~diff_mean_groups) %>%
  layout(yaxis = list(range = c(0, 2), title = "Trust polarization: EU parliament"),
         xaxis = list(title = "year"))

p.sm <- subplot(p1, p2, nrows = 1, shareX = FALSE, shareY = FALSE,
                titleX = TRUE, titleY = TRUE, margin = 0.05) %>%
  layout(showlegend = FALSE, autosize = TRUE, height = 400, width = 800)
p.sm
```

```{r eval=FALSE, message=FALSE, warning=FALSE, include=FALSE}
# Export the grouped plot.
# NOTE(review): these are the same file names as in the export chunk of
# fig-time-trends, so running both overwrites the first export -- confirm
# whether separate file names were intended.
library(plotly)
export(p.sm, file = "fig-time-trends.pdf",
       zoom = 0.6
       )
export(p.sm, file = "fig-time-trends.png",
       cliprect = c(60, 60, 1000, 1000)
       #, zoom = 2
       )
```


## Between-group polarization: Levels and trends

Analogous to @Lelkes2016-bk, @Levendusky2011-ax and @Schmid2006-jn, we rely on the overlap coefficient to measure between-group polarization. Below we divide the sample into the following groups...



```{r compare-overlap-overlapping-package, eval=FALSE, message=FALSE, warning=FALSE, include=FALSE, results="asis"}
# Exploratory comparison of two ways to estimate the overlap coefficient,
# adapted from
# https://stats.stackexchange.com/questions/97596/how-to-calculate-overlap-between-empirical-probability-densities

library(overlap)
library(scales)
library(overlapping)

# Examples with own data: trust in the EU parliament for the two age
# categories in Germany, 2002.
a <- data %>%
  filter(country == "Germany", age_cat == 0, year == 2002) %>%
  select(trust_euparliament) %>%
  na.omit() %>%
  pull()
b <- data %>%
  filter(country == "Germany", age_cat == 1, year == 2002) %>%
  select(trust_euparliament) %>%
  na.omit() %>%
  pull()


# OVERLAP PACKAGE

# To use overlapTrue() from the overlap package the scale must be in radians
# (i.e. 0 to 2*pi). To keep the *relative* values of a and b the same, combine
# a and b in one data frame before rescaling (this needs the scales package).
# A "Source" column distinguishes a from b after they are combined.
a <- data.frame(value = a, Source = "a")
b <- data.frame(value = b, Source = "b")
d <- rbind(a, b)
d$value <- rescale(d$value, to = c(0, 2 * pi))

# Now create the rescaled a and b vectors.
a <- d[d$Source == "a", 1]
b <- d[d$Source == "b", 1]

# The area of overlap can be computed with either overlapTrue() or
# overlapEst() from the overlap package. Note that with these functions the
# KDEs are fitted using a von Mises kernel.

# Using overlapTrue():
# define limits of a common grid, adding a buffer so that tails aren't cut off
lower <- min(d$value) - 1
upper <- max(d$value) + 1
# generate kernel densities
da <- density(a, from = lower, to = upper, adjust = 1)
db <- density(b, from = lower, to = upper, adjust = 1)
# Compute overlap coefficient
# NOTE(review): the density grid spans [lower, upper], not [0, 2*pi]; confirm
# that this is a valid input for overlapTrue().
overlapTrue(da$y, db$y)


# Using overlapEst():
overlapEst(a, b, kmax = 3, adjust = c(0.8, 1, 4), n.grid = 500)

# overlapPlot() draws the two KDEs and the region of overlap. The x scale
# always ranges from 0 to `xscale`, so it shows the rescaled values rather
# than the original ones; use a different plotting method if the original
# scale (or negative values) matters. The x label is set with `xlab`.
overlapPlot(a, b, xscale = 10, xlab = "x metrics", rug = TRUE)


# OVERLAPPING PACKAGE

# `a` and `b` are the rescaled vectors at this point.
# NOTE(review): the labels assume age_cat == 0 is the older group -- confirm.
x <- list(trust.old = a,
          trust.young = b)

overlap(x, nbins = 1000, plot = TRUE, partial.plot = TRUE)

```


```{r overlap-coefficient, eval=FALSE, message=FALSE, warning=FALSE, include=FALSE, results="asis"}

library(overlapping) # or try overlap package

# Variables
# age_cat, hinc_good, paid_work, education_cat

# Overlap coefficient between the two categories (coded 0 and 1) of
# `group_var`, computed separately for every country-year cell of `data`.
#
# data       data frame with `country`, `year`, the grouping variable and the
#            trust item.
# group_var  name (string) of the binary grouping column.
# trust_col  position of the trust item in `data`; the default 4 is the
#            column the original code kept ("Only keep trust variable").
#
# Returns a data frame with one row per country-year and the columns
# `country`, `year` and `OV_<group_var>`. Cells in which one of the two groups
# is absent or has fewer than two observations get NA instead of stopping the
# whole run.
compute_group_overlap <- function(data, group_var, trust_col = 4) {
  cells <- split(
    data,
    list(data$country, data$year, data[[group_var]]),
    drop = TRUE
  )

  # Keep only the non-missing trust values of each cell.
  trust_values <- lapply(cells, function(cell) {
    values <- cell[[trust_col]]
    values[!is.na(values)]
  })

  # Cell names look like "<country>.<year>.<group>". Strip the group suffix
  # and de-duplicate so that every country-year is processed exactly once.
  cell_ids <- unique(sub('\\.([^\\.]*)$', '', names(trust_values)))

  ov <- vapply(cell_ids, function(id) {
    group0 <- trust_values[[paste0(id, ".0")]]
    group1 <- trust_values[[paste0(id, ".1")]]
    # A kernel density needs at least two observations per group.
    if (length(group0) < 2 || length(group1) < 2) {
      return(NA_real_)
    }
    pair <- list(trust.group0 = group0, trust.group1 = group1)
    unname(overlap(pair, nbins = 1000, plot = FALSE, partial.plot = FALSE)$OV)
  }, numeric(1))

  result <- data.frame(country = str_extract(cell_ids, "[^\\.]+"),
                       year = str_extract(cell_ids, "[0-9][0-9][0-9][0-9]"),
                       OV = ov)
  names(result)[3] <- paste0("OV_", group_var)
  result
}

results_age_cat <- compute_group_overlap(data, "age_cat")
results_hinc_good <- compute_group_overlap(data, "hinc_good")
results_paid_work <- compute_group_overlap(data, "paid_work")
results_education_cat <- compute_group_overlap(data, "education_cat")


# Merge datasets
OV <- full_join(results_age_cat, results_hinc_good, by = c("country", "year"))
OV <- full_join(OV, results_paid_work, by = c("country", "year"))
OV <- full_join(OV, results_education_cat, by = c("country", "year"))

# Cache the result; the chunks below read this file instead of recomputing.
write.csv(OV, "data_ov.csv", row.names = FALSE)


```

```{r import-overlap, message=FALSE, warning=FALSE, include=FALSE}

# Load the overlap coefficients written by the `overlap-coefficient` chunk.
OV <- read.csv("data_ov.csv")
```

```{r visualize-overlap-trend, echo=FALSE, message=FALSE, warning=FALSE}
library(plotly)
# Yearly average of each overlap coefficient across countries.
OV.agg <- OV %>%
  group_by(year) %>%
  summarise_all(mean, na.rm = TRUE) %>%
  arrange(year) %>%
  ungroup()

# One line per grouping variable; the added traces inherit the data given to
# plot_ly(), so it is not passed again.
plot_ly(OV.agg,
        x = ~year,
        y = ~OV_age_cat,
        type = 'scatter',
        mode = 'lines',
        name = "Age groups") %>%
  add_trace(x = ~year,
            y = ~OV_hinc_good,
            type = 'scatter',
            mode = 'lines',
            name = "Income groups") %>%
  add_trace(x = ~year,
            y = ~OV_paid_work,
            type = 'scatter',
            mode = 'lines',
            name = "Work groups") %>%
  add_trace(x = ~year,
            y = ~OV_education_cat,
            type = 'scatter',
            mode = 'lines',
            name = "Educ. groups") %>%
  layout(yaxis = list(range = c(0.8, 1), title = "Overlap coefficient"),
         xaxis = list(title = "Year",
                      dtick = 2,
                      tickangle = 45), autosize = FALSE, height = 500, width = 800)
```

Table \@ref(tab:OV-trends) displays the estimates of a very simple linear regression model to illustrate the time trend. It shows that there is both a decrease in the overall average of trust and an increase in the overall polarization of trust across countries.

```{r model-time-trend-OV, echo=FALSE, message=FALSE, warning=FALSE, results="asis"}
library(stargazer)
# NOTE(review): kept from the original, but the models below are fitted on
# `OV`, not on `data.agg`.
data.agg$year <- as.numeric(data.agg$year)
# Make sure the time trend enters the models below as a numeric regressor.
OV$year <- as.numeric(OV$year)

# Linear time trend of each overlap coefficient with country fixed effects.
fit1 <- lm(OV_age_cat ~ year + as.factor(country), data = OV)
fit2 <- lm(OV_hinc_good ~ year + as.factor(country), data = OV)

fit3 <- lm(OV_paid_work ~ year + as.factor(country), data = OV)
fit4 <- lm(OV_education_cat ~ year + as.factor(country), data = OV)


stargazer(fit1, fit2, fit3, fit4,
          type = "latex",
          title = "Time trend of overlap coefficient",
          omit.stat = c("LL", "ser", "f", "adj.rsq"),
          omit = c("country"), # hide the country fixed-effect coefficients
          dep.var.caption = "Outcome: Overlap coefficient",
          dep.var.labels = c("Age groups", "Income groups", "Work groups", "Educ. groups"),
          ci = FALSE, digits = 2,
          ci.level = 0.95,
          single.row = FALSE,
          label = "tab:OV-trends",
          table.placement = "H",
          # column.sep.width = "-7pt",
          align = TRUE,
          column.labels = c("M5", "M6", "M7", "M8"),
          model.names = FALSE,
          model.numbers = FALSE,
          star.cutoffs = c(0.05, 0.01, 0.001),
          notes = "* Data: European Social Survey; Country fixed-effects ",
          header = FALSE,
          no.space = TRUE,
          font.size = "footnotesize"
          )
```



```{r map1, eval=FALSE, message=FALSE, warning=FALSE, include=FALSE, results="asis"}

suppressPackageStartupMessages(library(sf))

world <- st_as_sf(rnaturalearth::countries110)
europe <- dplyr::filter(world, region_un == "Europe" & name != 'Russia')

# A bounding box for continental Europe.
europe.bbox <- st_polygon(list(
  matrix(c(-25, 29, 45, 29, 45, 75, -25, 75, -25, 29), byrow = TRUE, ncol = 2)))

# Clip the country polygons to the bounding box.
europe.clipped <- suppressWarnings(st_intersection(europe, st_sfc(europe.bbox, crs = st_crs(europe))))


ggplot(europe.clipped, aes(fill = gdp_md_est / 1000)) +
  geom_sf(alpha = 0.8, col = 'white') +
  coord_sf(crs = "+proj=aea +lat_1=36.333333333333336 +lat_2=65.66666666666667 +lon_0=14") +
  hrbrthemes::theme_ipsum_rc() +
  viridis::scale_fill_viridis(
    name = 'Median GDP \n(in Billions)', direction = -1, labels = scales::dollar) +
  labs(x = NULL, y = NULL, title = NULL,
       caption = 'Source: http://www.naturalearthdata.com/')
```

```{r map2, eval=FALSE, message=FALSE, warning=FALSE, include=FALSE, results="asis"}

# NOTE(review): this chunk uses carto.pal(), choroLayer(), layoutLayer() and
# the objects nuts0.spdf, nuts2.spdf, nuts2.df and world.spdf, none of which
# are defined in it. Presumably the cartography package and its example data
# must be loaded first -- confirm before setting eval=TRUE.

# Compute the compound annual growth rate
nuts2.df$cagr <- (((nuts2.df$pop2008 / nuts2.df$pop1999)^(1/9)) - 1) * 100

# Set a custom color palette
cols <- carto.pal(pal1 = "green.pal", # first color gradient
                  n1 = 2, # number of colors in the first gradient
                  pal2 = "red.pal", # second color gradient
                  n2 = 4) # number of colors in the second gradient

# Plot a layer with the extent of the EU28 countries with only a background color
plot(nuts0.spdf, border = NA, col = NA, bg = "#A6CAE0")
# Plot non-European space
plot(world.spdf, col = "#E3DEBF", border = NA, add = TRUE)

# Plot the compound annual growth rate
choroLayer(spdf = nuts2.spdf, # SpatialPolygonsDataFrame of the regions
           df = nuts2.df, # data frame with compound annual growth rate
           var = "cagr", # compound annual growth rate field in df
           breaks = c(-2.43, -1, 0, 0.5, 1, 2, 3.1), # list of breaks
           col = cols, # colors
           border = "grey40", # color of the polygon borders
           lwd = 0.5, # width of the borders
           legend.pos = "right", # position of the legend
           legend.title.txt = "Compound Annual\nGrowth Rate", # title of the legend
           legend.values.rnd = 2, # number of decimals in the legend values
           add = TRUE) # add the layer to the current plot

# Plot a layer of country borders
plot(nuts0.spdf, border = "grey20", lwd = 0.75, add = TRUE)

# Layout plot
layoutLayer(title = "Demographic Trends", author = "cartography",
            sources = "Eurostat, 2008", frame = TRUE, col = NA,
            scale = NULL, coltitle = "black",
            south = TRUE) # add a south arrow
```

# Discussion and conclusion {#sec:conclusion}


* Use different measures to capture between-subgroup polarization. Here we exogenously defined those groups following theoretical rationales. This presupposes that we have good ideas in the first place.




\normalsize


\clearpage

# Appendix
## Summary tables

```{r summary-stats, echo=FALSE, message=FALSE, warning=FALSE, cache=TRUE, results="asis"}
# SUMMARY STATISTICS ####
# One summary table per survey year (2002, 2004, ..., 2016).
for (i in seq(2002, 2016, by = 2)) {
  # Same string the original built; str_replace() on a numeric year was a no-op.
  table_title <- paste0(i, " summary table")
  stargazer(data %>% filter(year == i) %>% data.frame(),
            type = "latex", label = table_title, font.size = "scriptsize", table.placement = "!ht",
            column.sep.width = ".2pt", title = table_title,
            digits = 2, rownames = FALSE)
}
```




\clearpage

# References




--------------------------------------------------------------------------------