├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ └── test-coverage.yaml ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── aaa.R ├── data-documentation.R ├── print.R ├── quanteda.sentiment-package.R ├── re-exports.R ├── textstat_polarity.R └── textstat_valence.R ├── README.Rmd ├── README.md ├── codecov.yml ├── data ├── data_dictionary_AFINN.rda ├── data_dictionary_ANEW.rda ├── data_dictionary_HuLiu.rda ├── data_dictionary_LSD2015.rda ├── data_dictionary_LoughranMcDonald.rda ├── data_dictionary_NRC.rda ├── data_dictionary_Rauh.rda ├── data_dictionary_geninqposneg.rda └── data_dictionary_sentiws.rda ├── inst ├── WORDLIST └── extdata │ └── afinn │ ├── AFINN-111.txt │ ├── AFINN-96.txt │ └── AFINN-README.txt ├── man ├── data_dictionary_AFINN.Rd ├── data_dictionary_ANEW.Rd ├── data_dictionary_HuLiu.Rd ├── data_dictionary_LSD2015.Rd ├── data_dictionary_LoughranMcDonald.Rd ├── data_dictionary_NRC.Rd ├── data_dictionary_Rauh.Rd ├── data_dictionary_geninqposneg.Rd ├── data_dictionary_sentiws.Rd ├── get_polarity_dictionary.Rd ├── images │ ├── unnamed-chunk-5-1.png │ └── unnamed-chunk-6-1.png ├── polarity.Rd ├── quanteda.sentiment-package.Rd ├── sentiment-functions.Rd ├── textstat_polarity.Rd ├── textstat_valence.Rd └── valence.Rd ├── sources ├── AFINN │ ├── AFINN-111.txt │ ├── AFINN-96.txt │ ├── AFINN-README.txt │ └── create-data_dictionary_AFINN.R ├── ANEW │ └── create-data_dictionary_ANEW.R ├── Hu-Liu │ ├── create_data_dictionary-HuLiu.R │ ├── negative-words-UTF8.txt │ └── positive-words.txt ├── Laver-Garry │ ├── Laver_and_Garry_2000.cat │ └── create-data_dictionary_LaverGarry.R ├── Loughran-McDonald │ ├── Loughran_and_McDonald_2014.cat │ └── create-data_dictionary_LoughranMcDonald.R ├── MFD │ ├── create-data_dictionary_MFD.R │ ├── mfd2.0.dic │ └── moral_foundations_dictionary.dic ├── NRC │ └── create_data_dictionary-NRC.R ├── RID │ ├── RID.CAT │ └── create-data_dictionary_RID.R ├── Rauh │ ├── Rauh_SentDictionaryGerman.Rdata │ ├── Rauh_SentDictionaryGerman_Negation.Rdata │ └── create-data_dictionary_Rauh.R ├── geninquirer │ ├── create-data_dictionary_geninquirer.R │ └── inquireraugmented.csv ├── make_sentiment_dictionaries.R ├── sentiws │ ├── create-data_dictionary_sentiws.R │ ├── sentiws_v1.8c_negative.txt │ └── sentiws_v1.8c_positive.txt ├── test-misc.R └── uk_us_english │ └── data_dict_usbr.csv ├── tests ├── spelling.R ├── testthat.R └── testthat │ ├── test-data.R │ ├── test-misc.R │ ├── test-test.R │ ├── test-textstat_polarity.R │ └── test-textstat_valence.R └── vignettes ├── .gitignore └── sentiment_analysis.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.travis\.yml$ 4 | ^appveyor\.yml$ 5 | ^codecov\.yml$ 6 | ^README\.Rmd$ 7 | ^\.github$ 8 | ^doc$ 9 | ^Meta$ 10 | sources 11 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macos-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v4 33 | 34 | - uses: r-lib/actions/setup-pandoc@v2 35 | 36 | - uses: r-lib/actions/setup-r@v2 37 | with: 38 | r-version: ${{ matrix.config.r }} 39 | http-user-agent: ${{ matrix.config.http-user-agent }} 40 | use-public-rspm: true 41 | 42 | - uses: r-lib/actions/setup-r-dependencies@v2 43 | with: 44 | extra-packages: any::rcmdcheck 45 | needs: check 46 | 47 | - uses: r-lib/actions/check-r-package@v2 48 | with: 49 | upload-snapshots: true 50 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - uses: r-lib/actions/setup-r@v2 21 | with: 22 | use-public-rspm: true 23 | 24 | - uses: r-lib/actions/setup-r-dependencies@v2 25 | with: 26 | extra-packages: any::covr 27 | needs: coverage 28 | 29 | - name: Test coverage 30 | run: | 31 | covr::codecov( 32 | quiet = FALSE, 33 | clean = FALSE, 34 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 35 | ) 36 | shell: Rscript {0} 37 | 38 | - name: Show testthat output 39 | if: always() 40 | run: | 41 | ## -------------------------------------------------------------------- 42 | find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true 43 | shell: bash 44 | 45 | - name: Upload test results 46 | if: failure() 47 | uses: actions/upload-artifact@v4 48 | with: 49 | name: coverage-test-failures 50 | path: ${{ runner.temp }}/package 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | # Session Data files 5 | .RData 6 | # Example code in package build process 7 | *-Ex.R 8 | # Output files from R CMD build 9 | /*.tar.gz 10 | # Output files from R CMD check 11 | /*.Rcheck/ 12 | # RStudio files 13 | .Rproj.user/ 14 | # produced vignettes 15 | vignettes/*.html 16 | vignettes/*.pdf 17 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 18 | .httr-oauth 19 | # knitr and R markdown default cache directories 20 | /*_cache/ 21 | /cache/ 22 | # Temporary files created by R markdown 23 | *.utf8.md 24 | *.knit.md 25 | .Rproj.user 26 | *.Rproj 27 | 
inst/doc 28 | doc 29 | Meta 30 | .DS_Store 31 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: quanteda.sentiment 2 | Title: Sentiment Analysis using 'quanteda' 3 | Version: 0.31 4 | Authors@R: c( person("Kenneth", "Benoit", email = "kbenoit@lse.ac.uk", role = 5 | c("aut", "cre", "cph")) ) 6 | Description: Adds functions and dictionaries for computing sentiment using the 'quanteda' package. 7 | Depends: 8 | R (>= 4.1.0), 9 | quanteda (>= 3.2.1), 10 | methods 11 | Imports: 12 | Matrix, 13 | stringi 14 | License: GPL-3 15 | Encoding: UTF-8 16 | LazyData: true 17 | RoxygenNote: 7.3.1 18 | Suggests: 19 | covr, 20 | ggplot2, 21 | knitr, 22 | rmarkdown, 23 | spelling, 24 | testthat 25 | Roxygen: list(markdown = TRUE) 26 | Language: en-GB 27 | VignetteBuilder: knitr 28 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method("polarity<-",dictionary2) 4 | S3method("valence<-",dictionary2) 5 | S3method(polarity,dictionary2) 6 | S3method(textstat_polarity,character) 7 | S3method(textstat_polarity,corpus) 8 | S3method(textstat_polarity,default) 9 | S3method(textstat_polarity,dfm) 10 | S3method(textstat_polarity,tokens) 11 | S3method(textstat_valence,character) 12 | S3method(textstat_valence,corpus) 13 | S3method(textstat_valence,default) 14 | S3method(textstat_valence,dfm) 15 | S3method(textstat_valence,tokens) 16 | S3method(valence,dictionary2) 17 | export("polarity<-") 18 | export("valence<-") 19 | export(polarity) 20 | export(sent_abspropdiff) 21 | export(sent_logit) 22 | export(sent_relpropdiff) 23 | export(textstat_polarity) 24 | export(textstat_valence) 25 | export(valence) 26 | import(methods) 27 | importFrom(Matrix,rowSums) 28 | importFrom(quanteda,as.dfm) 29 | importFrom(quanteda,convert) 30 | importFrom(quanteda,corpus) 31 | importFrom(quanteda,dfm) 32 | importFrom(quanteda,dfm_lookup) 33 | importFrom(quanteda,dictionary) 34 | importFrom(quanteda,tokens) 35 | importFrom(quanteda,tokens_lookup) 36 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # quanteda.sentiment 0.2x 2 | 3 | * Usability improvements 4 | * nested_scope = "dictionary" is now standard for applications on tokens, for textstat_polarity() (#12) 5 | * Minor changes in preparation for forthcoming v3 release. 6 | 7 | # quanteda.sentiment 0.1 8 | 9 | * Added a `NEWS.md` file to track changes to the package. 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /R/aaa.R: -------------------------------------------------------------------------------- 1 | build_dictionary2 <- quanteda:::build_dictionary2 2 | validate_dictionary <- quanteda:::validate_dictionary 3 | build_dictionary2 <- quanteda:::build_dictionary2 4 | -------------------------------------------------------------------------------- /R/data-documentation.R: -------------------------------------------------------------------------------- 1 | #' Lexicoder Sentiment Dictionary (2015) 2 | #' 3 | #' The 2015 Lexicoder Sentiment Dictionary in \pkg{quanteda} [dictionary] 4 | #' format. 
5 | #' 6 | #' @details 7 | #' The dictionary consists of 2,858 "negative" sentiment words and 1,709 8 | #' "positive" sentiment words. A further set of 2,860 and 1,721 negations of 9 | #' negative and positive words, respectively, is also included. While many users 10 | #' will find the non-negation sentiment forms of the LSD adequate for sentiment 11 | #' analysis, Young and Soroka (2012) did find a small, but non-negligible 12 | #' increase in performance when accounting for negations. Users wishing to test 13 | #' this or include the negations are encouraged to subtract negated positive 14 | #' words from the count of positive words, and subtract the negated negative 15 | #' words from the negative count. 16 | #' 17 | #' Young and Soroka (2012) also suggest the use of a pre-processing script to 18 | #' remove specific cases of some words (i.e., "good bye", or "nobody better", 19 | #' which should not be counted as positive). Pre-processing scripts are 20 | #' available at . 21 | #' @section License and Conditions: 22 | #' The LSD is available for non-commercial academic purposes only. By using 23 | #' `data_dictionary_LSD2015`, you accept these terms. 24 | #' 25 | #' Please cite the references below when using the dictionary. 26 | #' @format 27 | #' A [dictionary] of four keys containing glob-style [pattern 28 | #' matches][valuetype]. 29 | #' \describe{ 30 | #' \item{`negative`}{2,858 word patterns indicating negative sentiment} 31 | #' \item{`positive`}{1,709 word patterns indicating positive sentiment} 32 | #' \item{`neg_positive`}{1,721 word patterns indicating a positive word preceded 33 | #' by a negation (used to convey negative sentiment)} 34 | #' \item{`neg_negative`}{2,860 word patterns indicating a negative word preceded 35 | #' by a negation (used to convey positive sentiment)} 36 | #' } 37 | #' @references 38 | #' The objectives, development and reliability of the dictionary are discussed 39 | #' in detail in Young and Soroka (2012). Please cite this article when using 40 | #' the Lexicoder Sentiment Dictionary and related resources. 41 | # 42 | #' Young, L. & Soroka, S. (2012). *Lexicoder Sentiment 43 | #' Dictionary*. Available at . 44 | #' 45 | #' Young, L. & Soroka, S. (2012). Affective News: The Automated Coding of 46 | #' Sentiment in Political Texts. *Political Communication*, 29(2), 205--231. 47 | #' \doi{10.1080/10584609.2012.671234} 48 | #' @keywords data 49 | #' @examples 50 | #' # checking polarity 51 | #' polarity(data_dictionary_LSD2015) 52 | #' 53 | #' # simple example 54 | #' library("quanteda") 55 | #' txt <- "This aggressive policy will not win friends." 56 | #' 57 | #' tokens_lookup(tokens(txt), dictionary = data_dictionary_LSD2015, 58 | #' exclusive = FALSE) 59 | #' ## tokens from 1 document. 60 | #' ## text1 : 61 | #' ## [1] "This" "NEGATIVE" "policy" "will" "NEG_POSITIVE" "POSITIVE" "POSITIVE" "." 62 | #' 63 | #' # notice that double-counting of negated and non-negated terms is avoided 64 | #' # when using nested_scope = "dictionary" 65 | #' tokens_lookup(tokens(txt), dictionary = data_dictionary_LSD2015, 66 | #' exclusive = FALSE, nested_scope = "dictionary") 67 | #' ## tokens from 1 document. 68 | #' ## text1 : 69 | #' ## [1] "This" "NEGATIVE" "policy" "will" "NEG_POSITIVE" "POSITIVE." 
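#'
#' # netting out negated from direct counts, as suggested in Details
#' # (an illustrative sketch; the column names are the dictionary keys)
#' counts <- convert(dfm(tokens_lookup(tokens(txt), data_dictionary_LSD2015)),
#'                   to = "data.frame")
#' counts$positive - counts$neg_positive   # net positive count
#' counts$negative - counts$neg_negative   # net negative count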
70 | #' 71 | #' # on larger examples - notice that few negations are used 72 | #' tail(data_corpus_inaugural) |> 73 | #' tokens() |> 74 | #' tokens_lookup(dictionary = data_dictionary_LSD2015) |> 75 | #' dfm() 76 | "data_dictionary_LSD2015" 77 | 78 | #' NRC Word-Emotion Association Lexicon 79 | #' 80 | #' @description 81 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing Mohammad and 82 | #' Charron's (2010, 2013) English version of the NRC Word-Emotion Association 83 | #' Lexicon (aka NRC Emotion Lexicon aka EmoLex): association of words with eight 84 | #' emotions (anger, fear, anticipation, trust, surprise, sadness, joy, and disgust) 85 | #' and two sentiments (negative and positive) manually annotated on Amazon's 86 | #' Mechanical Turk. 87 | #' 88 | #' @description 89 | #' The Sentiment and Emotion Lexicons is a collection of lexicons that was 90 | #' entirely created by the experts of the National Research Council of Canada. 91 | #' Developed with a wide range of applications, this lexicon collection can be 92 | #' used in a multitude of contexts such as sentiment analysis, product 93 | #' marketing, consumer behaviour and even political campaign analysis. 94 | #' 95 | #' @description 96 | #' The technology uses a list of words that help identify emotions, sentiment, 97 | #' as well as analyzing hashtags, emoticons and word-colour associations. The 98 | #' lexicons contain entries for English words, and can be used to analyze 99 | #' English texts. 100 | #' @references 101 | #' Mohammad, S. & Turney, P. (2013). [Crowdsourcing a Word-Emotion Association 102 | #' Lexicon](https://arxiv.org/abs/1308.6297). *Computational Intelligence*, 103 | #' 29(3), 436--465. 104 | #' 105 | #' Mohammad, S. & Turney, P. (2010). [Emotions Evoked by Common Words and 106 | #' Phrases: Using Mechanical Turk to Create an Emotion 107 | #' Lexicon](https://dl.acm.org/doi/10.5555/1860631.1860635). In *Proceedings of 108 | #' the NAACL-HLT 2010 Workshop on Computational Approaches to Analysis and 109 | #' Generation of Emotion in Text*, June 2010, LA, California. 110 | #' @source 111 | #' 112 | #' See also 113 | #' @section License and Terms of Use: 114 | #' Free for research purposes. 115 | #' 116 | #' For questions about the commercial license, email Pierre Charron (Client 117 | #' Relationship Leader at NRC): `Pierre.Charron@nrc-cnrc.gc.ca`. 118 | #' 119 | #' Terms of Use: 120 | #' * Cite the papers associated with the lexicons in your research papers and 121 | #' articles that make use of them. (The papers associated with each lexicon 122 | #' are listed below, and also in the READMEs for individual lexicons.) 123 | #' * In news articles and online posts on work using these lexicons, cite the 124 | #' appropriate lexicons. For example: "This application/product/tool makes 125 | #' use of the `resource name`, created by `author(s)` at the National 126 | #' Research Council Canada." (The creators of each lexicon are listed below. 127 | #' Also, if you send us an email, we will be thrilled to know about how you 128 | #' have used the lexicon.) If possible hyperlink to this page: 129 | #' . 130 | #' * If you use a lexicon in a product or application, then acknowledge this in 131 | #' the 'About' page and other relevant documentation of the application by 132 | #' stating the name of the resource, the authors, and NRC. For example: "This 133 | #' application/product/tool makes use of the `resource name`, created by 134 | #' `author(s)` at the National Research Council Canada." 
(The creators of 135 | #' each lexicon are listed below. Also, if you send us an email, we will be 136 | #' thrilled to know about how you have used the lexicon.) If possible 137 | #' hyperlink to this page: . 138 | #' * Do not redistribute the data. Direct interested parties to this page: 139 | #' . 140 | #' * National Research Council Canada (NRC) disclaims any responsibility for 141 | #' the use of the lexicons listed here and does not provide technical 142 | #' support. However, the contact listed above will be happy to respond to 143 | #' queries and clarifications. 144 | #' @note Technical and research-related questions can be addressed to Saif M. 145 | #' Mohammad (Senior Research Scientist at NRC): 146 | #' `Saif.Mohammad@nrc-cnrc.gc.ca`. 147 | #' @keywords data 148 | "data_dictionary_NRC" 149 | 150 | #' Positive and negative words from Hu and Liu (2004) 151 | #' 152 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing 2,006 153 | #' positive and 4,783 negative words from Hu and Liu (2004, 2005). 154 | #' @format 155 | #' A [dictionary] of fixed word patterns with two keys: 156 | #' * `positive`: 2,006 words with positive polarity 157 | #' * `negative`: 4,783 words with negative polarity 158 | #' @references 159 | #' Hu, M. & Liu, B. (2004). [Mining and Summarizing Customer 160 | #' Reviews](https://www.cs.uic.edu/~liub/publications/kdd04-revSummary.pdf). In 161 | #' Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery 162 | #' and Data Mining (KDD-2004), Aug 22--25, 2004, Seattle, Washington, USA. 163 | #' 164 | #' Liu, M., Hu, M., & Cheng, J. (2005). [Opinion Observer: Analyzing and 165 | #' Comparing Opinions on the 166 | #' Web](https://www.cs.uic.edu/~liub/publications/www05-p536.pdf). In 167 | #' Proceedings of the 14th International World Wide Web conference (WWW-2005), 168 | #' May 10--14, 2005, Chiba, Japan. 169 | #' @section License: 170 | #' Unknown. 171 | #' @source 172 | #' @keywords data 173 | "data_dictionary_HuLiu" 174 | 175 | #' Augmented General Inquirer *Positiv* and *Negativ* dictionary 176 | #' 177 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing the 178 | #' *Positiv* and *Negativ* dictionary entries from the augmented 179 | #' General Inquirer. These are new valence categories described at 180 | #' `http://www.wjh.harvard.edu/~inquirer/homecat.htm` but also including the 181 | #' terms from the "yes" "no" dictionary entries. 182 | #' @format 183 | #' A [dictionary] of fixed word patterns with two keys: 184 | #' * `positive`: 1,653 words with positive polarity 185 | #' * `negative`: 2,010 words with negative polarity 186 | #' @references Stone, P.J., Dunphy, C.D., & Smith, M.S. (1966). 187 | #' *The General Inquirer: A Computer Approach to Content Analysis.* 188 | #' Cambridge, MA: MIT Press. 189 | #' @source `http://www.wjh.harvard.edu/~inquirer/spreadsheet_guide.htm` -- 190 | #' although this site ceased operating some time in 2021 191 | #' @keywords data 192 | "data_dictionary_geninqposneg" 193 | 194 | #' SentimentWortschatz (SentiWS) 195 | #' 196 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing 197 | #' SentimentWortschatz (SentiWS), a publicly available German-language resource 198 | #' for sentiment analysis. The current version of SentiWS contains 1,650 199 | #' positive and 1,818 negative words, which sum up to 15,649 positive and 15,632 200 | #' negative word forms including their inflections. 
It not only contains 201 | #' adjectives and adverbs explicitly expressing a sentiment, but also nouns and 202 | #' verbs implicitly containing one. The original dictionary weights within the 203 | #' interval of -1 to 1. Note that the version implemented in 204 | #' \pkg{quanteda.dictionaries} uses a binary classification into positive 205 | #' (weight > 0) and negative (weight < 0) features. 206 | #' @source 207 | #' @references 208 | #' Remus, R., Quasthoff U., and Heyer, G. (2010). [SentiWS: a Publicly 209 | #' Available German-language Resource for Sentiment 210 | #' Analysis](http://www.lrec-conf.org/proceedings/lrec2010/pdf/490_Paper.pdf). 211 | #' In _Proceedings of the 7th International Language Ressources and Evaluation 212 | #' (LREC'10)_, 1168--1171. 213 | #' 214 | #' @keywords data 215 | "data_dictionary_sentiws" 216 | 217 | #' Nielsen's (2011) 'new ANEW' valenced word list 218 | #' 219 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing Finn Årup 220 | #' Nielsen's (2011) 'new ANEW' valenced word list, a publicly available list of 221 | #' English words rated for valence with values between -5 (negative) and +5 222 | #' (positive). AFINN-111, the latest version, contains 2,477 words and phrases. 223 | #' @source 224 | #' @references 225 | #' Nielsen, F. Å. (2011). [A new ANEW: Evaluation of a Word List for Sentiment 226 | #' Analysis in Microblogs.](https://arxiv.org/abs/1103.2903) In *Proceedings 227 | #' of the ESWC2011 Workshop on 'Making Sense of Microposts': Big Things Come 228 | #' in Small Packages*, 93--98. 229 | #' @format 230 | #' A [dictionary] with one key, `AFINN`, with valences from -5 (negative) to +5 231 | #' (positive). 232 | #' @section License: 233 | #' [Open Database License (ODbL) v1.0](https://opendatacommons.org/licenses/odbl/1-0/) 234 | #' @keywords data 235 | "data_dictionary_AFINN" 236 | 237 | #' Affective Norms for English Words (ANEW) 238 | #' 239 | #' A quanteda dictionary object containing the ANEW, or Affective Norms for 240 | #' English Words (Bradley and Lang 2017) valenced lexicon. The ANEW provides a 241 | #' lexicon of 2,471 distinct fixed word matches that are associated with three 242 | #' valenced categories: pleasure, arousal, and dominance. 243 | #' @format 244 | #' A [dictionary] with three valenced keys: `pleasure`, `arousal`, and 245 | #' `dominance`, each with valences from 1 to 9 and containing the same 2,471 246 | #' fixed word values. 247 | #' @section License: 248 | #' ANEW Statement of Use 249 | #' 250 | #' In accepting the ANEW materials, I agree not to make the ANEW available to 251 | #' the media (television, magazines, etc.) or to place them on any internet or 252 | #' computer-accessible websites. I also agree not to publish the ANEW in any 253 | #' print format – including JOURNALS, newspapers, etc. I also agree that I will 254 | #' not provide the ANEW materials to profit making companies or organizations 255 | #' and I agree not to distribute my username and password to unauthorized 256 | #' parties. 257 | #' @keywords data 258 | "data_dictionary_ANEW" 259 | 260 | #' Rauh's German Political Sentiment Dictionary 261 | #' 262 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing the 263 | #' dictionaries provided in Rauh (forthcoming). Rauh assesses its performance 264 | #' against human intuition of sentiment in German political language 265 | #' (parliamentary speeches, party manifestos, and media coverage). 
The resource 266 | #' builds on, harmonizes and extends the SentiWS (Remus et al. 2010) and 267 | #' GermanPolarityClues (Waltinger 2010) dictionaries. In order to use the 268 | #' negation correction provided by the dictionary, currently a combination of 269 | #' [tokens_replace][quanteda::tokens_replace] and [tokens_compound][quanteda::tokens_compound] is 270 | #' required to harmonize the five covered bi-gram patterns prior to scoring. The 271 | #' example below shows how to conduct this transformation. Note that the process 272 | #' changes the terms "nicht|nichts|kein|keine|keinen" to a joint term altering 273 | #' some of the features of the original corpus. 274 | #' @format The dictionary has four keys. 275 | #' \describe{ 276 | #' \item{`negative`}{19,750 terms indicating negative sentiment} 277 | #' \item{`positive`}{17,330 terms indicating positive sentiment} 278 | #' \item{`neg_positive`}{17,330 terms indicating a positive word preceded 279 | #' by a negation (used to convey negative sentiment)} 280 | #' \item{`neg_negative`}{19,750 terms indicating a negative word preceded 281 | #' by a negation (used to convey positive sentiment)} 282 | #' } 283 | #' @source \doi{10.7910/DVN/BKBXWD} 284 | #' @references 285 | #' Rauh, C. (2018). Validating a Sentiment Dictionary for German Political 286 | #' Language: A Workbench Note. 287 | #' *Journal of Information Technology & Politics*, 15(4), 319--343. 288 | #' \doi{10.1080/19331681.2018.1485608} 289 | #' 290 | #' Remus, R., Quasthoff U., & Heyer, G. (2010). "[SentiWS - a Publicly 291 | #' Available German-language Resource for Sentiment 292 | #' Analysis.](http://www.lrec-conf.org/proceedings/lrec2010/pdf/490_Paper.pdf)" 293 | #' In *Proceedings of the 7th International Language Resources and Evaluation 294 | #' (LREC'10)*, 1168--1171. 295 | #' 296 | #' Waltinger, U. (2010). "[GermanPolarityClues: A Lexical Resource for German 297 | #' Sentiment Analysis](http://www.ulliwaltinger.de/pdf/91_Paper.pdf)." In 298 | #' *International Conference on Language Resources and Evaluation*, 17--23 May 299 | #' 2010 LREC'10. 300 | #' @examples 301 | #' \donttest{ 302 | #' # tokenize example text 303 | #' toks <- tokens("nicht schlecht dieses wunderschöne Wörterbuch") 304 | #' # replace negation markers with "not" 305 | #' toks1 <- tokens_replace(toks, pattern = c("nicht", "nichts", "kein", 306 | #' "keine", "keinen"), 307 | #' replacement = rep("not", 5)) 308 | #' # compound bi-gram negation patterns 309 | #' toks2 <- tokens_compound(toks1, data_dictionary_Rauh, concatenator = " ") 310 | #' 311 | #' # apply dictionary 312 | #' tokens_lookup(toks2, dictionary = data_dictionary_Rauh) |> 313 | #' dfm() 314 | #' } 315 | #' @keywords data 316 | "data_dictionary_Rauh" 317 | 318 | #' Loughran and McDonald Sentiment Word Lists 319 | #' 320 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing 321 | #' the 2014 version of the Loughran and McDonald Sentiment Word Lists. The 322 | #' categories are "negative" (2355 features), "positive" (354), "uncertainty" (297), "litigious" (903), 323 | #' "constraining" (184), "superfluous" (56), "interesting" (68), "modal words strong" (68) 324 | #' and "modal words weak" (0). 325 | #' @source 326 | #' @references 327 | #' Loughran, T. & McDonald, B. (2011). When is a Liability not a Liability? 328 | #' Textual Analysis, Dictionaries, and 10-Ks. 329 | #' *Journal of Finance*, 66(1), 35--65. 
\doi{10.1111/j.1540-6261.2010.01625.x} 330 | #' @keywords data 331 | "data_dictionary_LoughranMcDonald" 332 | 333 | -------------------------------------------------------------------------------- /R/print.R: -------------------------------------------------------------------------------- 1 | print_dictionary <- quanteda:::print_dictionary 2 | 3 | # new method for printing sentiment dictionaries 4 | #' @import methods 5 | setMethod("print", signature(x = "dictionary2"), 6 | function(x, 7 | max_nkey = quanteda::quanteda_options("print_dictionary_max_nkey"), 8 | max_nval = quanteda::quanteda_options("print_dictionary_max_nval"), 9 | show_summary = quanteda::quanteda_options("print_dictionary_summary"), 10 | ...) { 11 | x <- quanteda::as.dictionary(x) 12 | if (show_summary) { 13 | depth <- dictionary_depth(x) 14 | lev <- if (depth > 1L) " primary" else "" 15 | nkey <- length(names(x)) 16 | cat("Dictionary object with ", as.character(nkey), lev, " key entr", 17 | if (nkey == 1L) "y" else "ies", sep = "") 18 | if (lev != "") cat(" and ", as.character(depth), " nested levels", sep = "") 19 | cat(".\n") 20 | if (!is.null(polarity(x))) { 21 | cat("Polarities: ") 22 | poles <- lapply(polarity(x), function(y) paste0("\"", y, "\"")) 23 | cat(mapply(paste, names(poles), "=", 24 | unname(sapply(poles, paste, collapse = ", "))) |> 25 | paste(collapse = "; "), 26 | "\n") 27 | } 28 | if (!is.null(valence(x))) { 29 | cat("Valences set for keys: ") 30 | cat(paste(names(valence(x)), collapse = ", "), "\n") 31 | } 32 | } 33 | invisible(print_dictionary(x, 1, max_nkey, max_nval, ...)) 34 | }) 35 | -------------------------------------------------------------------------------- /R/quanteda.sentiment-package.R: -------------------------------------------------------------------------------- 1 | #' @keywords internal 2 | "_PACKAGE" 3 | 4 | # The following block is used by usethis to automatically manage 5 | # roxygen namespace tags. Modify with care! 6 | ## usethis namespace: start 7 | ## usethis namespace: end 8 | NULL 9 | -------------------------------------------------------------------------------- /R/re-exports.R: -------------------------------------------------------------------------------- 1 | friendly_class_undefined_message <- quanteda:::friendly_class_undefined_message 2 | dictionary_depth <- quanteda:::dictionary_depth 3 | print_dictionary <- quanteda:::print_dictionary 4 | -------------------------------------------------------------------------------- /R/textstat_polarity.R: -------------------------------------------------------------------------------- 1 | # textstat_polarity ---------------- 2 | 3 | #' Compute sentiment from key polarities 4 | #' 5 | #' Compute sentiment scores using a polarity approach, based on assigned 6 | #' categories (types or features) of positive, negative, and neutral sentiment. 7 | #' Several formulas for combining the polar categories are available, or the 8 | #' user can supply a custom function. 9 | #' @param x a character, [corpus], [tokens], or [dfm] object containing 10 | #' text, tokens, or features whose sentiment will be scored 11 | #' @param dictionary a [dictionary] that has [polarity] set, indicating which 12 | #' keys are associated with positive, negative, and (optionally) neutral 13 | #' sentiment 14 | #' @param fun function; the formula for computing sentiment, which must refer to 15 | #' `pos`, `neg`, and (optionally) `neut`. The default is the "logit" scale 16 | #' (Lowe et al 2011) which is the log of (positive / negative) counts. 
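A smoothing constant (0.5 by default) is added to the positive and negative counts before the logarithms are taken.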
See 17 | #' [sentiment-functions] for details and for additional available functions, 18 | #' as well as details on how to supply custom functions. 19 | #' @param ... additional arguments passed to `fun` 20 | #' @return a [data.frame] of sentiment scores 21 | #' @export 22 | #' @references Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011). 23 | #' Scaling Policy Preferences from Coded Political Texts. _Legislative Studies 24 | #' Quarterly_, 36(1), 123–155. \doi{10.1111/j.1939-9162.2010.00006.x} 25 | #' @examples 26 | #' library("quanteda") 27 | #' corp <- tail(data_corpus_inaugural, n = 5) 28 | #' toks <- tokens(corp) 29 | #' dfmat <- dfm(toks) 30 | #' polar1 <- list(pos = "positive", neg = "negative") 31 | #' polar2 <- list(pos = c("positive", "neg_negative"), 32 | #' neg = c("negative", "neg_positive")) 33 | #' 34 | #' polarity(data_dictionary_LSD2015) <- polar1 35 | #' textstat_polarity(corp, dictionary = data_dictionary_LSD2015) 36 | #' textstat_polarity(toks, dictionary = data_dictionary_LSD2015) 37 | #' textstat_polarity(dfmat, dictionary = data_dictionary_LSD2015) 38 | #' 39 | #' polarity(data_dictionary_LSD2015) <- polar2 40 | #' textstat_polarity(corp, dictionary = data_dictionary_LSD2015) 41 | #' textstat_polarity(toks, dictionary = data_dictionary_LSD2015) 42 | #' textstat_polarity(corp, dictionary = data_dictionary_LSD2015) 43 | #' textstat_polarity(dfmat, dictionary = data_dictionary_LSD2015) 44 | #' 45 | #' # with a user-supplied function 46 | #' sent_fn <- function(x) (x[, "pos"] - x[, "neg"]) / rowSums(x) * 100 47 | #' textstat_polarity(toks, data_dictionary_LSD2015, fun = sent_fn) 48 | textstat_polarity <- function(x, dictionary, fun = sent_logit, ...) { 49 | UseMethod("textstat_polarity") 50 | } 51 | 52 | #' @export 53 | textstat_polarity.default <- function(x, dictionary, fun = sent_logit, ...) { 54 | stop(friendly_class_undefined_message(class(x), "textstat_polarity")) 55 | } 56 | 57 | #' @importFrom quanteda corpus 58 | #' @export 59 | textstat_polarity.character <- function(x, ...) { 60 | textstat_polarity(corpus(x), ...) 61 | } 62 | 63 | #' @importFrom quanteda tokens 64 | #' @export 65 | textstat_polarity.corpus <- function(x, ...) { 66 | textstat_polarity(tokens(x), ...) 67 | } 68 | 69 | #' @importFrom quanteda dictionary tokens_lookup dfm 70 | #' @export 71 | textstat_polarity.tokens <- function(x, dictionary, ...) { 72 | dict <- get_polarity_dictionary(dictionary) 73 | poldict <- dictionary(polarity(dict)) 74 | polarity(poldict) <- polarity(dict) 75 | 76 | tokens(x) |> 77 | tokens_lookup(dictionary = dict, nomatch = "other", nested_scope = "dictionary") |> 78 | dfm() |> 79 | textstat_polarity(dictionary = poldict, ...) 80 | } 81 | 82 | #' @importFrom quanteda convert dfm_lookup as.dfm 83 | #' @export 84 | textstat_polarity.dfm <- function(x, dictionary, fun = sent_logit, ...) { 85 | dict <- get_polarity_dictionary(dictionary) 86 | 87 | result <- fun(dfm_lookup(x, dict, nomatch = "other"), ...) 88 | result <- convert(as.dfm(result), to = "data.frame") 89 | names(result)[2] <- "sentiment" 90 | 91 | class(result) <- c("sentiment", "textstat", "data.frame") 92 | attr(result, "fun") <- fun 93 | attr(result, "fun_name") <- as.character(substitute(fun)) 94 | 95 | result 96 | } 97 | 98 | 99 | # polarity setting and checking functions -------------- 100 | 101 | #' Set or get the sentiment polarity of a dictionary 102 | #' 103 | #' Set or retrieve the polarity of a [dictionary] object for the purposes of 104 | #' sentiment analysis. 
Polarity consists of a set of dictionary keys that are 105 | #' associated with positive, negative, and (optionally) neutral categories for 106 | #' use in [textstat_polarity()]. 107 | #' 108 | #' A dictionary may have only one set of polarities at a time, but may be 109 | #' changed as needed. 110 | #' @param x a [dictionary] object 111 | #' @return `polarity()` returns the polarity as a list. 112 | #' @keywords dictionary textstat utility 113 | #' @export 114 | #' 115 | #' @examples 116 | #' library("quanteda") 117 | #' simpledict <- dictionary(list( 118 | #' happy = c("happy", "jubilant", "exuberant"), 119 | #' sad = c("sad", "morose", "down") 120 | #' )) 121 | #' polarity(simpledict) 122 | #' polarity(simpledict) <- list(pos = "happy", neg = "sad") 123 | #' polarity(simpledict) 124 | #' 125 | #' # can list multiple keys 126 | #' polarity(data_dictionary_LSD2015) <- list( 127 | #' pos = c("positive", "neg_negative"), 128 | #' neg = c("negative", "neg_positive") 129 | #' ) 130 | #' polarity(data_dictionary_LSD2015) 131 | polarity <- function(x) { 132 | UseMethod("polarity") 133 | } 134 | 135 | #' @export 136 | polarity.dictionary2 <- function(x) { 137 | x@meta$object$polarity 138 | } 139 | 140 | #' @rdname polarity 141 | #' @param value list consisting of named character vectors `pos`, `neg`, and 142 | #' (optionally) `neut` corresponding to positive, negative, and neutral 143 | #' sentiment categories respectively. Each element may contain multiple 144 | #' key names. The `neut` category is optional but `pos` and `neg` must be 145 | #' supplied. 146 | #' @return `polarity<-` sets the dictionary's polarity. 147 | #' @export 148 | "polarity<-" <- function(x, value) { 149 | UseMethod("polarity<-") 150 | } 151 | 152 | #' @export 153 | "polarity<-.dictionary2" <- function(x, value) { 154 | if (!is.null(value)) { 155 | if (!setequal(union(c("pos", "neg", "neut"), names(value)), 156 | c("pos", "neg", "neut")) || 157 | !is.list(value)) { 158 | stop("value must be a list of 'pos', 'neg', and (optionally) 'neut'", 159 | call. = FALSE) 160 | } 161 | check_that_poles_exist(x, value) 162 | } else { 163 | if (is.null(valence(x))) class(x) <- "dictionary2" 164 | } 165 | 166 | x@meta$object$polarity <- value 167 | x 168 | } 169 | 170 | #' Get a standard polarity dictionary for sentiment analysis 171 | #' 172 | #' Checks and standardizes a [dictionary] object with its [polarity] set, so 173 | #' that the polarity categories are standardized into the keys `pos`, `neg`, and 174 | #' (optionally) `neut`. Also checks that the dictionary contains all of the 175 | #' keys named in the polarity object. (It is necessary to check here since the 176 | #' dictionary could have been subset after creation.) 177 | #' @param dictionary a \pkg{quanteda} [dictionary] 178 | #' @return a single-level [dictionary] with keys `pos`, `neg`, and (optionally) 179 | #' `neut`. 180 | #' @keywords internal 181 | get_polarity_dictionary <- function(dictionary) { 182 | poles <- polarity(dictionary) 183 | 184 | # check the poles 185 | if (is.null(poles)) { 186 | stop("polarity is not set for this dictionary; see ?polarity", 187 | call. 
= FALSE) 188 | } 189 | check_that_poles_exist(dictionary, poles) 190 | 191 | # standardize the dictionary 192 | dictlist <- list( 193 | pos = unlist(dictionary[poles$pos], use.names = FALSE), 194 | neg = unlist(dictionary[poles$neg], use.names = FALSE), 195 | neut = unlist(dictionary[poles$neut], use.names = FALSE) 196 | ) 197 | dict <- dictionary(dictlist[!sapply(dictlist, is.null)]) 198 | 199 | # set the polarity to the keys 200 | newpoles <- list(pos = "pos", neg = "neg") 201 | if (!is.null(dictlist$neut)) newpoles <- c(newpoles, list(neut = "neut")) 202 | polarity(dict) <- newpoles 203 | 204 | return(dict) 205 | } 206 | 207 | 208 | check_that_poles_exist <- function(dictionary, poles) { 209 | poles <- unlist(poles, use.names = FALSE) 210 | polematch <- poles %in% names(dictionary) 211 | if (!all(polematch)) { 212 | stop("'", poles[!polematch], "' key not found in this dictionary", 213 | call. = FALSE) 214 | } 215 | } 216 | 217 | # sentiment formula functions -------------- 218 | 219 | #' Sentiment functions 220 | #' 221 | #' Functions for computing sentiment, for [textstat_polarity()]. Each function 222 | #' takes an input [dfm] with fixed feature names (see Details), and returns a 223 | #' sparse Matrix with a single column representing the results of the sentiment 224 | #' calculation. 225 | #' 226 | #' @details 227 | #' User supplied functions must take `x` and optional additional arguments, such 228 | #' as `smooth` for a smoothing constant for the logit scaling function. feature 229 | #' names for the sentiment categories `pos`, `neg`, `neut`, and `other`. (The 230 | #' `other` category is only required when a scaling function needs the count of 231 | #' non-sentiment associated features.) 232 | #' 233 | #' Additional arguments may be passed via `...`, such as `smooth` for the logit 234 | #' scale. 235 | #' 236 | #' @param x a [dfm] that has the following required feature names: `pos`, 237 | #' `neg`, `neut`, and `other` 238 | #' @return a sparse \pkg{Matrix} object of documents by sentiment score, where 239 | #' the sentiment score is the only column. (Its name is unimportant as this 240 | #' will not be used by [textstat_polarity()].) 241 | #' @keywords textstat internal 242 | #' @references Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011). 243 | #' Scaling Policy Preferences from Coded Political Texts. _Legislative Studies 244 | #' Quarterly_, 36(1), 123–155. 245 | #' \doi{10.1111/j.1939-9162.2010.00006.x} 246 | #' @name sentiment-functions 247 | #' @examples 248 | #' library("quanteda") 249 | #' dfmat <- c("pos pos pos neg pos pos", "neg neg pos pos pos") |> 250 | #' tokens() |> 251 | #' dfm() 252 | #' sent_logit(dfmat) 253 | #' sent_abspropdiff(dfmat) 254 | #' 255 | #' # user-supplied function 256 | #' my_sent_fn <- function(x) (x[, "pos"] - x[, "neg"]) / rowSums(x) * 100 257 | #' my_sent_fn(dfmat) 258 | #' 259 | #' # user supplied function with fixed weights and using neutral category 260 | #' dfmat2 <- c("pos pos neut neg neut pos", "neg neg neut neut pos") |> 261 | #' tokens() |> 262 | #' dfm() 263 | #' my_sent_fn2 <- function(x) (x[, "pos"]*3 + x[, "neut"]*2 + x[, "neg"]*1)/3 264 | #' my_sent_fn2(dfmat2) 265 | NULL 266 | 267 | #' @description `sent_logit` is \eqn{log(\frac{pos}{neg})}. 
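For example, with the default `smooth = 0.5`, a document with 5 positive and 2 negative matches scores \eqn{log(5.5) - log(2.5) \approx 0.79}.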
268 | #' @rdname sentiment-functions 269 | #' @param smooth additional smoothing function added to `pos` and `neg` before 270 | #' logarithmic transformation 271 | #' @export 272 | sent_logit <- function(x, smooth = 0.5) { 273 | log(x[, "pos"] + smooth) - log(x[, "neg"] + smooth) 274 | } 275 | 276 | #' @description `sent_abspropdiff` is \eqn{\frac{pos - neg}{N}}, where \eqn{N} 277 | #' is the total number of all features in a document. 278 | #' @rdname sentiment-functions 279 | #' @importFrom Matrix rowSums 280 | #' @export 281 | sent_abspropdiff <- function(x) { 282 | (x[, "pos"] - x[, "neg"]) / Matrix::rowSums(x) 283 | } 284 | 285 | #' @description `sent_relpropdiff` is \eqn{\frac{pos - neg}{pos + neg}}. 286 | #' @rdname sentiment-functions 287 | #' @export 288 | sent_relpropdiff <- function(x) { 289 | (x[, "pos"] - x[, "neg"]) / (x[, "pos"] + x[, "neg"]) 290 | } 291 | -------------------------------------------------------------------------------- /R/textstat_valence.R: -------------------------------------------------------------------------------- 1 | # textstat_valence ---------------- 2 | 3 | #' Compute sentiment from word valences 4 | #' 5 | #' Compute sentiment scores from tokens or document-feature matrices, based on 6 | #' the valences of dictionary keys and values. 7 | #' @param x a character, [corpus], [tokens], or [dfm] object containing 8 | #' text, tokens, or features whose sentiment will be scored. 9 | #' @param dictionary a \pkg{quanteda} [dictionary] that has [valence] set, in 10 | #' the form of numerical valences associated with sentiment 11 | #' @param normalization the baseline for normalizing the sentiment counts after 12 | #' scoring. Sentiment scores within keys are weighted means of the tokens 13 | #' matched to dictionary values, weighted by their valences. The default 14 | #' `"dictionary"` is to average over only the valenced words. `"all"` 15 | #' averages across all tokens, and `"none"` does no normalization. 16 | #' @param ... not used here 17 | #' @return a data.frame of sentiment scores 18 | #' @note 19 | #' If the input item is a [dfm], then multi-word values will not be matched 20 | #' unless the features of the [dfm] have been compounded previously. The input 21 | #' objects should not have had dictionaries applied previously. 22 | #' @export 23 | #' @references 24 | #' For a discussion of how to aggregate sentiment scores to the document 25 | #' level, see: 26 | #' 27 | #' Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011). 28 | #' Scaling Policy Preferences from Coded Political Texts. _Legislative Studies 29 | #' Quarterly_, 36(1), 123–155. 
30 | #' \doi{10.1111/j.1939-9162.2010.00006.x} 31 | #' @seealso [valence()] 32 | #' @examples 33 | #' library("quanteda") 34 | #' \dontrun{ 35 | #' 36 | #' # AFINN 37 | #' afinn <- read.delim(system.file("extdata/afinn/AFINN-111.txt", 38 | #' package = "quanteda.sentiment"), 39 | #' header = FALSE, col.names = c("word", "valence")) 40 | #' data_dictionary_afinn <- dictionary(list(afinn = afinn$word)) 41 | #' valence(data_dictionary_afinn) <- list(afinn = afinn$valence) 42 | #' textstat_valence(toks, dictionary = data_dictionary_afinn) 43 | #' 44 | #' # ANEW 45 | #' anew <- read.delim(url("https://bit.ly/2zZ44w0")) 46 | #' anew <- anew[!duplicated(anew$Word), ] # because some words repeat 47 | #' data_dictionary_anew <- dictionary(list(pleasure = anew$Word, 48 | #' arousal = anew$Word, 49 | #' dominance = anew$Word)) 50 | #' valence(data_dictionary_anew) <- list(pleasure = anew$ValMn, 51 | #' arousal = anew$AroMn, 52 | #' dominance = anew$DomMn) 53 | #' textstat_valence(toks, data_dictionary_anew["pleasure"]) 54 | #' textstat_valence(toks, data_dictionary_anew["arousal"])} 55 | #' 56 | textstat_valence <- function(x, dictionary, 57 | normalization = c("dictionary", "all", "none"), ...) { 58 | UseMethod("textstat_valence") 59 | } 60 | 61 | #' @export 62 | textstat_valence.default <- function(x, dictionary, 63 | normalization = c("dictionary", "all", "none"), ...) { 64 | stop(friendly_class_undefined_message(class(x), "textstat_valence")) 65 | } 66 | 67 | #' @export 68 | textstat_valence.character <- function(x, ...) { 69 | textstat_valence(corpus(x), ...) 70 | } 71 | 72 | #' @export 73 | textstat_valence.corpus <- function(x, ...) { 74 | textstat_valence(tokens(x), ...) 75 | } 76 | 77 | #' @export 78 | textstat_valence.tokens <- function(x, dictionary, 79 | normalization = c("dictionary", "all", "none"), ...) { 80 | normalization <- match.arg(normalization) 81 | valence(dictionary) <- set_valences(dictionary, valence(dictionary)) 82 | numdict <- dictionary(as.list(flip_valence(dictionary))) 83 | quanteda::as.tokens(x) |> 84 | tokens_lookup(dictionary = numdict, nomatch = "other", 85 | nested_scope = "dictionary") |> 86 | dfm() |> 87 | aggregate_valence(norm = normalization) 88 | } 89 | 90 | #' @export 91 | textstat_valence.dfm <- function(x, dictionary, 92 | normalization = c("dictionary", "all", "none"), ...) 
{ 93 | normalization <- match.arg(normalization) 94 | valence(dictionary) <- set_valences(dictionary, valence(dictionary)) 95 | numdict <- dictionary(as.list(flip_valence(dictionary))) 96 | as.dfm(x) |> 97 | dfm_lookup(dictionary = numdict, nomatch = "other") |> 98 | aggregate_valence(norm = normalization) 99 | } 100 | 101 | # internal sentiment calculation functions ----------- 102 | 103 | # uses Kohei's approach to make the valence values into the keys, and 104 | # then groups all values together under that score 105 | flip_valence <- function(dictionary) { 106 | v <- valence(dictionary) 107 | if (is.null(v)) stop("valence not set") 108 | 109 | structure(unlist(sapply(v, names), use.names = FALSE), 110 | names = unlist(v, use.names = FALSE)) 111 | } 112 | 113 | aggregate_valence <- function(x, norm = c("dictionary", "all", "none")) { 114 | norm <- match.arg(norm) 115 | other_index <- match("other", colnames(x)) 116 | if (norm == "dictionary") { 117 | denom <- rowSums(x[, -other_index]) 118 | } else if (norm == "all") { 119 | denom <- rowSums(x) 120 | } else if (norm == "none") { 121 | denom <- 1 122 | } 123 | x <- x[, -other_index] 124 | result <- data.frame(doc_id = quanteda::docnames(x), 125 | sentiment = as.vector(x %*% as.numeric(colnames(x)) 126 | / denom)) 127 | result$sentiment[is.na(result$sentiment)] <- 0 128 | result 129 | } 130 | 131 | # valence setting and checking functions -------------- 132 | 133 | #' Set or get the valences of dictionary values or keys 134 | #' 135 | #' Set or retrieve the valences of a [dictionary] object for the purposes of 136 | #' sentiment analysis. Valences consist of numerical values attached to each 137 | #' dictionary "value". For dictionaries with a more "polarity"-based approach, 138 | #' see [textstat_polarity()] 139 | #' 140 | #' Valences are used only in [textstat_valence()]. 141 | #' 142 | #' A dictionary may have only one set of valences at a time, but may be 143 | #' changed as needed. 144 | #' @param x a \pkg{quanteda} [dictionary][quanteda::dictionary] object 145 | #' @return `valences()` returns the valences as a list named numeric vectors, 146 | #' where each list element corresponds to a key in the dictionary, and each 147 | #' numeric element matches a value within that key. 148 | #' @keywords dictionary textstat utility 149 | #' @seealso [textstat_valence()], [valence()] 150 | #' @export 151 | #' 152 | #' @examples 153 | #' library("quanteda") 154 | #' 155 | #' # setting valences 156 | #' dict <- dictionary(list( 157 | #' happiness = c("happy", "jubilant", "exuberant", "content"), 158 | #' anger = c("mad", "peeved", "irate", "furious", "livid") 159 | #' )) 160 | #' valence(dict) 161 | #' # using a 5-point scale: 1:1 match 162 | #' valence(dict) <- list(happiness = c(3, 4, 5, 2), 163 | #' anger = c(3.1, 2.4, 2.9, 4.1, 5.0)) 164 | #' valence(dict) 165 | #' # with single valences applied to all values within the keys 166 | #' valence(dict) <- c(happiness = 1, anger = -1) 167 | #' valence(dict) 168 | #' # with named elements - order does not matter 169 | #' valence(dict) <- list( 170 | #' happiness = c(exuberant = 5, jubilant = 4, happy = 3, content = 2) 171 | #' ) 172 | #' valence(dict) 173 | #' 174 | valence <- function(x) { 175 | UseMethod("valence") 176 | } 177 | 178 | #' @export 179 | valence.dictionary2 <- function(x) { 180 | x@meta$object$valence 181 | } 182 | 183 | #' @rdname valence 184 | #' @param value named list consisting of numerical value. The names of the 185 | #' elements must correspond to a dictionary key. 
Each element must be: 186 | #' * a single numeric value that will be applied to all of the dictionary 187 | #' values in that key; or 188 | #' * a vector of numeric values that matches the length and order of the 189 | #' dictionary values in that key; or 190 | #' * a named numeric vector where each element name matches dictionary values 191 | #' in the key. 192 | #' @return `valence<-` sets the dictionary's valences. 193 | #' @export 194 | "valence<-" <- function(x, value) { 195 | UseMethod("valence<-") 196 | } 197 | 198 | #' @export 199 | "valence<-.dictionary2" <- function(x, value) { 200 | if (!is.null(value)) { 201 | value <- as.list(value) 202 | check_valences(x, value) 203 | x@meta$object$valence <- set_valences(x, value) 204 | } else { 205 | x@meta$object$valence <- NULL 206 | if (!is.null(polarity(x))) class(x) <- "dictionary2" 207 | } 208 | x 209 | } 210 | 211 | dictionary_depth <- quanteda:::dictionary_depth 212 | 213 | check_valences <- function(dictionary, valences) { 214 | if (dictionary_depth(dictionary) > 1) 215 | stop("valenced dictionaries cannot be nested", call. = FALSE) 216 | if (!is.list(valences) || any(names(valences) == "")) 217 | stop("valence must be a fully named list", call. = FALSE) 218 | for (key in names(valences)) { 219 | if (!key %in% names(dictionary)) 220 | stop("'", key, "' is not a dictionary key", call. = FALSE) 221 | if (!is.numeric(valences[[key]])) 222 | stop("valence values must be numeric", call. = FALSE) 223 | if (length(valences[[key]]) != 1 && 224 | length(valences[[key]]) != length(dictionary[[key]])) 225 | stop("valence value length not equal to number of values for key '", 226 | key, "'", call. = FALSE) 227 | } 228 | } 229 | 230 | set_valences <- function(dictionary, valences) { 231 | # only use valences for keys in dictionary 232 | valences <- valences[names(valences) %in% names(dictionary)] 233 | if (!length(valences)) 234 | stop("no valenced keys found") 235 | 236 | for (key in names(valences)) { 237 | # repeat valences if only a single value is supplied 238 | if (length(valences[[key]]) == 1) 239 | valences[[key]] <- rep(valences[[key]], length(dictionary[[key]])) 240 | # use dictionary values as names if none supplied 241 | if (length(names(valences[[key]])) != length(valences[[key]])) 242 | names(valences[[key]]) <- dictionary[[key]] 243 | } 244 | valences 245 | } 246 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | ```{r, echo = FALSE} 6 | knitr::opts_chunk$set( 7 | collapse = TRUE, 8 | comment = "##", 9 | fig.path = "man/images/" 10 | ) 11 | ``` 12 | ```{r echo = FALSE, results = "hide", message = FALSE} 13 | library("badger") 14 | ``` 15 | 16 | # quanteda.sentiment 17 | 18 | 19 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/quanteda.sentiment)](https://cran.r-project.org/package=quanteda.sentiment) 20 | `r badge_devel("quanteda/quanteda.sentiment", "royalblue")` 21 | [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) 22 | [![Codecov test coverage](https://codecov.io/gh/quanteda/quanteda.sentiment/branch/master/graph/badge.svg)](https://app.codecov.io/gh/quanteda/quanteda.sentiment?branch=master) 23 | 
[![R-CMD-check](https://github.com/quanteda/quanteda.sentiment/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/quanteda/quanteda.sentiment/actions/workflows/R-CMD-check.yaml) 24 | <!-- badges: end --> 25 | 26 | ## Installation 27 | 28 | You can install **quanteda.sentiment** from GitHub with: 29 | 30 | ```{r eval = FALSE} 31 | remotes::install_github("quanteda/quanteda.sentiment") 32 | ``` 33 | 34 | The package is not yet on CRAN. 35 | 36 | ## About 37 | 38 | **quanteda.sentiment** extends the **quanteda** package with functions for computing sentiment on text. It has two main functions, one for each of two types of sentiment. These follow the structure of a **quanteda** dictionary, which consists of _key_ entries expressing the canonical concept, and _value_ patterns (such as "good", "sad*", etc.) to be matched in a text and counted as occurrences of that key. 39 | 40 | The package approaches sentiment computation in two ways, depending on whether sentiment is considered a key attribute, in which case the keys are assigned a _polarity_ such as _positive_ or _negative_, or whether individual values are assigned a _valence_, in the form of some continuous value indicating a degree of sentiment. Each is implemented in a separate function: 41 | 42 | * **Polarity-based sentiment.** This is implemented via `textstat_polarity()`, for computing sentiment based on keys set as "poles" of positive versus negative sentiment. Polarity is set through the `polarity()<-` function and can be assigned to any keys of any dictionary. "Sentiment" here can be broadly construed as any contrasting pair of poles, such as "Democrat" versus "Republican", for instance. More than one key can be associated with the same pole. 43 | 44 | Polar values are converted into sentiment scores using a flexible function, such as $\mathrm{log}(pos / neg)$, or $(pos - neg)/(pos + neg)$. **quanteda.sentiment** offers three built-in functions, but the user can supply any function for combining polarities. 45 | 46 | * **Valence-based sentiment.** This is implemented via `textstat_valence()`, for computing sentiment as the average valence of a document's words, based on a dictionary whose values have numeric valence scores. Valence scores are set using the `valence()<-` function. Each key in a dictionary may have values with different valences. Both approaches are sketched briefly below.
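
As a quick sketch of both approaches, here is a small dictionary with a polarity and a set of valences assigned to it (the words and scores are invented purely for illustration):

```{r eval = FALSE}
library("quanteda")
library("quanteda.sentiment")

# a tiny illustrative dictionary
dict <- dictionary(list(happy = c("happy", "glad", "delighted"),
                        sad   = c("sad", "gloomy", "dejected")))

# polarity-based: declare which keys form the positive and negative poles
polarity(dict) <- list(pos = "happy", neg = "sad")
textstat_polarity(c(d1 = "happy and glad", d2 = "sad and gloomy"), dict)

# valence-based: attach a numeric valence to each value within a key
valence(dict) <- list(happy = c(1, 1, 2), sad = c(-1, -1, -2))
textstat_valence(c(d1 = "happy and glad", d2 = "sad and gloomy"), dict)
```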
47 | 48 | The package comes with the following built-in dictionaries: 49 | 50 | | Name | Description | Polarity | Valence | 51 | |:---------------------------------|:--------------------------------------------------------------|:--------:|:-------:| 52 | | data_dictionary_AFINN | Nielsen's (2011) 'new ANEW' valenced word list | | ✔ | 53 | | data_dictionary_ANEW | Affective Norms for English Words (ANEW) | | ✔ | 54 | | data_dictionary_geninqposneg | Augmented General Inquirer _Positiv_ and _Negativ_ dictionary | ✔ | | 55 | | data_dictionary_HuLiu | Positive and negative words from Hu and Liu (2004) | ✔ | | 56 | | data_dictionary_LoughranMcDonald | Loughran and McDonald Sentiment Word Lists | ✔ | | 57 | | data_dictionary_LSD2015 | Lexicoder Sentiment Dictionary (2015) | ✔ | | 58 | | data_dictionary_NRC | NRC Word-Emotion Association Lexicon | ✔ | | 59 | | data_dictionary_Rauh | Rauh's German Political Sentiment Dictionary | ✔ | | 60 | | data_dictionary_sentiws | SentimentWortschatz (SentiWS) | ✔ | ✔ | 61 | 62 | 63 | ## Examples 64 | 65 | For a polarity dictionary, we can use the positive and negative key categories from the General Inquirer dictionary: 66 | ```{r} 67 | library("quanteda.sentiment") 68 | 69 | # inspect the dictionary and its polarities 70 | print(data_dictionary_geninqposneg, max_nval = 8) 71 | 72 | # compute sentiment 73 | tail(data_corpus_inaugural) |> 74 | textstat_polarity(dictionary = data_dictionary_geninqposneg) 75 | ``` 76 | 77 | For a valence dictionary, we can compute this for the "pleasure" category of the Affective Norms for English Words (ANEW): 78 | ```{r} 79 | library("quanteda", warn.conflicts = FALSE, quietly = TRUE) 80 | library("quanteda.sentiment") 81 | 82 | # inspect the dictionary and its valences 83 | print(data_dictionary_ANEW, max_nval = 8) 84 | lapply(valence(data_dictionary_ANEW), head, 8) 85 | 86 | # compute the sentiment 87 | tail(data_corpus_inaugural) |> 88 | textstat_valence(dictionary = data_dictionary_ANEW["pleasure"]) 89 | ``` 90 | 91 | We can compare two measures computed in different ways (although they are not comparable, really, since they are different lexicons): 92 | ```{r} 93 | # ensure we have this package's version of the dictionary 94 | data("data_dictionary_LSD2015", package = "quanteda.sentiment") 95 | 96 | sent_pol <- tail(data_corpus_inaugural, 25) |> 97 | textstat_polarity(dictionary = data_dictionary_LSD2015) 98 | sent_pol <- dplyr::mutate(sent_pol, polarity = sentiment) 99 | sent_val <- tail(data_corpus_inaugural, 25) |> 100 | textstat_valence(dictionary = data_dictionary_AFINN) 101 | 102 | library("ggplot2") 103 | 104 | ggplot(data.frame(sent_pol, valence = sent_val$sentiment), 105 | aes(x = polarity, y = valence)) + 106 | geom_point() 107 | ``` 108 | 109 | Good enough for government work! 110 | 111 | ## Where to learn more 112 | 113 | Each dictionary and function has extensive documentation, including references to social scientific research articles where each sentiment concept is described in detail. There is also a package vignette with more detailed examples. 
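
The vignette can be opened from R once the package is installed (assuming vignettes were built):

```{r eval = FALSE}
vignette("sentiment_analysis", package = "quanteda.sentiment")
```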
114 | --------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | # quanteda.sentiment
3 | 
4 | 
5 | 
6 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/quanteda.sentiment)](https://cran.r-project.org/package=quanteda.sentiment)
7 | [![](https://img.shields.io/badge/devel%20version-0.31-royalblue.svg)](https://github.com/quanteda/quanteda.sentiment)
8 | [![Lifecycle:
9 | experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental)
10 | [![Codecov test
11 | coverage](https://codecov.io/gh/quanteda/quanteda.sentiment/branch/master/graph/badge.svg)](https://app.codecov.io/gh/quanteda/quanteda.sentiment?branch=master)
12 | [![R-CMD-check](https://github.com/quanteda/quanteda.sentiment/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/quanteda/quanteda.sentiment/actions/workflows/R-CMD-check.yaml)
13 | 
14 | 
15 | ## Installation
16 | 
17 | You can install **quanteda.sentiment** from GitHub with:
18 | 
19 | ``` r
20 | remotes::install_github("quanteda/quanteda.sentiment")
21 | ```
22 | 
23 | The package is not yet on CRAN.
24 | 
25 | ## About
26 | 
27 | **quanteda.sentiment** extends the **quanteda** package with functions
28 | for computing sentiment on text. It has two main functions, for
29 | computing two types of sentiment. These follow the structure of a
30 | **quanteda** dictionary, which consists of *key* entries expressing the
31 | canonical concept, and *value* patterns (such as “good”, “sad\*“, etc.)
32 | to be matched in a text and counted as occurrences of that key.
33 | 
34 | This package approaches sentiment computation in two ways, depending on
35 | whether sentiment is considered a key attribute, in which case the keys
36 | are assigned a *polarity* such as *positive* or *negative*, or whether
37 | individual values are assigned a *valence*, in the form of some
38 | continuous value indicating a degree of sentiment. Each is implemented
39 | in a separate function:
40 | 
41 | - **Polarity-based sentiment.** This is implemented via
42 | `textstat_polarity()`, for computing sentiment based on keys set as
43 | “poles” of positive versus negative sentiment. Polarity is set
44 | through the `polarity()<-` function and can be set for any keys of
45 | any dictionary. “Sentiment” here can be broadly construed as any
46 | contrasting pair of poles, such as “Democrat” versus “Republican”.
47 | More than one key can be associated with the same pole.
48 | 
49 | Polar values are converted into sentiment scores using a flexible
50 | function, such as $\mathrm{log}(pos / neg)$, or
51 | $(pos - neg)/(pos + neg)$. **quanteda.sentiment** offers three
52 | built-in functions, but the user can supply any function for
53 | combining polarities (see the sketch below).
54 | 
55 | - **Valence-based sentiment.** This is implemented via
56 | `textstat_valence()`, for computing sentiment as the average valence
57 | of a document’s words, based on a dictionary whose values have numeric
58 | valence scores. Valence scores are set using the `valence()<-`
59 | function. Each key in a dictionary may have values with different
60 | valences.
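
Here is a minimal sketch of both workflows on a toy dictionary. The
dictionary, the example texts, and the scoring function `my_fun` are
invented for illustration only (none of them ships with the package); the
built-in dictionaries used in the Examples section below work the same
way.

``` r
library("quanteda")
library("quanteda.sentiment")

# a toy dictionary whose two keys will serve as opposing poles
dict <- dictionary(list(happy = c("happy", "jubilant", "exuberant"),
                        sad   = c("sad", "morose", "down")))
polarity(dict) <- list(pos = "happy", neg = "sad")

toks <- tokens(c(d1 = "A happy, jubilant crowd.",
                 d2 = "A sad and morose anniversary."))

# polarity sentiment with a user-supplied combination function:
# the percentage difference of polar counts relative to all tokens
my_fun <- function(x) (x[, "pos"] - x[, "neg"]) / rowSums(x) * 100
textstat_polarity(toks, dictionary = dict, fun = my_fun)

# the same dictionary can instead carry numeric valences for its values
valence(dict) <- list(happy = c(happy = 1, jubilant = 2, exuberant = 3),
                      sad   = c(sad = -1, morose = -2, down = -3))
textstat_valence(toks, dictionary = dict)
```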
61 | 62 | The package comes with the following built-in dictionaries: 63 | 64 | | Name | Description | Polarity | Valence | 65 | |:---------------------------------|:--------------------------------------------------------------|:--------:|:-------:| 66 | | data_dictionary_AFINN | Nielsen’s (2011) ‘new ANEW’ valenced word list | | ✔ | 67 | | data_dictionary_ANEW | Affective Norms for English Words (ANEW) | | ✔ | 68 | | data_dictionary_geninqposneg | Augmented General Inquirer *Positiv* and *Negativ* dictionary | ✔ | | 69 | | data_dictionary_HuLiu | Positive and negative words from Hu and Liu (2004) | ✔ | | 70 | | data_dictionary_LoughranMcDonald | Loughran and McDonald Sentiment Word Lists | ✔ | | 71 | | data_dictionary_LSD2015 | Lexicoder Sentiment Dictionary (2015) | ✔ | | 72 | | data_dictionary_NRC | NRC Word-Emotion Association Lexicon | ✔ | | 73 | | data_dictionary_Rauh | Rauh’s German Political Sentiment Dictionary | ✔ | | 74 | | data_dictionary_sentiws | SentimentWortschatz (SentiWS) | ✔ | ✔ | 75 | 76 | ## Examples 77 | 78 | For a polarity dictionary, we can use the positive and negative key 79 | categories from the General Inquirer dictionary: 80 | 81 | ``` r 82 | library("quanteda.sentiment") 83 | ## Loading required package: quanteda 84 | ## Package version: 4.0.0 85 | ## Unicode version: 14.0 86 | ## ICU version: 71.1 87 | ## Parallel computing: 10 of 10 threads used. 88 | ## See https://quanteda.io for tutorials and examples. 89 | ## 90 | ## Attaching package: 'quanteda.sentiment' 91 | ## The following object is masked from 'package:quanteda': 92 | ## 93 | ## data_dictionary_LSD2015 94 | 95 | # inspect the dictionary and its polarities 96 | print(data_dictionary_geninqposneg, max_nval = 8) 97 | ## Dictionary object with 2 key entries. 98 | ## Polarities: pos = "positive"; neg = "negative" 99 | ## - [positive]: 100 | ## - abide, ability, able, abound, absolve, absorbent, absorption, abundance [ ... and 1,645 more ] 101 | ## - [negative]: 102 | ## - abandon, abandonment, abate, abdicate, abhor, abject, abnormal, abolish [ ... and 2,002 more ] 103 | 104 | # compute sentiment 105 | tail(data_corpus_inaugural) |> 106 | textstat_polarity(dictionary = data_dictionary_geninqposneg) 107 | ## doc_id sentiment 108 | ## 1 2001-Bush 0.9233579 109 | ## 2 2005-Bush 0.9829457 110 | ## 3 2009-Obama 0.5666378 111 | ## 4 2013-Obama 0.7597420 112 | ## 5 2017-Trump 0.7724428 113 | ## 6 2021-Biden 0.6018714 114 | ``` 115 | 116 | For a valence dictionary, we can compute this for the “pleasure” 117 | category of the Affective Norms for English Words (ANEW): 118 | 119 | ``` r 120 | library("quanteda", warn.conflicts = FALSE, quietly = TRUE) 121 | library("quanteda.sentiment") 122 | 123 | # inspect the dictionary and its valences 124 | print(data_dictionary_ANEW, max_nval = 8) 125 | ## Dictionary object with 3 key entries. 126 | ## Valences set for keys: pleasure, arousal, dominance 127 | ## - [pleasure]: 128 | ## - abduction, able, abortion, absent, absurd, abundance, abuse, accept [ ... and 2,463 more ] 129 | ## - [arousal]: 130 | ## - abduction, able, abortion, absent, absurd, abundance, abuse, accept [ ... and 2,463 more ] 131 | ## - [dominance]: 132 | ## - abduction, able, abortion, absent, absurd, abundance, abuse, accept [ ... 
and 2,463 more ] 133 | lapply(valence(data_dictionary_ANEW), head, 8) 134 | ## $pleasure 135 | ## abduction able abortion absent absurd abundance abuse accept 136 | ## 2.76 6.74 3.50 3.69 4.26 6.59 1.80 6.80 137 | ## 138 | ## $arousal 139 | ## abduction able abortion absent absurd abundance abuse accept 140 | ## 5.53 4.30 5.39 4.73 4.36 5.51 6.83 5.53 141 | ## 142 | ## $dominance 143 | ## abduction able abortion absent absurd abundance abuse accept 144 | ## 3.49 6.83 4.59 4.35 4.73 5.80 3.69 5.41 145 | 146 | # compute the sentiment 147 | tail(data_corpus_inaugural) |> 148 | textstat_valence(dictionary = data_dictionary_ANEW["pleasure"]) 149 | ## doc_id sentiment 150 | ## 1 2001-Bush 6.091330 151 | ## 2 2005-Bush 6.308839 152 | ## 3 2009-Obama 5.841437 153 | ## 4 2013-Obama 6.045129 154 | ## 5 2017-Trump 6.223944 155 | ## 6 2021-Biden 6.018528 156 | ``` 157 | 158 | We can compare two measures computed in different ways (although they 159 | are not comparable, really, since they are different lexicons): 160 | 161 | ``` r 162 | # ensure we have this package's version of the dictionary 163 | data("data_dictionary_LSD2015", package = "quanteda.sentiment") 164 | 165 | sent_pol <- tail(data_corpus_inaugural, 25) |> 166 | textstat_polarity(dictionary = data_dictionary_LSD2015) 167 | sent_pol <- dplyr::mutate(sent_pol, polarity = sentiment) 168 | sent_val <- tail(data_corpus_inaugural, 25) |> 169 | textstat_valence(dictionary = data_dictionary_AFINN) 170 | 171 | library("ggplot2") 172 | 173 | ggplot(data.frame(sent_pol, valence = sent_val$sentiment), 174 | aes(x = polarity, y = valence)) + 175 | geom_point() 176 | ``` 177 | 178 | ![](man/images/unnamed-chunk-6-1.png) 179 | 180 | Good enough for government work! 181 | 182 | ## Where to learn more 183 | 184 | Each dictionary and function has extensive documentation, including 185 | references to social scientific research articles where each sentiment 186 | concept is described in detail. There is also a package vignette with 187 | more detailed examples. 
188 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /data/data_dictionary_AFINN.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_AFINN.rda -------------------------------------------------------------------------------- /data/data_dictionary_ANEW.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_ANEW.rda -------------------------------------------------------------------------------- /data/data_dictionary_HuLiu.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_HuLiu.rda -------------------------------------------------------------------------------- /data/data_dictionary_LSD2015.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_LSD2015.rda -------------------------------------------------------------------------------- /data/data_dictionary_LoughranMcDonald.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_LoughranMcDonald.rda -------------------------------------------------------------------------------- /data/data_dictionary_NRC.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_NRC.rda -------------------------------------------------------------------------------- /data/data_dictionary_Rauh.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_Rauh.rda -------------------------------------------------------------------------------- /data/data_dictionary_geninqposneg.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_geninqposneg.rda -------------------------------------------------------------------------------- /data/data_dictionary_sentiws.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_sentiws.rda -------------------------------------------------------------------------------- /inst/WORDLIST: 
-------------------------------------------------------------------------------- 1 | AFINN 2 | ANEW’ 3 | Analyzing 4 | Codecov 5 | ESWC 6 | EmoLex 7 | FL 8 | GermanPolarityClues 9 | HLT 10 | Heyer 11 | Hu 12 | HuLiu 13 | KDD 14 | Ks 15 | LREC 16 | Lexicoder 17 | Lifecycle 18 | Loughran 19 | LoughranMcDonald 20 | Microblogs 21 | Microposts 22 | Mikhaylov 23 | Mohammad 24 | NAACL 25 | NRC 26 | Negativ 27 | ODbL 28 | Positiv 29 | Quasthoff 30 | READMEs 31 | Rauh 32 | Rauh's 33 | Rauh’s 34 | Remus 35 | Ressources 36 | SIGKDD 37 | Saif 38 | SentiWS 39 | SentimentWortschatz 40 | Soroka 41 | Turney 42 | UF 43 | Waltinger 44 | afinn 45 | analyze 46 | analyzing 47 | damag 48 | dfm 49 | doi 50 | dones 51 | etc 52 | frac 53 | geninqposneg 54 | kein 55 | keine 56 | keinen 57 | mathrm 58 | neut 59 | nicht 60 | nichts 61 | pos 62 | quanteda 63 | sentiws 64 | textstat 65 | th 66 | tibble 67 | tokenization 68 | valenced 69 | Å 70 | Årup 71 | -------------------------------------------------------------------------------- /inst/extdata/afinn/AFINN-README.txt: -------------------------------------------------------------------------------- 1 | AFINN is a list of English words rated for valence with an integer 2 | between minus five (negative) and plus five (positive). The words have 3 | been manually labeled by Finn Årup Nielsen in 2009-2011. The file 4 | is tab-separated. There are two versions: 5 | 6 | AFINN-111: Newest version with 2477 words and phrases. 7 | 8 | AFINN-96: 1468 unique words and phrases on 1480 lines. Note that there 9 | are 1480 lines, as some words are listed twice. The word list in not 10 | entirely in alphabetic ordering. 11 | 12 | An evaluation of the word list is available in: 13 | 14 | Finn Årup Nielsen, "A new ANEW: Evaluation of a word list for 15 | sentiment analysis in microblogs", http://arxiv.org/abs/1103.2903 16 | 17 | The list was used in: 18 | 19 | Lars Kai Hansen, Adam Arvidsson, Finn Årup Nielsen, Elanor Colleoni, 20 | Michael Etter, "Good Friends, Bad News - Affect and Virality in 21 | Twitter", The 2011 International Workshop on Social Computing, 22 | Network, and Services (SocialComNet 2011). 23 | 24 | 25 | This database of words is copyright protected and distributed under 26 | "Open Database License (ODbL) v1.0" 27 | https://www.opendatacommons.org/licenses/odbl/1.0/ or a similar 28 | copyleft license. 29 | 30 | See comments on the word list here: 31 | http://fnielsen.posterous.com/old-anew-a-sentiment-about-sentiment-analysis 32 | 33 | 34 | In Python the file may be read into a dictionary with: 35 | 36 | >>> afinn = dict(map(lambda (k,v): (k,int(v)), 37 | [ line.split('\t') for line in open("AFINN-111.txt") ])) 38 | >>> afinn["Good".lower()] 39 | 3 40 | >>> sum(map(lambda word: afinn.get(word, 0), "Rainy day but still in a good mood".lower().split())) 41 | 2 42 | 43 | 44 | -------------------------------------------------------------------------------- /man/data_dictionary_AFINN.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-documentation.R 3 | \docType{data} 4 | \name{data_dictionary_AFINN} 5 | \alias{data_dictionary_AFINN} 6 | \title{Nielsen's (2011) 'new ANEW' valenced word list} 7 | \format{ 8 | A \link{dictionary} with one key, \code{AFINN}, with valences from -5 (negative) to +5 9 | (positive). 
10 | } 11 | \source{ 12 | \url{http://www2.imm.dtu.dk/pubdb/pubs/6010-full.html} 13 | } 14 | \usage{ 15 | data_dictionary_AFINN 16 | } 17 | \description{ 18 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing Finn Årup 19 | Nielsen's (2011) 'new ANEW' valenced word list, a publicly available list of 20 | English words rated for valence with values between -5 (negative) and +5 21 | (positive). AFINN-111, the latest version, contains 2,477 words and phrases. 22 | } 23 | \section{License}{ 24 | 25 | \href{https://opendatacommons.org/licenses/odbl/1-0/}{Open Database License (ODbL) v1.0} 26 | } 27 | 28 | \references{ 29 | Nielsen, F. Å. (2011). \href{https://arxiv.org/abs/1103.2903}{A new ANEW: Evaluation of a Word List for Sentiment Analysis in Microblogs.} In \emph{Proceedings 30 | of the ESWC2011 Workshop on 'Making Sense of Microposts': Big Things Come 31 | in Small Packages}, 93--98. 32 | } 33 | \keyword{data} 34 | -------------------------------------------------------------------------------- /man/data_dictionary_ANEW.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-documentation.R 3 | \docType{data} 4 | \name{data_dictionary_ANEW} 5 | \alias{data_dictionary_ANEW} 6 | \title{Affective Norms for English Words (ANEW)} 7 | \format{ 8 | A \link{dictionary} with three valenced keys: \code{pleasure}, \code{arousal}, and 9 | \code{dominance}, each with valences from 1 to 9 and containing the same 2,471 10 | fixed word values. 11 | } 12 | \usage{ 13 | data_dictionary_ANEW 14 | } 15 | \description{ 16 | A quanteda dictionary object containing the ANEW, or Affective Norms for 17 | English Words (Bradley and Lang 2017) valenced lexicon. The ANEW provides a 18 | lexicon of 2,471 distinct fixed word matches that are associated with three 19 | valenced categories: pleasure, arousal, and dominance. 20 | } 21 | \section{License}{ 22 | 23 | ANEW Statement of Use 24 | 25 | In accepting the ANEW materials, I agree not to make the ANEW available to 26 | the media (television, magazines, etc.) or to place them on any internet or 27 | computer-accessible websites. I also agree not to publish the ANEW in any 28 | print format – including JOURNALS, newspapers, etc. I also agree that I will 29 | not provide the ANEW materials to profit making companies or organizations 30 | and I agree not to distribute my username and password to unauthorized 31 | parties. 32 | } 33 | 34 | \keyword{data} 35 | -------------------------------------------------------------------------------- /man/data_dictionary_HuLiu.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-documentation.R 3 | \docType{data} 4 | \name{data_dictionary_HuLiu} 5 | \alias{data_dictionary_HuLiu} 6 | \title{Positive and negative words from Hu and Liu (2004)} 7 | \format{ 8 | A \link{dictionary} of fixed word patterns with two keys: 9 | \itemize{ 10 | \item \code{positive}: 2,006 words with positive polarity 11 | \item \code{negative}: 4,783 words with negative polarity 12 | } 13 | } 14 | \source{ 15 | \url{https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html} 16 | } 17 | \usage{ 18 | data_dictionary_HuLiu 19 | } 20 | \description{ 21 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing 2,006 22 | positive and 4,783 negative words from Hu and Liu (2004, 2005). 
23 | } 24 | \section{License}{ 25 | 26 | Unknown. 27 | } 28 | 29 | \references{ 30 | Hu, M. & Liu, B. (2004). \href{https://www.cs.uic.edu/~liub/publications/kdd04-revSummary.pdf}{Mining and Summarizing Customer Reviews}. In 31 | Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery 32 | and Data Mining (KDD-2004), Aug 22--25, 2004, Seattle, Washington, USA. 33 | 34 | Liu, M., Hu, M., & Cheng, J. (2005). \href{https://www.cs.uic.edu/~liub/publications/www05-p536.pdf}{Opinion Observer: Analyzing and Comparing Opinions on the Web}. In 35 | Proceedings of the 14th International World Wide Web conference (WWW-2005), 36 | May 10--14, 2005, Chiba, Japan. 37 | } 38 | \keyword{data} 39 | -------------------------------------------------------------------------------- /man/data_dictionary_LSD2015.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-documentation.R 3 | \docType{data} 4 | \name{data_dictionary_LSD2015} 5 | \alias{data_dictionary_LSD2015} 6 | \title{Lexicoder Sentiment Dictionary (2015)} 7 | \format{ 8 | A \link{dictionary} of four keys containing glob-style \link[=valuetype]{pattern matches}. 9 | \describe{ 10 | \item{\code{negative}}{2,858 word patterns indicating negative sentiment} 11 | \item{\code{positive}}{1,709 word patterns indicating positive sentiment} 12 | \item{\code{neg_positive}}{1,721 word patterns indicating a positive word preceded 13 | by a negation (used to convey negative sentiment)} 14 | \item{\code{neg_negative}}{2,860 word patterns indicating a negative word preceded 15 | by a negation (used to convey positive sentiment)} 16 | } 17 | } 18 | \usage{ 19 | data_dictionary_LSD2015 20 | } 21 | \description{ 22 | The 2015 Lexicoder Sentiment Dictionary in \pkg{quanteda} \link{dictionary} 23 | format. 24 | } 25 | \details{ 26 | The dictionary consists of 2,858 "negative" sentiment words and 1,709 27 | "positive" sentiment words. A further set of 2,860 and 1,721 negations of 28 | negative and positive words, respectively, is also included. While many users 29 | will find the non-negation sentiment forms of the LSD adequate for sentiment 30 | analysis, Young and Soroka (2012) did find a small, but non-negligible 31 | increase in performance when accounting for negations. Users wishing to test 32 | this or include the negations are encouraged to subtract negated positive 33 | words from the count of positive words, and subtract the negated negative 34 | words from the negative count. 35 | 36 | Young and Soroka (2012) also suggest the use of a pre-processing script to 37 | remove specific cases of some words (i.e., "good bye", or "nobody better", 38 | which should not be counted as positive). Pre-processing scripts are 39 | available at \url{https://www.snsoroka.com/data-lexicoder/}. 40 | } 41 | \section{License and Conditions}{ 42 | 43 | The LSD is available for non-commercial academic purposes only. By using 44 | \code{data_dictionary_LSD2015}, you accept these terms. 45 | 46 | Please cite the references below when using the dictionary. 47 | } 48 | 49 | \examples{ 50 | # checking polarity 51 | polarity(data_dictionary_LSD2015) 52 | 53 | # simple example 54 | library("quanteda") 55 | txt <- "This aggressive policy will not win friends." 56 | 57 | tokens_lookup(tokens(txt), dictionary = data_dictionary_LSD2015, 58 | exclusive = FALSE) 59 | ## tokens from 1 document. 
60 | ## text1 : 61 | ## [1] "This" "NEGATIVE" "policy" "will" "NEG_POSITIVE" "POSITIVE" "POSITIVE" "." 62 | 63 | # notice that double-counting of negated and non-negated terms is avoided 64 | # when using nested_scope = "dictionary" 65 | tokens_lookup(tokens(txt), dictionary = data_dictionary_LSD2015, 66 | exclusive = FALSE, nested_scope = "dictionary") 67 | ## tokens from 1 document. 68 | ## text1 : 69 | ## [1] "This" "NEGATIVE" "policy" "will" "NEG_POSITIVE" "POSITIVE." 70 | 71 | # on larger examples - notice that few negations are used 72 | tail(data_corpus_inaugural) |> 73 | tokens() |> 74 | tokens_lookup(dictionary = data_dictionary_LSD2015) |> 75 | dfm() 76 | } 77 | \references{ 78 | The objectives, development and reliability of the dictionary are discussed 79 | in detail in Young and Soroka (2012). Please cite this article when using 80 | the Lexicoder Sentiment Dictionary and related resources. 81 | Young, L. & Soroka, S. (2012). \emph{Lexicoder Sentiment 82 | Dictionary}. Available at \url{https://www.snsoroka.com/data-lexicoder/}. 83 | 84 | Young, L. & Soroka, S. (2012). Affective News: The Automated Coding of 85 | Sentiment in Political Texts. \emph{Political Communication}, 29(2), 205--231. 86 | \doi{10.1080/10584609.2012.671234} 87 | } 88 | \keyword{data} 89 | -------------------------------------------------------------------------------- /man/data_dictionary_LoughranMcDonald.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-documentation.R 3 | \docType{data} 4 | \name{data_dictionary_LoughranMcDonald} 5 | \alias{data_dictionary_LoughranMcDonald} 6 | \title{Loughran and McDonald Sentiment Word Lists} 7 | \format{ 8 | An object of class \code{dictionary2} of length 9. 9 | } 10 | \source{ 11 | \url{https://sraf.nd.edu/loughranmcdonald-master-dictionary/} 12 | } 13 | \usage{ 14 | data_dictionary_LoughranMcDonald 15 | } 16 | \description{ 17 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing 18 | the 2014 version of the Loughran and McDonald Sentiment Word Lists. The 19 | categories are "negative" (2355 features), "positive" (354), "uncertainty" (297), "litigious" (903), 20 | "constraining" (184), "superfluous" (56), "interesting" (68), "modal words strong" (68) 21 | and "modal words weak" (0). 22 | } 23 | \references{ 24 | Loughran, T. & McDonald, B. (2011). When is a Liability not a Liability? 25 | Textual Analysis, Dictionaries, and 10-Ks. 26 | \emph{Journal of Finance}, 66(1), 35--65. \doi{10.1111/j.1540-6261.2010.01625.x} 27 | } 28 | \keyword{data} 29 | -------------------------------------------------------------------------------- /man/data_dictionary_NRC.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-documentation.R 3 | \docType{data} 4 | \name{data_dictionary_NRC} 5 | \alias{data_dictionary_NRC} 6 | \title{NRC Word-Emotion Association Lexicon} 7 | \format{ 8 | An object of class \code{dictionary2} of length 10. 
9 | } 10 | \source{ 11 | \url{https://nrc.canada.ca/en/research-development/products-services/technical-advisory-services/sentiment-emotion-lexicons} 12 | 13 | See also \url{http://saifmohammad.com/WebPages/AccessResource.htm} 14 | } 15 | \usage{ 16 | data_dictionary_NRC 17 | } 18 | \description{ 19 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing Mohammad and 20 | Charron's (2010, 2013) English version of the NRC Word-Emotion Association 21 | Lexicon (aka NRC Emotion Lexicon aka EmoLex): association of words with eight 22 | emotions (anger, fear, anticipation, trust, surprise, sadness, joy, and disgust) 23 | and two sentiments (negative and positive) manually annotated on Amazon's 24 | Mechanical Turk. 25 | 26 | The Sentiment and Emotion Lexicons is a collection of lexicons that was 27 | entirely created by the experts of the National Research Council of Canada. 28 | Developed with a wide range of applications, this lexicon collection can be 29 | used in a multitude of contexts such as sentiment analysis, product 30 | marketing, consumer behaviour and even political campaign analysis. 31 | 32 | The technology uses a list of words that help identify emotions, sentiment, 33 | as well as analyzing hashtags, emoticons and word-colour associations. The 34 | lexicons contain entries for English words, and can be used to analyze 35 | English texts. 36 | } 37 | \note{ 38 | Technical and research-related questions can be addressed to Saif M. 39 | Mohammad (Senior Research Scientist at NRC): 40 | \code{Saif.Mohammad@nrc-cnrc.gc.ca}. 41 | } 42 | \section{License and Terms of Use}{ 43 | 44 | Free for research purposes. 45 | 46 | For questions about the commercial license, email Pierre Charron (Client 47 | Relationship Leader at NRC): \code{Pierre.Charron@nrc-cnrc.gc.ca}. 48 | 49 | Terms of Use: 50 | \itemize{ 51 | \item Cite the papers associated with the lexicons in your research papers and 52 | articles that make use of them. (The papers associated with each lexicon 53 | are listed below, and also in the READMEs for individual lexicons.) 54 | \item In news articles and online posts on work using these lexicons, cite the 55 | appropriate lexicons. For example: "This application/product/tool makes 56 | use of the \verb{resource name}, created by \code{author(s)} at the National 57 | Research Council Canada." (The creators of each lexicon are listed below. 58 | Also, if you send us an email, we will be thrilled to know about how you 59 | have used the lexicon.) If possible hyperlink to this page: 60 | \url{http://saifmohammad.com/WebPages/lexicons.html}. 61 | \item If you use a lexicon in a product or application, then acknowledge this in 62 | the 'About' page and other relevant documentation of the application by 63 | stating the name of the resource, the authors, and NRC. For example: "This 64 | application/product/tool makes use of the \verb{resource name}, created by 65 | \code{author(s)} at the National Research Council Canada." (The creators of 66 | each lexicon are listed below. Also, if you send us an email, we will be 67 | thrilled to know about how you have used the lexicon.) If possible 68 | hyperlink to this page: \url{http://saifmohammad.com/WebPages/lexicons.html}. 69 | \item Do not redistribute the data. Direct interested parties to this page: 70 | \url{http://saifmohammad.com/WebPages/AccessResource.htm}. 
71 | \item National Research Council Canada (NRC) disclaims any responsibility for 72 | the use of the lexicons listed here and does not provide technical 73 | support. However, the contact listed above will be happy to respond to 74 | queries and clarifications. 75 | } 76 | } 77 | 78 | \references{ 79 | Mohammad, S. & Turney, P. (2013). \href{https://arxiv.org/abs/1308.6297}{Crowdsourcing a Word-Emotion Association Lexicon}. \emph{Computational Intelligence}, 80 | 29(3), 436--465. 81 | 82 | Mohammad, S. & Turney, P. (2010). \href{https://dl.acm.org/doi/10.5555/1860631.1860635}{Emotions Evoked by Common Words and Phrases: Using Mechanical Turk to Create an Emotion Lexicon}. In \emph{Proceedings of 83 | the NAACL-HLT 2010 Workshop on Computational Approaches to Analysis and 84 | Generation of Emotion in Text}, June 2010, LA, California. 85 | } 86 | \keyword{data} 87 | -------------------------------------------------------------------------------- /man/data_dictionary_Rauh.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-documentation.R 3 | \docType{data} 4 | \name{data_dictionary_Rauh} 5 | \alias{data_dictionary_Rauh} 6 | \title{Rauh's German Political Sentiment Dictionary} 7 | \format{ 8 | The dictionary has four keys. 9 | \describe{ 10 | \item{\code{negative}}{19,750 terms indicating negative sentiment} 11 | \item{\code{positive}}{17,330 terms indicating positive sentiment} 12 | \item{\code{neg_positive}}{17,330 terms indicating a positive word preceded 13 | by a negation (used to convey negative sentiment)} 14 | \item{\code{neg_negative}}{19,750 terms indicating a negative word preceded 15 | by a negation (used to convey positive sentiment)} 16 | } 17 | } 18 | \source{ 19 | \doi{10.7910/DVN/BKBXWD} 20 | } 21 | \usage{ 22 | data_dictionary_Rauh 23 | } 24 | \description{ 25 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing the 26 | dictionaries provided in Rauh (forthcoming). Rauh assesses its performance 27 | against human intuition of sentiment in German political language 28 | (parliamentary speeches, party manifestos, and media coverage). The resource 29 | builds on, harmonizes and extends the SentiWS (Remus et al. 2010) and 30 | GermanPolarityClues (Waltinger 2010) dictionaries. In order to use the 31 | negation correction provided by the dictionary, currently a combination of 32 | \link[quanteda:tokens_replace]{tokens_replace} and \link[quanteda:tokens_compound]{tokens_compound} is 33 | required to harmonize the five covered bi-gram patterns prior to scoring. The 34 | example below shows how to conduct this transformation. Note that the process 35 | changes the terms "nicht|nichts|kein|keine|keinen" to a joint term altering 36 | some of the features of the original corpus. 37 | } 38 | \examples{ 39 | \donttest{ 40 | # tokenize example text 41 | toks <- tokens("nicht schlecht dieses wunderschöne Wörterbuch") 42 | # replace negation markers with "not" 43 | toks1 <- tokens_replace(toks, pattern = c("nicht", "nichts", "kein", 44 | "keine", "keinen"), 45 | replacement = rep("not", 5)) 46 | # compound bi-gram negation patterns 47 | toks2 <- tokens_compound(toks1, data_dictionary_Rauh, concatenator = " ") 48 | 49 | # apply dictionary 50 | tokens_lookup(toks2, dictionary = data_dictionary_Rauh) |> 51 | dfm() 52 | } 53 | } 54 | \references{ 55 | Rauh, C. (2018). 
Validating a Sentiment Dictionary for German Political 56 | Language: A Workbench Note. 57 | \emph{Journal of Information Technology & Politics}, 15(4), 319--343. 58 | \doi{10.1080/19331681.2018.1485608} 59 | 60 | Remus, R., Quasthoff U., & Heyer, G. (2010). "\href{http://www.lrec-conf.org/proceedings/lrec2010/pdf/490_Paper.pdf}{SentiWS - a Publicly Available German-language Resource for Sentiment Analysis.}" 61 | In \emph{Proceedings of the 7th International Language Resources and Evaluation 62 | (LREC'10)}, 1168--1171. 63 | 64 | Waltinger, U. (2010). "\href{http://www.ulliwaltinger.de/pdf/91_Paper.pdf}{GermanPolarityClues: A Lexical Resource for German Sentiment Analysis}." In 65 | \emph{International Conference on Language Resources and Evaluation}, 17--23 May 66 | 2010 LREC'10. 67 | } 68 | \keyword{data} 69 | -------------------------------------------------------------------------------- /man/data_dictionary_geninqposneg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-documentation.R 3 | \docType{data} 4 | \name{data_dictionary_geninqposneg} 5 | \alias{data_dictionary_geninqposneg} 6 | \title{Augmented General Inquirer \emph{Positiv} and \emph{Negativ} dictionary} 7 | \format{ 8 | A \link{dictionary} of fixed word patterns with two keys: 9 | \itemize{ 10 | \item \code{positive}: 1,653 words with positive polarity 11 | \item \code{negative}: 2,010 words with negative polarity 12 | } 13 | } 14 | \source{ 15 | \verb{http://www.wjh.harvard.edu/~inquirer/spreadsheet_guide.htm} -- 16 | although this site ceased operating some time in 2021 17 | } 18 | \usage{ 19 | data_dictionary_geninqposneg 20 | } 21 | \description{ 22 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing the 23 | \emph{Positiv} and \emph{Negativ} dictionary entries from the augmented 24 | General Inquirer. These are new valence categories described at 25 | \verb{http://www.wjh.harvard.edu/~inquirer/homecat.htm} but also including the 26 | terms from the "yes" "no" dictionary entries. 27 | } 28 | \references{ 29 | Stone, P.J., Dunphy, C.D., & Smith, M.S. (1966). 30 | \emph{The General Inquirer: A Computer Approach to Content Analysis.} 31 | Cambridge, MA: MIT Press. 32 | } 33 | \keyword{data} 34 | -------------------------------------------------------------------------------- /man/data_dictionary_sentiws.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-documentation.R 3 | \docType{data} 4 | \name{data_dictionary_sentiws} 5 | \alias{data_dictionary_sentiws} 6 | \title{SentimentWortschatz (SentiWS)} 7 | \format{ 8 | An object of class \code{dictionary2} of length 2. 9 | } 10 | \source{ 11 | \url{https://wortschatz.uni-leipzig.de/en/download/} 12 | } 13 | \usage{ 14 | data_dictionary_sentiws 15 | } 16 | \description{ 17 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing 18 | SentimentWortschatz (SentiWS), a publicly available German-language resource 19 | for sentiment analysis. The current version of SentiWS contains 1,650 20 | positive and 1,818 negative words, which sum up to 15,649 positive and 15,632 21 | negative word forms including their inflections. It not only contains 22 | adjectives and adverbs explicitly expressing a sentiment, but also nouns and 23 | verbs implicitly containing one. 
The original dictionary weights within the 24 | interval of -1 to 1. Note that the version implemented in 25 | \pkg{quanteda.dictionaries} uses a binary classification into positive 26 | (weight > 0) and negative (weight < 0) features. 27 | } 28 | \references{ 29 | Remus, R., Quasthoff U., and Heyer, G. (2010). \href{http://www.lrec-conf.org/proceedings/lrec2010/pdf/490_Paper.pdf}{SentiWS: a Publicly Available German-language Resource for Sentiment Analysis}. 30 | In \emph{Proceedings of the 7th International Language Ressources and Evaluation 31 | (LREC'10)}, 1168--1171. 32 | } 33 | \keyword{data} 34 | -------------------------------------------------------------------------------- /man/get_polarity_dictionary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/textstat_polarity.R 3 | \name{get_polarity_dictionary} 4 | \alias{get_polarity_dictionary} 5 | \title{Get a standard polarity dictionary for sentiment analysis} 6 | \usage{ 7 | get_polarity_dictionary(dictionary) 8 | } 9 | \arguments{ 10 | \item{dictionary}{a \pkg{quanteda} \link{dictionary}} 11 | } 12 | \value{ 13 | a single-level \link{dictionary} with keys \code{pos}, \code{neg}, and (optionally) 14 | \code{neut}. 15 | } 16 | \description{ 17 | Checks and standardizes a \link{dictionary} object with its \link{polarity} set, so 18 | that the polarity categories are standardized into the keys \code{pos}, \code{neg}, and 19 | (optionally) \code{neut}. Also checks that the dictionary contains all of the 20 | keys named in the polarity object. (It is necessary to check here since the 21 | dictionary could have been subset after creation.) 22 | } 23 | \keyword{internal} 24 | -------------------------------------------------------------------------------- /man/images/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/man/images/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /man/images/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/man/images/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /man/polarity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/textstat_polarity.R 3 | \name{polarity} 4 | \alias{polarity} 5 | \alias{polarity<-} 6 | \title{Set or get the sentiment polarity of a dictionary} 7 | \usage{ 8 | polarity(x) 9 | 10 | polarity(x) <- value 11 | } 12 | \arguments{ 13 | \item{x}{a \link{dictionary} object} 14 | 15 | \item{value}{list consisting of named character vectors \code{pos}, \code{neg}, and 16 | (optionally) \code{neut} corresponding to positive, negative, and neutral 17 | sentiment categories respectively. Each element may contain multiple 18 | key names. The \code{neut} category is optional but \code{pos} and \code{neg} must be 19 | supplied.} 20 | } 21 | \value{ 22 | \code{polarity()} returns the polarity as a list. 23 | 24 | \verb{polarity<-} sets the dictionary's polarity. 
25 | } 26 | \description{ 27 | Set or retrieve the polarity of a \link{dictionary} object for the purposes of 28 | sentiment analysis. Polarity consists of a set of dictionary keys that are 29 | associated with positive, negative, and (optionally) neutral categories for 30 | use in \code{\link[=textstat_polarity]{textstat_polarity()}}. 31 | } 32 | \details{ 33 | A dictionary may have only one set of polarities at a time, but may be 34 | changed as needed. 35 | } 36 | \examples{ 37 | library("quanteda") 38 | simpledict <- dictionary(list( 39 | happy = c("happy", "jubilant", "exuberant"), 40 | sad = c("sad", "morose", "down") 41 | )) 42 | polarity(simpledict) 43 | polarity(simpledict) <- list(pos = "happy", neg = "sad") 44 | polarity(simpledict) 45 | 46 | # can list multiple keys 47 | polarity(data_dictionary_LSD2015) <- list( 48 | pos = c("positive", "neg_negative"), 49 | neg = c("negative", "neg_positive") 50 | ) 51 | polarity(data_dictionary_LSD2015) 52 | } 53 | \keyword{dictionary} 54 | \keyword{textstat} 55 | \keyword{utility} 56 | -------------------------------------------------------------------------------- /man/quanteda.sentiment-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/quanteda.sentiment-package.R 3 | \docType{package} 4 | \name{quanteda.sentiment-package} 5 | \alias{quanteda.sentiment} 6 | \alias{quanteda.sentiment-package} 7 | \title{quanteda.sentiment: Sentiment Analysis using 'quanteda'} 8 | \description{ 9 | Adds functions and dictionaries for computing sentiment using the 'quanteda' package. 10 | } 11 | \author{ 12 | \strong{Maintainer}: Kenneth Benoit \email{kbenoit@lse.ac.uk} [copyright holder] 13 | 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/sentiment-functions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/textstat_polarity.R 3 | \name{sentiment-functions} 4 | \alias{sentiment-functions} 5 | \alias{sent_logit} 6 | \alias{sent_abspropdiff} 7 | \alias{sent_relpropdiff} 8 | \title{Sentiment functions} 9 | \usage{ 10 | sent_logit(x, smooth = 0.5) 11 | 12 | sent_abspropdiff(x) 13 | 14 | sent_relpropdiff(x) 15 | } 16 | \arguments{ 17 | \item{x}{a \link{dfm} that has the following required feature names: \code{pos}, 18 | \code{neg}, \code{neut}, and \code{other}} 19 | 20 | \item{smooth}{additional smoothing function added to \code{pos} and \code{neg} before 21 | logarithmic transformation} 22 | } 23 | \value{ 24 | a sparse \pkg{Matrix} object of documents by sentiment score, where 25 | the sentiment score is the only column. (Its name is unimportant as this 26 | will not be used by \code{\link[=textstat_polarity]{textstat_polarity()}}.) 27 | } 28 | \description{ 29 | Functions for computing sentiment, for \code{\link[=textstat_polarity]{textstat_polarity()}}. Each function 30 | takes an input \link{dfm} with fixed feature names (see Details), and returns a 31 | sparse Matrix with a single column representing the results of the sentiment 32 | calculation. 33 | 34 | \code{sent_logit} is \eqn{log(\frac{pos}{neg})}. 35 | 36 | \code{sent_abspropdiff} is \eqn{\frac{pos - neg}{N}}, where \eqn{N} 37 | is the total number of all features in a document. 38 | 39 | \code{sent_relpropdiff} is \eqn{\frac{pos - neg}{pos + neg}}. 
40 | } 41 | \details{ 42 | User supplied functions must take \code{x} and optional additional arguments, such 43 | as \code{smooth} for a smoothing constant for the logit scaling function. feature 44 | names for the sentiment categories \code{pos}, \code{neg}, \code{neut}, and \code{other}. (The 45 | \code{other} category is only required when a scaling function needs the count of 46 | non-sentiment associated features.) 47 | 48 | Additional arguments may be passed via \code{...}, such as \code{smooth} for the logit 49 | scale. 50 | } 51 | \examples{ 52 | library("quanteda") 53 | dfmat <- c("pos pos pos neg pos pos", "neg neg pos pos pos") |> 54 | tokens() |> 55 | dfm() 56 | sent_logit(dfmat) 57 | sent_abspropdiff(dfmat) 58 | 59 | # user-supplied function 60 | my_sent_fn <- function(x) (x[, "pos"] - x[, "neg"]) / rowSums(x) * 100 61 | my_sent_fn(dfmat) 62 | 63 | # user supplied function with fixed weights and using neutral category 64 | dfmat2 <- c("pos pos neut neg neut pos", "neg neg neut neut pos") |> 65 | tokens() |> 66 | dfm() 67 | my_sent_fn2 <- function(x) (x[, "pos"]*3 + x[, "neut"]*2 + x[, "neg"]*1)/3 68 | my_sent_fn2(dfmat2) 69 | } 70 | \references{ 71 | Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011). 72 | Scaling Policy Preferences from Coded Political Texts. \emph{Legislative Studies 73 | Quarterly}, 36(1), 123–155. 74 | \doi{10.1111/j.1939-9162.2010.00006.x} 75 | } 76 | \keyword{internal} 77 | \keyword{textstat} 78 | -------------------------------------------------------------------------------- /man/textstat_polarity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/textstat_polarity.R 3 | \name{textstat_polarity} 4 | \alias{textstat_polarity} 5 | \title{Compute sentiment from key polarities} 6 | \usage{ 7 | textstat_polarity(x, dictionary, fun = sent_logit, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a character, \link{corpus}, \link{tokens}, or \link{dfm} object containing 11 | text, tokens, or features whose sentiment will be scored} 12 | 13 | \item{dictionary}{a \link{dictionary} that has \link{polarity} set, indicating which 14 | keys are associated with positive, negative, and (optionally) neutral 15 | sentiment} 16 | 17 | \item{fun}{function; the formula for computing sentiment, which must refer to 18 | \code{pos}, \code{neg}, and (optionally) \code{neut}. The default is the "logit" scale 19 | (Lowe et al 2011) which is the log of (positive / negative) counts. See 20 | \link{sentiment-functions} for details and for additional available functions, 21 | as well as details on how to supply custom functions.} 22 | 23 | \item{...}{additional arguments passed to \code{fun}} 24 | } 25 | \value{ 26 | a \link{data.frame} of sentiment scores 27 | } 28 | \description{ 29 | Compute sentiment scores using a polarity approach, based on assigned 30 | categories (types or features) of positive, negative, and neutral sentiment. 31 | Several formulas for combining the polar categories are available, or the 32 | user can supply a custom function. 
33 | } 34 | \examples{ 35 | library("quanteda") 36 | corp <- tail(data_corpus_inaugural, n = 5) 37 | toks <- tokens(corp) 38 | dfmat <- dfm(toks) 39 | polar1 <- list(pos = "positive", neg = "negative") 40 | polar2 <- list(pos = c("positive", "neg_negative"), 41 | neg = c("negative", "neg_positive")) 42 | 43 | polarity(data_dictionary_LSD2015) <- polar1 44 | textstat_polarity(corp, dictionary = data_dictionary_LSD2015) 45 | textstat_polarity(toks, dictionary = data_dictionary_LSD2015) 46 | textstat_polarity(dfmat, dictionary = data_dictionary_LSD2015) 47 | 48 | polarity(data_dictionary_LSD2015) <- polar2 49 | textstat_polarity(corp, dictionary = data_dictionary_LSD2015) 50 | textstat_polarity(toks, dictionary = data_dictionary_LSD2015) 51 | textstat_polarity(corp, dictionary = data_dictionary_LSD2015) 52 | textstat_polarity(dfmat, dictionary = data_dictionary_LSD2015) 53 | 54 | # with a user-supplied function 55 | sent_fn <- function(x) (x[, "pos"] - x[, "neg"]) / rowSums(x) * 100 56 | textstat_polarity(toks, data_dictionary_LSD2015, fun = sent_fn) 57 | } 58 | \references{ 59 | Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011). 60 | Scaling Policy Preferences from Coded Political Texts. \emph{Legislative Studies 61 | Quarterly}, 36(1), 123–155. \doi{10.1111/j.1939-9162.2010.00006.x} 62 | } 63 | -------------------------------------------------------------------------------- /man/textstat_valence.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/textstat_valence.R 3 | \name{textstat_valence} 4 | \alias{textstat_valence} 5 | \title{Compute sentiment from word valences} 6 | \usage{ 7 | textstat_valence( 8 | x, 9 | dictionary, 10 | normalization = c("dictionary", "all", "none"), 11 | ... 12 | ) 13 | } 14 | \arguments{ 15 | \item{x}{a character, \link{corpus}, \link{tokens}, or \link{dfm} object containing 16 | text, tokens, or features whose sentiment will be scored.} 17 | 18 | \item{dictionary}{a \pkg{quanteda} \link{dictionary} that has \link{valence} set, in 19 | the form of numerical valences associated with sentiment} 20 | 21 | \item{normalization}{the baseline for normalizing the sentiment counts after 22 | scoring. Sentiment scores within keys are weighted means of the tokens 23 | matched to dictionary values, weighted by their valences. The default 24 | \code{"dictionary"} is to average over only the valenced words. \code{"all"} 25 | averages across all tokens, and \code{"none"} does no normalization.} 26 | 27 | \item{...}{not used here} 28 | } 29 | \value{ 30 | a data.frame of sentiment scores 31 | } 32 | \description{ 33 | Compute sentiment scores from tokens or document-feature matrices, based on 34 | the valences of dictionary keys and values. 35 | } 36 | \note{ 37 | If the input item is a \link{dfm}, then multi-word values will not be matched 38 | unless the features of the \link{dfm} have been compounded previously. The input 39 | objects should not have had dictionaries applied previously. 
40 | } 41 | \examples{ 42 | library("quanteda") 43 | \dontrun{ 44 | 45 | # AFINN 46 | afinn <- read.delim(system.file("extdata/afinn/AFINN-111.txt", 47 | package = "quanteda.sentiment"), 48 | header = FALSE, col.names = c("word", "valence")) 49 | data_dictionary_afinn <- dictionary(list(afinn = afinn$word)) 50 | valence(data_dictionary_afinn) <- list(afinn = afinn$valence) 51 | textstat_valence(toks, dictionary = data_dictionary_afinn) 52 | 53 | # ANEW 54 | anew <- read.delim(url("https://bit.ly/2zZ44w0")) 55 | anew <- anew[!duplicated(anew$Word), ] # because some words repeat 56 | data_dictionary_anew <- dictionary(list(pleasure = anew$Word, 57 | arousal = anew$Word, 58 | dominance = anew$Word)) 59 | valence(data_dictionary_anew) <- list(pleasure = anew$ValMn, 60 | arousal = anew$AroMn, 61 | dominance = anew$DomMn) 62 | textstat_valence(toks, data_dictionary_anew["pleasure"]) 63 | textstat_valence(toks, data_dictionary_anew["arousal"])} 64 | 65 | } 66 | \references{ 67 | For a discussion of how to aggregate sentiment scores to the document 68 | level, see: 69 | 70 | Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011). 71 | Scaling Policy Preferences from Coded Political Texts. \emph{Legislative Studies 72 | Quarterly}, 36(1), 123–155. 73 | \doi{10.1111/j.1939-9162.2010.00006.x} 74 | } 75 | \seealso{ 76 | \code{\link[=valence]{valence()}} 77 | } 78 | -------------------------------------------------------------------------------- /man/valence.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/textstat_valence.R 3 | \name{valence} 4 | \alias{valence} 5 | \alias{valence<-} 6 | \title{Set or get the valences of dictionary values or keys} 7 | \usage{ 8 | valence(x) 9 | 10 | valence(x) <- value 11 | } 12 | \arguments{ 13 | \item{x}{a \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object} 14 | 15 | \item{value}{named list consisting of numerical value. The names of the 16 | elements must correspond to a dictionary key. Each element must be: 17 | \itemize{ 18 | \item a single numeric value that will be applied to all of the dictionary 19 | values in that key; or 20 | \item a vector of numeric values that matches the length and order of the 21 | dictionary values in that key; or 22 | \item a named numeric vector where each element name matches dictionary values 23 | in the key. 24 | }} 25 | } 26 | \value{ 27 | \code{valences()} returns the valences as a list named numeric vectors, 28 | where each list element corresponds to a key in the dictionary, and each 29 | numeric element matches a value within that key. 30 | 31 | \verb{valence<-} sets the dictionary's valences. 32 | } 33 | \description{ 34 | Set or retrieve the valences of a \link{dictionary} object for the purposes of 35 | sentiment analysis. Valences consist of numerical values attached to each 36 | dictionary "value". For dictionaries with a more "polarity"-based approach, 37 | see \code{\link[=textstat_polarity]{textstat_polarity()}} 38 | } 39 | \details{ 40 | Valences are used only in \code{\link[=textstat_valence]{textstat_valence()}}. 41 | 42 | A dictionary may have only one set of valences at a time, but may be 43 | changed as needed. 
44 | } 45 | \examples{ 46 | library("quanteda") 47 | 48 | # setting valences 49 | dict <- dictionary(list( 50 | happiness = c("happy", "jubilant", "exuberant", "content"), 51 | anger = c("mad", "peeved", "irate", "furious", "livid") 52 | )) 53 | valence(dict) 54 | # using a 5-point scale: 1:1 match 55 | valence(dict) <- list(happiness = c(3, 4, 5, 2), 56 | anger = c(3.1, 2.4, 2.9, 4.1, 5.0)) 57 | valence(dict) 58 | # with single valences applied to all values within the keys 59 | valence(dict) <- c(happiness = 1, anger = -1) 60 | valence(dict) 61 | # with named elements - order does not matter 62 | valence(dict) <- list( 63 | happiness = c(exuberant = 5, jubilant = 4, happy = 3, content = 2) 64 | ) 65 | valence(dict) 66 | 67 | } 68 | \seealso{ 69 | \code{\link[=textstat_valence]{textstat_valence()}}, \code{\link[=valence]{valence()}} 70 | } 71 | \keyword{dictionary} 72 | \keyword{textstat} 73 | \keyword{utility} 74 | -------------------------------------------------------------------------------- /sources/AFINN/AFINN-README.txt: -------------------------------------------------------------------------------- 1 | AFINN is a list of English words rated for valence with an integer 2 | between minus five (negative) and plus five (positive). The words have 3 | been manually labeled by Finn Årup Nielsen in 2009-2011. The file 4 | is tab-separated. There are two versions: 5 | 6 | AFINN-111: Newest version with 2477 words and phrases. 7 | 8 | AFINN-96: 1468 unique words and phrases on 1480 lines. Note that there 9 | are 1480 lines, as some words are listed twice. The word list in not 10 | entirely in alphabetic ordering. 11 | 12 | An evaluation of the word list is available in: 13 | 14 | Finn Årup Nielsen, "A new ANEW: Evaluation of a word list for 15 | sentiment analysis in microblogs", http://arxiv.org/abs/1103.2903 16 | 17 | The list was used in: 18 | 19 | Lars Kai Hansen, Adam Arvidsson, Finn Årup Nielsen, Elanor Colleoni, 20 | Michael Etter, "Good Friends, Bad News - Affect and Virality in 21 | Twitter", The 2011 International Workshop on Social Computing, 22 | Network, and Services (SocialComNet 2011). 23 | 24 | 25 | This database of words is copyright protected and distributed under 26 | "Open Database License (ODbL) v1.0" 27 | https://www.opendatacommons.org/licenses/odbl/1.0/ or a similar 28 | copyleft license. 
29 | 30 | See comments on the word list here: 31 | http://fnielsen.posterous.com/old-anew-a-sentiment-about-sentiment-analysis 32 | 33 | 34 | In Python the file may be read into a dictionary with: 35 | 36 | >>> afinn = dict(map(lambda (k,v): (k,int(v)), 37 | [ line.split('\t') for line in open("AFINN-111.txt") ])) 38 | >>> afinn["Good".lower()] 39 | 3 40 | >>> sum(map(lambda word: afinn.get(word, 0), "Rainy day but still in a good mood".lower().split())) 41 | 2 42 | 43 | 44 | -------------------------------------------------------------------------------- /sources/AFINN/create-data_dictionary_AFINN.R: -------------------------------------------------------------------------------- 1 | # AFINN Dictionary 2 | 3 | library("quanteda") 4 | 5 | afinn111 <- read.delim("AFINN/AFINN-111.txt", header = FALSE, col.names = c("word", "valence")) 6 | afinn96 <- read.delim("AFINN/AFINN-96.txt", header = FALSE, col.names = c("word", "valence")) 7 | 8 | afinn111 <- dplyr::arrange(afinn111, word) 9 | afinn96 <- dplyr::arrange(afinn96, word) 10 | dplyr::filter(afinn96, duplicated(afinn96$word)) 11 | 12 | data_dictionary_AFINN <- dictionary(list("AFINN" = afinn111$word)) 13 | valence(data_dictionary_AFINN) <- list("AFINN" = afinn111$valence) 14 | 15 | meta(data_dictionary_AFINN) <- list( 16 | title = "Finn Årup Nielsen's (2011) 'new ANEW' valenced word list", 17 | description = "AFINN is a list of English words rated for valence with an integer between minus five (negative) and plus five (positive), manually labeled by Finn Årup Nielsen in 2009-2011. This dictionary is the newer AFINN-111 version with 2,477 words and phrases.", 18 | url = "http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=6010", 19 | reference = "Nielsen, F. Å. (2011). A new ANEW: Evaluation of a Word List for Sentiment Analysis in Microblogs. In Proceedings of the ESWC2011 Workshop on 'Making Sense of Microposts': Big Things Come in Small Packages, 93--98.", 20 | license = "This database of words is copyright protected and distributed under the Open Database License (ODbL) v1.0, https://www.opendatacommons.org/licenses/odbl/1.0/" 21 | ) 22 | 23 | meta(data_dictionary_AFINN) <- 24 | lapply(meta(data_dictionary_AFINN), function(x) Encoding(x) <- "UTF-8") 25 | 26 | usethis::use_data(data_dictionary_AFINN, overwrite = TRUE) 27 | 28 | -------------------------------------------------------------------------------- /sources/ANEW/create-data_dictionary_ANEW.R: -------------------------------------------------------------------------------- 1 | # ANEW 2 | 3 | library("quanteda") 4 | 5 | anew <- read.delim(url("https://bit.ly/2zZ44w0")) 6 | anew <- anew[!duplicated(anew$Word), ] # because some words repeat 7 | data_dictionary_ANEW <- dictionary(list(pleasure = anew$Word, 8 | arousal = anew$Word, 9 | dominance = anew$Word)) 10 | valence(data_dictionary_ANEW) <- list(pleasure = anew$ValMn, 11 | arousal = anew$AroMn, 12 | dominance = anew$DomMn) 13 | 14 | meta(data_dictionary_ANEW) <- 15 | list( 16 | title = "Affective Norms for English Words (ANEW)", 17 | description = "A quanteda dictionary object containing the ANEW, or Affective Norms for English Words (Bradley and Lang 2017) valenced lexicon. The ANEW provides a lexicon of 2,471 distinct fixed word matches that are associated with three valenced categories: pleasure, arousal, and dominance.", 18 | url = "https://csea.phhp.ufl.edu/media.html#bottommedia", 19 | reference = "Bradley, M.M. & Lang, P.J. (2017). 
Affective Norms for English Words (ANEW): Instruction manual and affective ratings. Technical Report C-3. Gainesville, FL:UF Center for the Study of Emotion and Attention.", 20 | license = "For non-profit academic research purposes." 21 | ) 22 | 23 | usethis::use_data(data_dictionary_ANEW, overwrite = TRUE) 24 | 25 | -------------------------------------------------------------------------------- /sources/Hu-Liu/create_data_dictionary-HuLiu.R: -------------------------------------------------------------------------------- 1 | library("quanteda") 2 | 3 | data_dictionary_HuLiu <- 4 | dictionary(list(positive = scan(file = "Hu-Liu/positive-words.txt", 5 | what = "character", comment.char = ";"), 6 | negative = scan(file = "Hu-Liu/negative-words-UTF8.txt", 7 | what = "character", comment.char = ";"))) 8 | 9 | meta(data_dictionary_HuLiu) <- 10 | list( 11 | title = "Positive and negative words from Hu and Liu (2004)", 12 | url = "http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html", 13 | description = "A quanteda dictionary object containing 2,006 positive and 4,783 negative words from Hu and Liu (2004, 2005).", 14 | reference = "Hu, M. & Liu, B. (2004). Mining and Summarizing Customer Reviews. In Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD-2004), Aug 22--25, 2004, Seattle, Washington, USA. https://www.cs.uic.edu/~liub/publications/kdd04-revSummary.pdf 15 | 16 | Liu, M., Hu, M., & Cheng, J. (2005). Opinion Observer: Analyzing and Comparing Opinions on the Web. In Proceedings of the 14th International World Wide Web conference (WWW-2005), May 10--14, 2005, Chiba, Japan. https://www.cs.uic.edu/~liub/publications/www05-p536.pdf", 17 | license = "Unknown" 18 | ) 19 | polarity(data_dictionary_HuLiu) <- list(pos = "positive", neg = "negative") 20 | 21 | usethis::use_data(data_dictionary_HuLiu, overwrite = TRUE) 22 | -------------------------------------------------------------------------------- /sources/Laver-Garry/Laver_and_Garry_2000.cat: -------------------------------------------------------------------------------- 1 | CULTURE 2 | CULTURE-HIGH 3 | ART (1) 4 | ARTISTIC (1) 5 | DANCE (1) 6 | GALLER* (1) 7 | MUSEUM* (1) 8 | MUSIC* (1) 9 | OPERA* (1) 10 | THEATRE* (1) 11 | CULTURE-POPULAR 12 | MEDIA (1) 13 | SPORT 14 | ANGLER* (1) 15 | PEOPLE (1) 16 | WAR_IN_IRAQ (1) 17 | CIVIL_WAR (1) 18 | ECONOMY 19 | +STATE+ 20 | ACCOMMODATION (1) 21 | AGE (1) 22 | AMBULANCE (1) 23 | ASSIST (1) 24 | BENEFIT (1) 25 | CARE (1) 26 | CARER* (1) 27 | CHILD* (1) 28 | CLASS (1) 29 | CLASSES (1) 30 | CLINICS (1) 31 | COLLECTIVE* (1) 32 | CONTRIBUTION* (1) 33 | COOPERATIVE* (1) 34 | CO-OPERATIVE* (1) 35 | DEPRIVATION (1) 36 | DISABILITIES (1) 37 | DISADVANTAGED (1) 38 | EDUCAT* (1) 39 | ELDERLY (1) 40 | EQUAL* (1) 41 | ESTABLISH (1) 42 | FAIR* (1) 43 | GUARANTEE* (1) 44 | HARDSHIP (1) 45 | HEALTH* (1) 46 | HOMELESS* (1) 47 | HOSPITAL* (1) 48 | HUNGER (1) 49 | INEQUAL* (1) 50 | INVEST (1) 51 | INVESTING (1) 52 | INVESTMENT (1) 53 | MEANS-TEST* (1) 54 | NURSE* (1) 55 | PATIENTS (1) 56 | PENSION (1) 57 | POOR (1) 58 | POORER (1) 59 | POOREST (1) 60 | POVERTY (1) 61 | REHOUSE* (1) 62 | RE-HOUSE* (1) 63 | SCHOOL (1) 64 | TEACH* (1) 65 | TRANSPORT (1) 66 | UNDERFUND* (1) 67 | UNEMPLOY* (1) 68 | VULNERABLE (1) 69 | WIDOW* (1) 70 | =STATE= 71 | ACCOUNTANT (1) 72 | ACCOUNTING (1) 73 | ACCOUNTS (1) 74 | ADVERT* (1) 75 | AIRLINE* (1) 76 | AIRPORT* (1) 77 | AUDIT* (1) 78 | BANK* (1) 79 | BARGAINING (1) 80 | BREADWINNER* (1) 81 | BUDGET* (1) 82 | BUY* (1) 83 | CARTEL* (1) 84 
| CASH* (1) 85 | CHARGE* (1) 86 | COMMERCE* (1) 87 | COMPENSAT* (1) 88 | CONSUM* (1) 89 | COST* (1) 90 | CREDIT* (1) 91 | CUSTOMER* (1) 92 | DEBT* (1) 93 | DEFICIT* (1) 94 | DWELLING* (1) 95 | EARN* (1) 96 | ECON* (1) 97 | ELECTRICITY (1) 98 | ESTATE* (1) 99 | EXPORT* (1) 100 | FEE (1) 101 | FEES (1) 102 | FINANC* (1) 103 | HOUS* (1) 104 | IMPORT (1) 105 | IMPORTS (1) 106 | INDUSTR* (1) 107 | JOBS (1) 108 | LEASE* (1) 109 | LOAN* (1) 110 | MANUFACTUR* (1) 111 | MORTGAGE* (1) 112 | NEGOTIAT* (1) 113 | OPPORTUNITY (1) 114 | PARTNERSHIP* (1) 115 | PASSENGER* (1) 116 | PAY* (1) 117 | PERFORMANCE (1) 118 | PORT* (1) 119 | PRODUCTIVITY (1) 120 | PROFESSION* (1) 121 | PURCHAS* (1) 122 | RAILWAY* (1) 123 | REBATE* (1) 124 | RECESSION* (1) 125 | RESEARCH* (1) 126 | REVENUE* (1) 127 | SALAR* (1) 128 | SELL* (1) 129 | SETTLEMENT (1) 130 | SOFTWARE (1) 131 | SUPPLIER* (1) 132 | SUPPLY (1) 133 | TELECOM* (1) 134 | TELEPHON* (1) 135 | TENAN* (1) 136 | TOURIS* (1) 137 | TRADE (1) 138 | TRAIN* (1) 139 | WAGE* (1) 140 | WELFARE (1) 141 | WORK* (1) 142 | -STATE- 143 | ASSETS (1) 144 | AUTONOMY (1) 145 | BARRIER* (1) 146 | BID (1) 147 | BIDDERS (1) 148 | BIDDING (1) 149 | BURDEN* (1) 150 | CHARIT* (1) 151 | CHOICE* (1) 152 | COMPET* (1) 153 | CONFIDENCE (1) 154 | CONFISCATORY (1) 155 | CONSTRAIN* (1) 156 | CONTRACTING* (1) 157 | CONTRACTOR* (1) 158 | CONTROLLED (1) 159 | CONTROLLING (1) 160 | CONTROLS (1) 161 | CORPORATE (1) 162 | CORPORATION* (1) 163 | DEREGULATING (1) 164 | DISMANTL* (1) 165 | ENTREPRENEUR* (1) 166 | EXPENSIVE (1) 167 | FLEXIB* (1) 168 | FRANCHISE* (1) 169 | FUNDHOLD* (1) 170 | FUND-HOLDING (1) 171 | HOMESTEAD* (1) 172 | INITIATIVE (1) 173 | INTRUSIVE (1) 174 | INVESTOR* (1) 175 | LIBERALI* (1) 176 | MARKET* (1) 177 | MONETARY (1) 178 | MONEY (1) 179 | OWN* (1) 180 | PRIVATE (1) 181 | PRIVATELY (1) 182 | PRIVATISATIONS (1) 183 | PRIVATISED (1) 184 | PRIVATISING (1) 185 | PRODUCE* (1) 186 | PROFITABLE (1) 187 | REGULAT* (1) 188 | RETAIL* (1) 189 | RISK (1) 190 | RISKS (1) 191 | SAVINGS (1) 192 | SELL* (1) 193 | SHARES (1) 194 | SIMPLIF* (1) 195 | SPEND* (1) 196 | SPONSORSHIP (1) 197 | TAXABLE (1) 198 | TAXES (1) 199 | TAX-FREE (1) 200 | THRIFT* (1) 201 | TRADING (1) 202 | VALUE (1) 203 | VOLUNT* (1) 204 | VOUCHER* (1) 205 | ENVIRONMENT 206 | CON ENVIRONMENT 207 | PRODUC* (1) 208 | PRO ENVIRONMENT 209 | CAR (1) 210 | CATALYTIC (1) 211 | CHEMICAL* (1) 212 | CHIMNEY* (1) 213 | CLEAN* (1) 214 | CONGESTION (1) 215 | CYCLIST* (1) 216 | DEPLET* (1) 217 | ECOLOG* (1) 218 | EMISSION* (1) 219 | ENERGY-SAVING (1) 220 | ENVIRONMENT* (1) 221 | FUR (1) 222 | GREEN (1) 223 | HABITAT* (1) 224 | HEDGEROW* (1) 225 | HUSBANDED (1) 226 | LITTER* (1) 227 | OPENCAST (1) 228 | OPEN-CAST* (1) 229 | OZONE (1) 230 | PLANET (1) 231 | POPULATION (1) 232 | RECYCL* (1) 233 | RE-CYCL* (1) 234 | RE-USE (1) 235 | TOXIC (1) 236 | WARMING (1) 237 | GROUPS 238 | ETHNIC 239 | ASIAN* (1) 240 | BUDDHIST* (1) 241 | ETHNIC* (1) 242 | RACE (1) 243 | RACI* (1) 244 | WOMEN 245 | GIRLS (1) 246 | WOMAN (1) 247 | WOMEN (1) 248 | INSTITUTIONS 249 | CONSERVATIVE 250 | AUTHORITY (1) 251 | CONTINU* (1) 252 | DISRUPT* (1) 253 | INSPECT* (1) 254 | JURISDICTION* (1) 255 | LEGITIMATE (1) 256 | MANAG* (1) 257 | MORATORIUM (1) 258 | RUL* (1) 259 | STRIKE* (1) 260 | WHITEHALL (1) 261 | NEUTRAL 262 | ADMINISTR* (1) 263 | ADVIS* (1) 264 | AGENC* (1) 265 | AMALGAMAT* (1) 266 | APPOINT* (1) 267 | ASSEMBLY (1) 268 | CHAIR* (1) 269 | COMMISSION* (1) 270 | COMMITTEE* (1) 271 | CONSTITUEN* (1) 272 | COUNCIL* (1) 273 | DEPARTMENT* (1) 274 | DIRECTORATE* 
(1) 275 | EXECUTIVE* (1) 276 | HEADQUARTERS (1) 277 | LEGISLAT* (1) 278 | MECHANISM* (1) 279 | MINISTER* (1) 280 | OFFICE (1) 281 | OFFICES (1) 282 | OFFICIAL (1) 283 | OPERAT* (1) 284 | OPPOSITION (1) 285 | ORGANISATION* (1) 286 | PARLIAMENT* (1) 287 | PRESIDEN* (1) 288 | PROCEDUR* (1) 289 | PROCESS* (1) 290 | QUEEN (1) 291 | REGIST* (1) 292 | SCHEME* (1) 293 | SECRETARIAT* (1) 294 | SOVEREIGN* (1) 295 | SUBCOMMITTEE* (1) 296 | TRIBUNAL* (1) 297 | VOTE* (1) 298 | VOTING (1) 299 | WESTMINSTER (1) 300 | RADICAL 301 | ABOLITION (1) 302 | ACCOUNTABLE (1) 303 | ANSWERABLE (1) 304 | CONSULT* (1) 305 | CORRUPT* (1) 306 | DEMOCRATIC* (1) 307 | ELECT* (1) 308 | IMPLEMENT* (1) 309 | MODERN* (1) 310 | MONITOR* (1) 311 | REBUILD* (1) 312 | REEXAMINE* (1) 313 | REFORM* (1) 314 | RE-ORGANI* (1) 315 | REPEAL* (1) 316 | REPLACE* (1) 317 | REPRESENTAT* (1) 318 | SCANDAL* (1) 319 | SCRAP (1) 320 | SCRAP* (1) 321 | SCRUTIN* (1) 322 | TRANSFORM* (1) 323 | VOICE* (1) 324 | LAW_AND_ORDER 325 | LAW-CONSERVATIVE 326 | ASSAULTS (1) 327 | BAIL (1) 328 | BURGLAR* (1) 329 | CONSTAB* (1) 330 | CONVICT* (1) 331 | COURT (1) 332 | COURTS (1) 333 | CUSTOD* (1) 334 | DEALING (1) 335 | DELINQUEN* (1) 336 | DETER (1) 337 | DETER* (1) 338 | DISORDER (1) 339 | DRUG* (1) 340 | FINE (1) 341 | FINES (1) 342 | FIRMNESS (1) 343 | FORCE* (1) 344 | FRAUD* (1) 345 | GUARD* (1) 346 | HOOLIGAN* (1) 347 | ILLEGAL* (1) 348 | INTIMIDAT* (1) 349 | JOY-RIDE* (1) 350 | LAWLESS* (1) 351 | MAGISTRAT* (1) 352 | OFFENCE* (1) 353 | OFFICER* (1) 354 | PENAL* (1) 355 | POLICE (1) 356 | POLICEMEN (1) 357 | POLICING (1) 358 | PRISON* (1) 359 | PROBATION (1) 360 | PROSECUTION (1) 361 | PUNISH* (1) 362 | RE-OFFEND (1) 363 | RUC (1) 364 | SEIZ* (1) 365 | SENTENCE* (1) 366 | SHOP-LIFTING (1) 367 | SQUATTING (1) 368 | TERROR* (1) 369 | THEFT* (1) 370 | THUG* (1) 371 | TOUGH* (1) 372 | TRAFFICKER* (1) 373 | UNIFORMED (1) 374 | UNLAWFUL (1) 375 | VANDAL* (1) 376 | VICTIM* (1) 377 | VIGILAN* (1) 378 | LAW-LIBERAL 379 | HARASSMENT (1) 380 | NON-CUSTODIAL (1) 381 | RURAL 382 | AGRICULTUR* (1) 383 | BADGERS (1) 384 | BIRD* (1) 385 | COUNTRYSIDE (1) 386 | FARM* (1) 387 | FEED (1) 388 | FISH* (1) 389 | FOREST* (1) 390 | HENS (1) 391 | HORSE* (1) 392 | LANDSCAPE* (1) 393 | LANE* (1) 394 | LIVESTOCK (1) 395 | MEADOWS (1) 396 | VILLAGE* (1) 397 | WILDLIFE (1) 398 | URBAN 399 | TOWN* (1) 400 | VALUES 401 | CONSERVATIVE 402 | DEFEND (1) 403 | DEFENDED (1) 404 | DEFENDING (1) 405 | DISCIPLINE (1) 406 | GLORIES (1) 407 | GLORIOUS (1) 408 | GRAMMAR (1) 409 | HERITAGE (1) 410 | HISTOR* (1) 411 | HONOUR* (1) 412 | IMMIGRA* (1) 413 | INHERIT* (1) 414 | INTEGRITY (1) 415 | JUBILEE* (1) 416 | LEADER* (1) 417 | MAINTAIN (1) 418 | MAJESTY (1) 419 | MARRIAGE (1) 420 | OBSCEN* (1) 421 | PAST (1) 422 | PORNOGRAPH* (1) 423 | PRESERV* (1) 424 | PRIDE (1) 425 | PRINCIPL* (1) 426 | PROBITY (1) 427 | PROFESSIONALISM (1) 428 | PROUD (1) 429 | PUNCTUAL* (1) 430 | RECAPTURE* (1) 431 | RELIAB* (1) 432 | THREAT* (1) 433 | TRADITION* (1) 434 | LIBERAL 435 | CRUEL* (1) 436 | DISCRIMINAT* (1) 437 | HUMAN* (1) 438 | INJUSTICE* (1) 439 | INNOCENT (1) 440 | INTER_RACIAL (1) 441 | MINORIT* (1) 442 | REPRESSI* (1) 443 | RIGHTS (1) 444 | SEX* (1) 445 | -------------------------------------------------------------------------------- /sources/Laver-Garry/create-data_dictionary_LaverGarry.R: -------------------------------------------------------------------------------- 1 | # Laver and Garry Dictionary of Policy Positions 2 | 3 | library("quanteda") 4 | 5 | data_dictionary_LaverGarry <- 
dictionary(file = "Laver-Garry/Laver_and_Garry_2000.cat") 6 | 7 | usethis::use_data(data_dictionary_LaverGarry, overwrite = TRUE) 8 | -------------------------------------------------------------------------------- /sources/Loughran-McDonald/create-data_dictionary_LoughranMcDonald.R: -------------------------------------------------------------------------------- 1 | # Loughran and McDonald Sentiment Word Lists 2 | 3 | library("quanteda") 4 | 5 | data_dictionary_LoughranMcDonald <- dictionary(file = "Loughran-McDonald/Loughran_and_McDonald_2014.cat") 6 | 7 | polarity(data_dictionary_LoughranMcDonald) <- 8 | list(pos = c("POSITIVE"), neg = c("NEGATIVE")) 9 | 10 | meta(data_dictionary_LoughranMcDonald) <- 11 | list( 12 | title = "Loughran and McDonald Sentiment Word Lists", 13 | description = "A quanteda dictionary object containing the 2014 version of the Loughran and McDonald Sentiment Word Lists. The categories are 'negative' (2355 features), 'positive' (354), 'uncertainty' (297), 'litigious' (903), 'constraining' (184), 'superfluous' (56), 'interesting' (68), 'modal words strong' (68) and 'modal words weak' (0).", 14 | url = "http://sraf.nd.edu/textual-analysis/resources/", 15 | reference = "Loughran, T. & McDonald, B. (2011). When is a Liability not a Liability? Textual Analysis, Dictionaries, and 10-Ks. Journal of Finance, 66(1), 35-65.", 16 | license = "The data compilations provided on this website are for use by individual researchers. For commercial licenses please contact mcdonald.1@nd.edu." 17 | ) 18 | 19 | usethis::use_data(data_dictionary_LoughranMcDonald, overwrite = TRUE) 20 | -------------------------------------------------------------------------------- /sources/MFD/create-data_dictionary_MFD.R: -------------------------------------------------------------------------------- 1 | # Moral Foundations Dictionary 2 | 3 | library("quanteda") 4 | 5 | #data_dictionary_MFD <- dictionary(file = "sources/MFD/moral_foundations_dictionary.dic") 6 | data_dictionary_MFD <- dictionary(file = "MFD/mfd2.0.dic") 7 | 8 | usethis::use_data(data_dictionary_MFD, overwrite = TRUE) 9 | -------------------------------------------------------------------------------- /sources/MFD/mfd2.0.dic: -------------------------------------------------------------------------------- 1 | % 1 care.virtue 2 care.vice 3 fairness.virtue 4 fairness.vice 5 loyalty.virtue 6 loyalty.vice 7 authority.virtue 8 authority.vice 9 sanctity.virtue 10 sanctity.vice % compassion 1 empathy 1 kindness 1 caring 1 generosity 1 benevolence 1 altruism 1 compassionate 1 nurture 1 gentleness 1 nurturance 1 sympathy 1 nurturing 1 motherly 1 love 1 beneficence 1 empathize 1 helpfulness 1 loving 1 pity 1 mercy 1 nurturer 1 compassionately 1 nurturers 1 caringly 1 empathising 1 merciful 1 empathizing 1 nurtures 1 warmhearted 1 empathizers 1 protectiveness 1 nurtured 1 benevolent 1 mothering 1 cared 1 healing 1 empathises 1 humane 1 comfort 1 pitied 1 loved 1 altruist 1 cares 1 pitying 1 comforted 1 hug 1 comforting 1 consoling 1 empathizes 1 sympathize 1 care 1 caregiver 1 empathised 1 hugs 1 heal 1 generous 1 condolences 1 mothered 1 charitable 1 generously 1 pities 1 condolence 1 help 1 consolingly 1 solace 1 mother 1 healer 1 hospitality 1 charity 1 empathized 1 healers 1 pityingly 1 mothers 1 child 1 lovingly 1 parenting 1 rescuing 1 rescuer 1 loves 1 consoled 1 clothe 1 sympathizing 1 helping 1 shared 1 childhood 1 mommy 1 vulnerability 1 helpers 1 lover 1 hospitable 1 sharer 1 feeding 1 nursed 1 helper 1 safeness 1 nurses 1 protector 
1 motherhood 1 alleviation 1 nursemaid 1 safeguard 1 protect 1 healthiness 1 protecters 1 patient 1 nurse 1 vulnerable 1 benefit 1 feed 1 childcare 1 rescuers 1 hugged 1 helpful 1 rescues 1 nursing 1 protecting 1 heals 1 childbearing 1 hugger 1 relief 1 healed 1 rescued 1 patients 1 share 1 rescue 1 healthy 1 hospitalise 1 hospitalising 1 hugging 1 nursery 1 healthier 1 sharing 1 helps 1 sympathizers 1 hospitalises 1 alleviating 1 wounded 1 wounds 1 hospitalize 1 alleviate 1 protective 1 protection 1 health 1 relieve 1 sympathizer 1 safety 1 beneficiary 1 helped 1 hospital 1 childbirth 1 benefits 1 healthcare 1 relievers 1 feeds 1 hospitalization 1 benefitting 1 relieving 1 safe 1 feeder 1 benefitted 1 hospitalized 1 unharmful 1 protects 1 unharmed 1 protecter 1 safely 1 safekeeping 1 hospitalizing 1 wounding 1 reliever 1 shares 1 relieves 1 alleviates 1 relieved 1 hospitalizes 1 console 1 protectorate 1 alleviated 1 protected 1 wound 1 consoles 1 harm 2 suffer 2 hurt 2 harmed 2 hurting 2 hurts 2 cruel 2 endanger 2 harming 2 harms 2 suffering 2 threaten 2 inflict 2 suffered 2 harmful 2 inflicted 2 mistreat 2 endangers 2 damaging 2 injurious 2 victimize 2 inflicts 2 hurtful 2 suffers 2 inflicting 2 injures 2 vulnerable 2 unkind 2 damage 2 kill 2 die 2 victimizes 2 torment 2 destroy 2 brutalise 2 brutalises 2 distresses 2 endangering 2 mistreats 2 afflict 2 distressing 2 destroys 2 victimises 2 maltreat 2 pain 2 harsh 2 mistreated 2 ravage 2 threatened 2 harass 2 unkindness 2 afflicted 2 threatens 2 threatening 2 distress 2 brutalize 2 tormenting 2 brutalized 2 victimizing 2 damager 2 damaged 2 bully 2 agony 2 abused 2 coldhearted 2 inhuman 2 injured 2 torments 2 brutalizes 2 uncompassionate 2 cruelty 2 tormented 2 mistreating 2 endangered 2 uncaring 2 anguishes 2 destroying 2 killed 2 mistreatment 2 bullied 2 harsher 2 cruelness 2 tortured 2 pained 2 tortures 2 torturing 2 maltreated 2 anguish 2 persecutes 2 maltreatment 2 brutalizing 2 attacked 2 victim 2 crying 2 damages 2 discomforting 2 abusing 2 threat 2 persecute 2 brutalization 2 violent 2 annihilated 2 torturous 2 harasses 2 injurer 2 destroyed 2 molests 2 molest 2 injuring 2 afflicts 2 killing 2 ache 2 wounded 2 persecuted 2 ravages 2 harassed 2 exploited 2 injury 2 brutalisation 2 discomfort 2 unmerciful 2 annihilate 2 exploiters 2 injurers 2 destruction 2 manhandle 2 kills 2 casualties 2 maltreating 2 victims 2 harassing 2 needier 2 smother 2 harassment 2 smothers 2 unhelpful 2 agonize 2 inhumanity 2 duress 2 victimization 2 exploiting 2 cried 2 wounds 2 wounding 2 murderous 2 ravaged 2 uncaringly 2 pains 2 painfulness 2 manhandles 2 bullies 2 assaulted 2 uncharitable 2 distressed 2 persecution 2 murdered 2 ravaging 2 discomforted 2 exploitation 2 torture 2 murderers 2 aches 2 afflictions 2 ungenerous 2 victimizer 2 agonizing 2 paining 2 persecuting 2 exploit 2 harassers 2 malevolent 2 stabs 2 sorrowful 2 assaults 2 needy 2 affliction 2 cries 2 fighting 2 fight 2 attack 2 annihilates 2 sorrow 2 agonized 2 assaulting 2 inhospitable 2 threats 2 ached 2 rapists 2 abuser 2 raped 2 assassinates 2 stabbed 2 inhospitality 2 annihilation 2 punch 2 harshness 2 abusers 2 killer 2 sufferers 2 victimizers 2 smite 2 killers 2 discomforts 2 fatalities 2 molested 2 brutality 2 murdering 2 torturer 2 torturers 2 fights 2 harmfulness 2 bullying 2 casualty 2 sufferer 2 exploiter 2 fatality 2 punches 2 abuses 2 attacks 2 vulnerability 2 carnage 2 tribulation 2 annihilator 2 smothering 2 bullyboy 2 murderer 2 wound 2 stabber 2 tormenters 2 
malevolence 2 raping 2 smothered 2 assault 2 genocidal 2 anguishing 2 aching 2 anguished 2 stabbing 2 rapist 2 harasser 2 hungers 2 hunger 2 molesting 2 rape 2 molesters 2 punched 2 violence 2 distressingly 2 molester 2 stabbers 2 neediness 2 assassinate 2 agonizingly 2 tribulations 2 unhelpfulness 2 assaulter 2 puncher 2 punching 2 rapes 2 genocides 2 attackers 2 tormentor 2 assassinations 2 destroyers 2 punchers 2 sorrows 2 tormenter 2 threateningly 2 murder 2 destroyer 2 assassinating 2 crier 2 assassinated 2 molestation 2 attacker 2 murders 2 genocide 2 fighter 2 assassins 2 assaulters 2 hungering 2 achingly 2 hungered 2 murderess 2 assassin 2 exploits 2 fighters 2 assassination 2 equality 3 fairness 3 justice 3 rights 3 equitable 3 civil rights 3 fairplay 3 impartiality 3 equal 3 fairminded 3 proportionality 3 equalities 3 fair 3 integrity 3 impartial 3 reciprocity 3 honesty 3 egalitarian 3 civil right 3 law 3 justness 3 unbias 3 egalitarians 3 parity 3 objectiveness 3 reparations 3 unprejudiced 3 justices 3 laws 3 tribunals 3 retribution 3 reparation 3 lawfully 3 lawful 3 honest 3 compensation 3 lawyers 3 sportsmanship 3 tribunal 3 do unto others 3 golden rule 3 lawyer 3 proportional 3 equity 3 lawyering 3 trust 3 reciprocal 3 being objective 3 justification 3 trustworthiness 3 unbiased 3 vengeance 3 revenge 3 retributions 3 equals 3 equalize 3 refereeing 3 restitution 3 compensating 3 been objective 3 pay back 3 justified 3 justifies 3 retaliation 3 lawyered 3 compensated 3 referees 3 karma 3 will share 3 avenger 3 trusting 3 avengers 3 square deal 3 trusts 3 compensate 3 trustworthy 3 levels the playing field 3 tit for tat 3 retaliate 3 level the playing field 3 eye for an eye 3 square deals 3 repayment 3 payback 3 equities 3 justify 3 dues 3 square dealing 3 referee 3 repaid 3 square dealer 3 equalizer 3 due processes 3 level playing fields 3 repay 3 compensates 3 justifying 3 due processing 3 due process 3 repayments 3 repaying 3 level playing field 3 retaliating 3 square dealers 3 retaliated 3 refereed 3 revenger 3 avenging 3 repays 3 trusted 3 avenge 3 retaliates 3 equalizers 3 avenged 3 avenges 3 cheat 4 unfair 4 cheating 4 unfairness 4 injustice 4 fraud 4 dishonest 4 unjust 4 cheated 4 fraudulent 4 cheats 4 frauds 4 dishonesty 4 cheaters 4 deception 4 injustices 4 swindle 4 inequity 4 hypocrisy 4 discrimination 4 unequal 4 cheater 4 inequities 4 defraud 4 racism 4 scam 4 liar 4 defrauds 4 betrayal 4 deceipt 4 defrauded 4 inequality 4 liars 4 defrauders 4 hypocrite 4 biased 4 ripoffs 4 scams 4 fleecing 4 defrauder 4 discriminates 4 mislead 4 inequalities 4 prejudice 4 fleeced 4 defrauding 4 ripoff 4 scamming 4 imposters 4 exploitation 4 crooked 4 oppress 4 racist 4 oppression 4 imposter 4 swindled 4 hypocrites 4 plagiarism 4 lied 4 untrustworthiness 4 hoodwink 4 scammed 4 blackmail 4 bilks 4 swindling 4 betrayed 4 bias 4 connive 4 crooks 4 deceive 4 freeloaders 4 favoritism 4 disparity 4 swindles 4 deceived 4 exploiters 4 misleading 4 discriminated 4 bilked 4 deceiving 4 untrustworthy 4 prejudiced 4 false advertise 4 scammers 4 swindler 4 theft 4 duplicitous 4 hoodwinked 4 bigoted 4 sexism 4 disproportionate 4 swindlers 4 discriminate 4 conniving 4 sexist 4 betraying 4 hoodwinking 4 partiality 4 misleads 4 disproportion 4 economic disparity 4 exploiter 4 bilk 4 biases 4 bigots 4 distrust 4 dupe 4 crook 4 racists 4 con artist 4 bilking 4 blackmailing 4 deceives 4 betrayers 4 deceiver 4 blackmailed 4 duping 4 shyster 4 connivers 4 imbalanced 4 con artists 4 sexists 4 thieving 4 
betray 4 imbalance 4 disproportions 4 disproportionately 4 freeloader 4 misleaders 4 connived 4 shysters 4 scammer 4 connives 4 conniver 4 disadvantaged 4 plagiaristic 4 moocher 4 dupes 4 discriminating 4 tricked 4 segregation 4 false advertised 4 thief 4 betrayer 4 bigot 4 exploiting 4 lying 4 thieves 4 stealing 4 suckered 4 deceivers 4 bamboozled 4 false advertisement 4 freeload 4 bamboozle 4 did rob 4 freeloading 4 steal 4 pickpocketing 4 blackmailer 4 prejudicing 4 chauvinists 4 exploit 4 misleader 4 hoodwinks 4 false advertiser 4 imbalances 4 pickpocketed 4 exploited 4 pickpockets 4 bamboozles 4 tricking 4 taking advantage 4 pickpocket 4 false advertisers 4 biasing 4 false impression 4 bamboozling 4 false witness 4 robs 4 moochers 4 betrays 4 robbing 4 false advertises 4 false impressions 4 blackmails 4 double cross 4 blackmailers 4 will rob 4 stolen 4 distrustful 4 false advertising 4 double crossers 4 mooches 4 disproportioned 4 mooching 4 segregated 4 double crosser 4 robbed 4 misleadingly 4 segregating 4 stole 4 double crosses 4 being partial 4 exploits 4 distrusts 4 mooch 4 segregate 4 robbers 4 distrusted 4 double crossing 4 distrusting 4 double crossed 4 be partial 4 go back on 4 stacking the deck 4 robber 4 segregates 4 ripping off 4 trickster 4 rips off 4 behind their backs 4 mooched 4 stacked the deck 4 was partial 4 am partial 4 stacked deck 4 stacks the deck 4 behind their back 4 been partial 4 free rider 4 ripped off 4 free riders 4 deceivingly 4 steals 4 unequaled 4 team player 5 player 5 patriot 5 loyal 5 loyalty 5 patriots 5 follower 5 fidelity 5 allegiance 5 ally 5 comrade 5 loyalties 5 death do us part 5 faction 5 comrades 5 allegiances 5 sacrifice 5 allies 5 organization 5 followers 5 us against them 5 sacrifices 5 all for one 5 comradery 5 one for all 5 fellow 5 family 5 allegiant 5 corps 5 unity 5 union jack 5 uniter 5 old glory 5 companions 5 country 5 companion 5 homeland 5 sacrificing 5 indivisible 5 sacrificed 5 solidarity 5 troops 5 nation 5 cult 5 kinship 5 companionship 5 clique 5 allied 5 community 5 group 5 factions 5 familiarity 5 solidarities 5 enlist 5 companionships 5 wife 5 united 5 belongs 5 congregation 5 brothers in arms 5 clan 5 trooper 5 sect 5 enlisted 5 enlistment 5 tribalism 5 cohorts 5 war 5 joining 5 troop 5 sacrificial 5 coalition 5 insider 5 pledge 5 cohort 5 enlisting 5 unite 5 communion 5 familiarities 5 belong 5 ingroup 5 belonged 5 company 5 collective 5 fellows 5 cliques 5 uniting 5 clans 5 hazing 5 congregates 5 herd 5 sects 5 uniters 5 undivided 5 unites 5 pledgers 5 coalitions 5 enlists 5 grouping 5 insiders 5 families 5 troupe 5 fellowship 5 kin 5 pledger 5 horde 5 nations 5 tribe 5 hordes 5 pledges 5 herder 5 commune 5 cults 5 congregations 5 organizations 5 herds 5 pledging 5 communities 5 familiar 5 hazings 5 belonging 5 pledged 5 bowed 5 collectively 5 together 5 groups 5 homelands 5 collectives 5 troopers 5 tribes 5 companies 5 countries 5 troupes 5 fellowships 5 tribal 5 communes 5 herders 5 grouped 5 herding 5 congregate 5 herded 5 congregating 5 traitor 6 disloyal 6 treason 6 traitors 6 betray 6 betraying 6 betrayer 6 betrayers 6 unpatriotic 6 betrayed 6 treachery 6 enemies 6 backstabber 6 backstabbed 6 heretic 6 enemy 6 betrays 6 deserter 6 infidels 6 infidel 6 backstab 6 deserting 6 apostate 6 heresy 6 backstabbers 6 heretics 6 unfaithful 6 rebellion 6 desertion 6 deserters 6 apostates 6 unfaithfulness 6 backstabbing 6 rebel 6 cheat on 6 treacherous 6 backstabs 6 heresies 6 outsider 6 outgroup 6 cheated on 6 against us 
6 cheating on 6 rebels 6 infidelity 6 outgroups 6 rebellions 6 outsiders 6 cheats on 6 respect 7 obey 7 authority 7 obeyed 7 deference 7 reverence 7 respecting 7 obeying 7 tradition 7 adhere 7 obeys 7 revere 7 govern 7 comply 7 respectful 7 honor 7 adhered 7 allegiance 7 dictates 7 nobility 7 forbid 7 dominion 7 governed 7 obedient 7 reveres 7 adhering 7 governs 7 governing 7 oppress 7 respected 7 respectfully 7 honorable 7 dictate 7 commandments 7 commandment 7 venerate 7 politeness 7 respects 7 obedience 7 divine right 7 forbids 7 permission 7 veneration 7 hierarchy 7 forbade 7 honoring 7 proper 7 venerated 7 stature 7 acquiesce 7 adherence 7 deferential 7 leadership 7 punish 7 forbidding 7 revered 7 filial piety 7 patriarchs 7 decree 7 coerce 7 dominions 7 dictating 7 venerating 7 wear the crown 7 venerates 7 institution 7 monarchical 7 servant 7 decrees 7 permit 7 do as one says 7 supervise 7 duty 7 compliance 7 lionize 7 supervision 7 take orders 7 take up arms 7 duties 7 dictated 7 elders 7 emperors 7 commands 7 acquiesced 7 emperor 7 adheres 7 servants 7 regulations 7 covenant 7 hierarchical 7 subordinate 7 policing 7 decreeing 7 acquiesces 7 authorizing 7 nobles 7 permits 7 matriarchal 7 authorizes 7 control 7 command 7 subordinating 7 hierarchies 7 reverential 7 deferentially 7 punishes 7 patriarch 7 empires 7 honored 7 allegiant 7 protect 7 traditional 7 subordination 7 punished 7 noble 7 order 7 worship 7 social order 7 monarchs 7 ruling 7 lead by example 7 authorities 7 guiding 7 presidents 7 slavishly 7 patriarchy 7 subordinates 7 protection 7 supervisers 7 bow before 7 fathers 7 bow down 7 institutions 7 coersion 7 governors 7 commanded 7 police 7 authorize 7 bullys 7 bully 7 protecting 7 acquiescing 7 empire 7 mentor 7 chiefs 7 monarchies 7 honors 7 preside over 7 acquiescent 7 allegiances 7 bowing 7 oligarchy 7 willing 7 polite 7 supervising 7 pecking order 7 compliantly 7 bishops 7 monarch 7 slaves 7 traitors 7 punishments 7 authorized 7 protector 7 compliant 7 dutiful 7 father 7 punishment 7 coerces 7 toe the line 7 monarchy 7 obediently 7 elder 7 oligarchies 7 dictators 7 leaders 7 bishop 7 lorded over 7 worships 7 coercing 7 protectors 7 dictator 7 protected 7 punishing 7 traitor 7 commanding 7 coerced 7 commanders 7 pope 7 punitive 7 underlings 7 master 7 subordinated 7 president 7 in charge 7 matriarchy 7 lionizing 7 slave 7 chief 7 covenants 7 commander 7 matriarch 7 authorizer 7 guide 7 ordered 7 supervised 7 captains 7 punisher 7 supervises 7 bossing 7 commandant 7 governor 7 protects 7 admiral 7 top gun 7 bowed 7 dominate 7 arrest 7 mentored 7 ordering 7 submit 7 institutional 7 prime minister 7 lionizes 7 ranking 7 boss 7 captain 7 by the book 7 mentors 7 bullies 7 dominant 7 arrested 7 bossed 7 leader 7 rank 7 arresting 7 chieftain 7 prime ministers 7 regulation 7 superviser 7 dean 7 arrests 7 punishers 7 bullied 7 matriarchs 7 controlling 7 managerial 7 bosses 7 ranks 7 controls 7 dictation 7 guides 7 oligarchs 7 principals 7 top dog 7 admirals 7 caste 7 captaining 7 queen 7 mentoring 7 elderly 7 castes 7 governess 7 captained 7 principal 7 bullying 7 submission 7 dominated 7 corporate ladders 7 queens 7 underling 7 corporate ladder 7 fathered 7 dominates 7 dominating 7 presidential 7 oligarch 7 controlled 7 submits 7 submitting 7 head honcho 7 commandingly 7 vice president 7 slaving 7 fathering 7 slaved 7 managers 7 forbiddingly 7 controllers 7 submitted 7 ringleaders 7 ringleader 7 controller 7 ranked 7 manager 7 prime ministerial 7 submissions 7 ceo 7 
punishingly 7 submitter 7 submitters 7 disrespect 8 disobey 8 disobedience 8 anarchy 8 chaos 8 subversion 8 subvert 8 lawlessness 8 subverting 8 disrespecting 8 sedition 8 treason 8 overthrow 8 insurrection 8 rebellion 8 transgress 8 treachery 8 dissent 8 dishonor 8 dissention 8 disrespects 8 bedlam 8 rebelling 8 misrule 8 transgression 8 insurrectional 8 pandemonium 8 mutiny 8 mutinies 8 misruling 8 disobedient 8 subverted 8 transgresses 8 transgressed 8 disarray 8 misruled 8 rioting 8 lawless 8 transgressing 8 illegality 8 overthrowing 8 dishonorable 8 dishonoring 8 rebelled 8 rebellions 8 riot 8 dishonouring 8 disrespected 8 permissiveness 8 refuser 8 unruly 8 subverts 8 unlawfulness 8 overthrown 8 anarchistic 8 dishonours 8 riots 8 refuse 8 chaotic 8 nonconformity 8 dissenters 8 uprising 8 insurrections 8 rioters 8 disordering 8 insubordinate 8 mutinied 8 insurrectionist 8 unlawful 8 nonconformists 8 heresy 8 uprisings 8 dishonors 8 tumult 8 overthrew 8 overthrows 8 rabble rousers 8 renegade 8 impolite 8 renegades 8 rabble rousing 8 dishonored 8 illegal 8 rioter 8 mutinous 8 disarrayed 8 apostates 8 dissidents 8 anarchists 8 raise hell 8 disorder 8 refusers 8 permissive 8 apostate 8 anarchist 8 treacherous 8 dissident 8 raises hell 8 disordered 8 heretic 8 overpower 8 rabble rouser 8 rebel 8 raising hell 8 heretics 8 unathorized 8 refusing 8 rebels 8 refuses 8 rioted 8 orders 8 dissenter 8 chaotically 8 nonconformist 8 heresies 8 illegals 8 unlawfully 8 heretical 8 dissents 8 traditions 8 dissenting 8 overpowers 8 trouble maker 8 refused 8 rock the boat 8 overpowering 8 tumultuous 8 overpowered 8 dissented 8 nonconforming 8 sanctity 9 sacred 9 sacredness 9 purity 9 wholesome 9 pureness 9 wholesomeness 9 holiness 9 dignity 9 godly 9 piety 9 sanctify 9 chastity 9 undefiled 9 holy 9 sacrosanct 9 pious 9 righteousness 9 dignities 9 sanctified 9 godliness 9 spirituality 9 chaste 9 sanctifies 9 righteous 9 divine 9 religious 9 biblical 9 spiritual 9 deity 9 sanctifying 9 noble 9 modesty 9 decency 9 scriptures 9 nobility 9 religion 9 hallow 9 soul 9 hallowed 9 deism 9 pristine 9 exalted 9 hallowing 9 eternal 9 holy cross 9 deities 9 faith 9 unadulterated 9 scripture 9 wholesomely 9 divinities 9 worship 9 virgin 9 god 9 catholicism 9 saintly 9 saintliness 9 godess 9 religiosity 9 purify 9 koranic 9 pure 9 holy crosses 9 exalt 9 virginity 9 divinity 9 consecrates 9 heaven 9 virginal 9 devout 9 dignified 9 tabernacle 9 exalts 9 buddhas 9 souls 9 temple 9 unsullied 9 heavenly 9 cleanliness 9 abstinance 9 spotlessness 9 talmudic 9 deists 9 gospels 9 prophets 9 religions 9 temples 9 buddhist 9 godesses 9 saints 9 temperance 9 celibacy 9 consecrated 9 priestly 9 bless 9 marriage 9 prophet 9 exalting 9 unchaste 9 supernatural 9 eternally 9 purification 9 apostles 9 monastic 9 purified 9 communion 9 gods 9 celibate 9 christians 9 theological 9 monasticism 9 unspoiled 9 sterility 9 christian 9 buddha 9 deist 9 prophetic 9 saint 9 righteously 9 apostle 9 prayer 9 faiths 9 shrine 9 purifying 9 worships 9 virgins 9 glorious 9 dignifies 9 atonement 9 deification 9 orthodoxy 9 hallows 9 enshrining 9 nunneries 9 church 9 religiously 9 blessings 9 consecrate 9 gospel 9 pray 9 beatifying 9 yogis 9 theology 9 purifies 9 orthodox 9 untainted 9 torah 9 faithfully 9 catholic 9 heavens 9 yogi 9 consecrating 9 blessed 9 faithful 9 koran 9 abstinence 9 jesus 9 monastery 9 purities 9 consecration 9 catholics 9 prayers 9 prayed 9 sterile 9 blesses 9 enshrined 9 torahs 9 organic 9 bible 9 glory 9 allah 9 glories 9 
priests 9 dignifying 9 enshrine 9 mosques 9 spotlessly 9 prude 9 reverend 9 soulful 9 deify 9 christ 9 cathedrals 9 churches 9 cathedral 9 dignify 9 monasteries 9 raw 9 enshrines 9 refinement 9 nuns 9 monks 9 gloriously 9 almighty 9 marring 9 repent 9 prays 9 clean 9 orthodoxies 9 exterminates 9 rabbis 9 spotless 9 bibles 9 mosque 9 immaculate 9 organics 9 purifier 9 foods 9 lord 9 praying 9 repenting 9 marry 9 elevating 9 marrying 9 immaculately 9 rabbi 9 nunnery 9 priest 9 food 9 bloodiness 9 marries 9 synagogues 9 synagogue 9 refined 9 repents 9 angel 9 blessing 9 monk 9 rabbinical 9 organically 9 pope 9 nun 9 nobles 9 prophetically 9 blood 9 repented 9 pastor 9 purifiers 9 lords 9 bloody 9 untouched 9 cleaning 9 exterminating 9 exterminated 9 imam 9 higher power 9 cleaners 9 married 9 beatification 9 beatify 9 extermination 9 exterminate 9 cleaner 9 body 9 immune 9 atoning 9 imams 9 cleaned 9 atones 9 mary 9 refines 9 cleans 9 atone 9 immunities 9 immunity 9 stainless 9 refining 9 refine 9 atoned 9 exterminator 9 exterminators 9 impurity 10 degradation 10 depravity 10 desecrate 10 desecration 10 repulsiveness 10 degrading 10 decay 10 filth 10 depravities 10 defile 10 sin 10 fornication 10 repulsive 10 depraved 10 impiety 10 degrade 10 repugnance 10 impure 10 degraded 10 desecrations 10 sinfulness 10 impurities 10 indecencies 10 defiled 10 defiles 10 uncleanliness 10 damnation 10 debauchery 10 impious 10 sinful 10 necrophiliacs 10 desecrates 10 sleaziness 10 desecrating 10 desecrated 10 grossness 10 contaminates 10 sinning 10 promiscuity 10 befouls 10 rottenness 10 hedonism 10 revolting 10 repugnant 10 godless 10 scum 10 befoul 10 satanic 10 sluttiness 10 disgusting 10 pestilence 10 debased 10 trashiness 10 sins 10 degradingly 10 corrupting 10 deprave 10 perverted 10 debase 10 fornicating 10 degraders 10 defiling 10 slime 10 horrors 10 repugnantly 10 defiler 10 deviants 10 degrades 10 corrupts 10 debasing 10 perverts 10 parasitic 10 disgusts 10 deflowering 10 hedonistic 10 deviant 10 scummy 10 horrifying 10 necrophilia 10 contamination 10 rot 10 stain 10 contaminating 10 contaminants 10 dirtying 10 debases 10 contaminate 10 abhor 10 heresy 10 sleaze 10 staining 10 defilers 10 harlot 10 plagues 10 sullies 10 fornicators 10 vermin 10 befouling 10 incest 10 trashy 10 excreting 10 deforms 10 abhored 10 decayed 10 whores 10 deformities 10 perverse 10 adultery 10 fornicate 10 excrement 10 harlots 10 decaying 10 fornicator 10 unclean 10 nauseating 10 sully 10 heresies 10 satan 10 damns 10 satanically 10 sinned 10 sinners 10 adulterous 10 repulses 10 corruption 10 tainting 10 deformity 10 necrophiliac 10 decays 10 corrupted 10 deforming 10 contaminant 10 disgust 10 tarnishes 10 hell 10 filthy 10 taint 10 horrific 10 fecal 10 dirtied 10 flesh 10 stained 10 deform 10 putrid 10 scatalogical 10 dirties 10 whoring 10 cocksucker 10 plague 10 adulterers 10 excretes 10 infesting 10 slimy 10 excrete 10 scuzz 10 horror 10 tarnish 10 sexuality 10 parasite 10 obscenity 10 deformed 10 adulterer 10 befouled 10 muck 10 corpses 10 soiled 10 infest 10 incestuously 10 incestuous 10 fucker 10 devil 10 parasites 10 stains 10 skanks 10 corpse 10 whore 10 lepers 10 curses 10 corrupt 10 pathogens 10 diseased 10 deflower 10 hedonists 10 sinner 10 debaucherous 10 fester 10 hedonist 10 sleazy 10 fucks 10 promiscuous 10 cursed 10 curse 10 apostates 10 cocksuckers 10 heretic 10 lewdness 10 slutty 10 infests 10 festers 10 pervert 10 fuck 10 skanky 10 dirty 10 mucky 10 puke 10 alcoholism 10 feces 10 sullied 10 disgustingly 
10 sexual 10 cunt 10 taints 10 profane 10 heretics 10 fucking 10 tarnishing 10 fornicated 10 mar 10 shitting 10 slut 10 obscene 10 barf 10 rotten 10 disgusted 10 cunts 10 waste 10 parasitically 10 sinfully 10 wastes 10 vomit 10 pathogen 10 rats 10 pathogenic 10 indecent 10 infect 10 leper 10 indecently 10 shit 10 abhors 10 skank 10 infestation 10 deflowered 10 leprosy 10 diseases 10 heretical 10 dirt 10 cursing 10 tarnishment 10 disease 10 prostitution 10 infested 10 apostate 10 sluts 10 fuckers 10 profanity 10 addiction 10 contaminated 10 scuzzy 10 infectiousness 10 indecency 10 vomitted 10 germ 10 prostituting 10 excreted 10 rubbish 10 fucked 10 sodomy 10 untouchables 10 epidemics 10 swear 10 shits 10 whorehouses 10 pigsty 10 germs 10 prostituted 10 mud 10 dung 10 epidemic 10 rat 10 douchebag 10 perversely 10 pukes 10 puking 10 prostitutes 10 barfs 10 slutting 10 trashing 10 whored 10 douchebags 10 infection 10 shite 10 spoil 10 gross 10 repulsed 10 pus 10 festering 10 cockroaches 10 tainted 10 contagion 10 barfed 10 infects 10 damned 10 addictions 10 shitty 10 skanking 10 trash 10 whorehouse 10 phlegm 10 moldy 10 plaguing 10 shat 10 drugged 10 garbage 10 infecting 10 pandemics 10 viruses 10 nauseated 10 cockroach 10 puked 10 drugging 10 manure 10 mucking 10 lewd 10 alcoholics 10 gangrenous 10 barfing 10 gangrene 10 shitter 10 shittier 10 tarnished 10 cock 10 vomits 10 hookers 10 damn 10 addict 10 alcoholic 10 nausea 10 swearing 10 vomitting 10 skanked 10 infections 10 foul 10 prostitute 10 risque 10 lice 10 gonorrhea 10 wasting 10 profanities 10 divorces 10 crappy 10 spreading 10 wasters 10 addicting 10 trashed 10 addicts 10 scabies 10 swore 10 nauseous 10 phlegmatically 10 spoiling 10 nauseatingly 10 drugs 10 virus 10 waster 10 untouchable 10 addicted 10 damning 10 pandemic 10 hooker 10 bm 10 infected 10 festered 10 marred 10 phlegmatic 10 divorce 10 viral 10 contagiously 10 plagued 10 repulsing 10 swears 10 drug 10 spoiled 10 cum 10 divorcing 10 wasted 10 divorced 10 contagious 10 -------------------------------------------------------------------------------- /sources/MFD/moral_foundations_dictionary.dic: -------------------------------------------------------------------------------- 1 | % 2 | 01 HarmVirtue 3 | 02 HarmVice 4 | 03 FairnessVirtue 5 | 04 FairnessVice 6 | 05 IngroupVirtue 7 | 06 IngroupVice 8 | 07 AuthorityVirtue 9 | 08 AuthorityVice 10 | 09 PurityVirtue 11 | 10 PurityVice 12 | 11 MoralityGeneral 13 | % 14 | safe* 01 15 | peace* 01 16 | compassion* 01 17 | empath* 01 18 | sympath* 01 19 | care 01 20 | caring 01 21 | protect* 01 22 | shield 01 23 | shelter 01 24 | amity 01 25 | secur* 01 26 | benefit* 01 27 | defen* 01 28 | guard* 01 29 | preserve 01 07 09 30 | 31 | harm* 02 32 | suffer* 02 33 | war 02 34 | wars 02 35 | warl* 02 36 | warring 02 37 | fight* 02 38 | violen* 02 39 | hurt* 02 40 | kill 02 41 | kills 02 42 | killer* 02 43 | killed 02 44 | killing 02 45 | endanger* 02 46 | cruel* 02 47 | brutal* 02 48 | abuse* 02 49 | damag* 02 50 | ruin* 02 10 51 | ravage 02 52 | detriment* 02 53 | crush* 02 54 | attack* 02 55 | annihilate* 02 56 | destroy 02 57 | stomp 02 58 | abandon* 02 06 59 | spurn 02 60 | impair 02 61 | exploit 02 10 62 | exploits 02 10 63 | exploited 02 10 64 | exploiting 02 10 65 | wound* 02 66 | 67 | fair 03 68 | fairly 03 69 | fairness 03 70 | fair-* 03 71 | fairmind* 03 72 | fairplay 03 73 | equal* 03 74 | justice 03 75 | justness 03 76 | justifi* 03 77 | reciproc* 03 78 | impartial* 03 79 | egalitar* 03 80 | rights 03 81 | equity 03 82 | evenness 
03 83 | equivalent 03 84 | unbias* 03 85 | tolerant 03 86 | equable 03 87 | balance* 03 88 | homologous 03 89 | unprejudice* 03 90 | reasonable 03 91 | constant 03 92 | honest* 03 11 93 | 94 | unfair* 04 95 | unequal* 04 96 | bias* 04 97 | unjust* 04 98 | injust* 04 99 | bigot* 04 100 | discriminat* 04 101 | disproportion* 04 102 | inequitable 04 103 | prejud* 04 104 | dishonest 04 105 | unscrupulous 04 106 | dissociate 04 107 | preference 04 108 | favoritism 04 109 | segregat* 04 05 110 | exclusion 04 111 | exclud* 04 112 | together 05 113 | nation* 05 114 | homeland* 05 115 | family 05 116 | families 05 117 | familial 05 118 | group 05 119 | loyal* 05 07 120 | patriot* 05 121 | communal 05 122 | commune* 05 123 | communit* 05 124 | communis* 05 125 | comrad* 05 126 | cadre 05 127 | collectiv* 05 128 | joint 05 129 | unison 05 130 | unite* 05 131 | fellow* 05 132 | guild 05 133 | solidarity 05 134 | devot* 05 135 | member 05 136 | cliqu* 05 137 | cohort 05 138 | ally 05 139 | insider 05 140 | foreign* 06 141 | enem* 06 142 | betray* 06 08 143 | treason* 06 08 144 | traitor* 06 08 145 | treacher* 06 08 146 | disloyal* 06 08 147 | individual* 06 148 | apostasy 06 08 10 149 | apostate 06 08 10 150 | deserted 06 08 151 | deserter* 06 08 152 | deserting 06 08 153 | deceiv* 06 154 | jilt* 06 155 | imposter 06 156 | miscreant 06 157 | spy 06 158 | sequester 06 159 | renegade 06 160 | terroris* 06 161 | immigra* 06 162 | obey* 07 163 | obedien* 07 164 | duty 07 165 | law 07 166 | lawful* 07 11 167 | legal* 07 11 168 | duti* 07 169 | honor* 07 170 | respect 07 171 | respectful* 07 172 | respected 07 173 | respects 07 174 | order* 07 175 | father* 07 176 | mother 07 177 | motherl* 07 178 | mothering 07 179 | mothers 07 180 | tradition* 07 181 | hierarch* 07 182 | authorit* 07 183 | permit 07 184 | permission 07 185 | status* 07 186 | rank* 07 187 | leader* 07 188 | class 07 189 | bourgeoisie 07 190 | caste* 07 191 | position 07 192 | complian* 07 193 | command 07 194 | supremacy 07 195 | control 07 196 | submi* 07 197 | allegian* 07 198 | serve 07 199 | abide 07 200 | defere* 07 201 | defer 07 202 | revere* 07 203 | venerat* 07 204 | comply 07 205 | defian* 08 206 | rebel* 08 207 | dissent* 08 208 | subver* 08 209 | disrespect* 08 210 | disobe* 08 211 | sediti* 08 212 | agitat* 08 213 | insubordinat* 08 214 | illegal* 08 215 | lawless* 08 216 | insurgent 08 217 | mutinous 08 218 | defy* 08 219 | dissident 08 220 | unfaithful 08 221 | alienate 08 222 | defector 08 223 | heretic* 08 10 224 | nonconformist 08 225 | oppose 08 226 | protest 08 227 | refuse 08 228 | denounce 08 229 | remonstrate 08 230 | riot* 08 231 | obstruct 08 232 | piety 09 11 233 | pious 09 11 234 | purity 09 235 | pure* 09 236 | clean* 09 237 | steril* 09 238 | sacred* 09 239 | chast* 09 240 | holy 09 241 | holiness 09 242 | saint* 09 243 | wholesome* 09 11 244 | celiba* 09 245 | abstention 09 246 | virgin 09 247 | virgins 09 248 | virginity 09 249 | virginal 09 250 | austerity 09 251 | integrity 09 11 252 | modesty 09 253 | abstinen* 09 254 | abstemiousness 09 255 | upright 09 11 256 | limpid 09 257 | unadulterated 09 258 | maiden 09 259 | virtuous 09 260 | refined 09 261 | decen* 09 11 262 | immaculate 09 263 | innocent 09 264 | pristine 09 265 | church* 09 266 | disgust* 10 267 | deprav* 10 268 | disease* 10 269 | unclean* 10 270 | contagio* 10 271 | indecen* 10 11 272 | sin 10 273 | sinful* 10 274 | sinner* 10 275 | sins 10 276 | sinned 10 277 | sinning 10 278 | slut* 10 279 | whore 10 280 | dirt* 10 281 | impiety 10 282 | 
impious 10 283 | profan* 10 284 | gross 10 285 | repuls* 10 286 | sick* 10 287 | promiscu* 10 288 | lewd* 10 289 | adulter* 10 290 | debauche* 10 291 | defile* 10 292 | tramp 10 293 | prostitut* 10 294 | unchaste 10 295 | intemperate 10 296 | wanton 10 297 | profligate 10 298 | filth* 10 299 | trashy 10 300 | obscen* 10 301 | lax 10 302 | taint* 10 303 | stain* 10 304 | tarnish* 10 305 | debase* 10 306 | desecrat* 10 307 | wicked* 10 11 308 | blemish 10 309 | exploitat* 10 310 | pervert 10 311 | wretched* 10 11 312 | righteous* 11 313 | moral* 11 314 | ethic* 11 315 | value* 11 316 | upstanding 11 317 | good 11 318 | goodness 11 319 | principle* 11 320 | blameless 11 321 | exemplary 11 322 | lesson 11 323 | canon 11 324 | doctrine 11 325 | noble 11 326 | worth* 11 327 | ideal* 11 328 | praiseworthy 11 329 | commendable 11 330 | character 11 331 | proper 11 332 | laudable 11 333 | correct 11 334 | wrong* 11 335 | evil 11 336 | immoral* 11 337 | bad 11 338 | offend* 11 339 | offensive* 11 340 | transgress* 11 341 | -------------------------------------------------------------------------------- /sources/NRC/create_data_dictionary-NRC.R: -------------------------------------------------------------------------------- 1 | library("quanteda") 2 | 3 | class(data_dictionary_NRC) <- "dictionary2" 4 | data_dictionary_NRC <- as.dictionary(data_dictionary_NRC) 5 | 6 | meta(data_dictionary_NRC) <- 7 | list( 8 | title = "NRC Word-Emotion Association Lexicon", 9 | description = "A quanteda dictionary object containing Mohammad and Turney's (2010, 2013) English version of the NRC Word-Emotion Association Lexicon (aka NRC Emotion Lexicon aka EmoLex): association of words with eight emotions (anger, fear, anticipation, trust, surprise, sadness, joy, and disgust) and two sentiments (negative and positive) manually annotated on Amazon's Mechanical Turk. Available in 40 different languages.", 10 | url = "http://saifmohammad.com/WebPages/AccessResource.htm", 11 | 12 | license = "Open, or CAD $975 for commercial use.", 13 | reference = "Mohammad, S. & Turney, P. (2013). Crowdsourcing a Word-Emotion Association Lexicon. _Computational Intelligence_, 29(3), 436--465. https://arxiv.org/abs/1308.6297 14 | 15 | Mohammad, S. & Turney, P. (2010). Emotions Evoked by Common Words and Phrases: Using Mechanical Turk to Create an Emotion Lexicon. In _Proceedings of the NAACL-HLT 2010 Workshop on Computational Approaches to Analysis and Generation of Emotion in Text_, June 2010, LA, California.
https://dl.acm.org/doi/10.5555/1860631.1860635" 16 | ) 17 | 18 | polarity(data_dictionary_NRC) <- list(pos = c("positive"), neg = c("negative")) 19 | 20 | usethis::use_data(data_dictionary_NRC, overwrite = TRUE) 21 | -------------------------------------------------------------------------------- /sources/RID/create-data_dictionary_RID.R: -------------------------------------------------------------------------------- 1 | # Regressive Imagery Dictionary 2 | 3 | library("quanteda") 4 | 5 | data_dictionary_RID <- 6 | dictionary(file = "RID/RID.CAT") 7 | devtools::use_data(data_dictionary_RID, overwrite = TRUE) 8 | -------------------------------------------------------------------------------- /sources/Rauh/Rauh_SentDictionaryGerman.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/sources/Rauh/Rauh_SentDictionaryGerman.Rdata -------------------------------------------------------------------------------- /sources/Rauh/Rauh_SentDictionaryGerman_Negation.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/sources/Rauh/Rauh_SentDictionaryGerman_Negation.Rdata -------------------------------------------------------------------------------- /sources/Rauh/create-data_dictionary_Rauh.R: -------------------------------------------------------------------------------- 1 | #' Rauh's German Political Sentiment Dictionary 2 | 3 | library("quanteda") 4 | library("dplyr") 5 | 6 | # load dictionary dataframes (downloaded here: https://doi.org/10.7910/DVN/BKBX) 7 | load("Rauh/Rauh_SentDictionaryGerman_Negation.Rdata") 8 | load("Rauh/Rauh_SentDictionaryGerman.Rdata") 9 | 10 | # new column where NOT and word are divided with a space 11 | neg.sent.dictionary <- neg.sent.dictionary %>% 12 | mutate(word = gsub("NOT_", "NOT ", feature)) %>% 13 | mutate(sentiment = ifelse(sentiment == 1, "neg_negative", "neg_positive")) 14 | 15 | sent.dictionary <- sent.dictionary %>% 16 | mutate(word = feature) %>% 17 | mutate(sentiment = ifelse(sentiment == -1, "negative", "positive")) 18 | 19 | # bind both dataframes 20 | sent_dictionary_rauh <- bind_rows(sent.dictionary, neg.sent.dictionary) 21 | 22 | # save as quanteda dictionary (word and sentiment column) 23 | data_dictionary_Rauh <- quanteda::as.dictionary(sent_dictionary_rauh) 24 | 25 | data_dictionary_Rauh <- as.dictionary(data_dictionary_Rauh) 26 | meta(data_dictionary_Rauh) <- 27 | list( 28 | title = "Rauh's German Political Sentiment Dictionary", 29 | description = "A quanteda dictionary object containing the dictionaries provided in Rauh (forthcoming). Rauh assesses its performance against human intuition of sentiment in German political language (parliamentary speeches, party manifestos, and media coverage). The resource builds on, harmonizes and extends the SentiWS (Remus et al. 2010) and GermanPolarityClues (Waltinger 2010) dictionaries. In order to use the negation correction provided by the dictionary, currently a combination of tokens_replace and tokens_compound is required to harmonize the five covered bi-gram patterns prior to scoring. The example below shows how to conduct this transformation. 
Note that the process changes the terms 'nicht|nichts|kein|keine|keinen' to a joint term altering some of the features of the original corpus.", 30 | url = "https://doi.org/10.7910/DVN/BKBXWD", 31 | reference = "Rauh, C. (2018). Validating a Sentiment Dictionary for German Political Language: A Workbench Note. Journal of Information Technology & Politics, 15(4), 319-343. 32 | 33 | Remus, R., Quasthoff U., & Heyer, G. (2010). \"SentiWS - a Publicly Available German-language Resource for Sentiment Analysis.\" In Proceedings of the 7th International Language Resources and Evaluation (LREC'10), 1168-1171. 34 | 35 | Waltinger, U. (2010). \"GermanPolarityClues: A Lexical Resource for German Sentiment Analysis.\" In International Conference on Language Resources and Evaluation, 17-23 May 2010 LREC'10.", 36 | license = "Unknown" 37 | ) 38 | 39 | 40 | polarity(data_dictionary_Rauh) <- 41 | list(pos = c("positive", "neg_negative"), neg = c("negative", "neg_positive")) 42 | 43 | usethis::use_data(data_dictionary_Rauh, overwrite = TRUE) 44 | -------------------------------------------------------------------------------- /sources/geninquirer/create-data_dictionary_geninquirer.R: -------------------------------------------------------------------------------- 1 | library("quanteda") 2 | 3 | geninquirer <- read.csv("geninquirer/inquireraugmented.csv", 4 | stringsAsFactors = FALSE, comment.char = "") 5 | GIpos <- 6 | c(geninquirer$Entry[geninquirer$Positiv == "Positiv"], 7 | geninquirer$Entry[geninquirer$Yes == "Yes"]) %>% 8 | char_tolower %>% 9 | stringi::stri_replace_all_regex("#\\w+$", "") %>% 10 | unique 11 | GIneg <- 12 | c(geninquirer$Entry[geninquirer$Negativ == "Negativ"], 13 | geninquirer$Entry[geninquirer$No == "No"]) %>% 14 | char_tolower %>% 15 | stringi::stri_replace_all_regex("#\\w+$", "") %>% 16 | unique 17 | data_dictionary_geninqposneg <- 18 | dictionary(list(positive = GIpos, negative = GIneg)) 19 | 20 | meta(data_dictionary_geninqposneg) <- 21 | list( 22 | title = "Augmented General Inquirer Positiv and Negativ dictionary", 23 | url = "http://www.wjh.harvard.edu/~inquirer/spreadsheet_guide.htm", 24 | description = "A lexicon containing the Positiv and Negativ dictionary entries from the augmented 25 | General Inquirer. These are new valence categories described at 26 | `http://www.wjh.harvard.edu/~inquirer/homecat.htm` but also include the 27 | terms from the 'yes' and 28 | 'no' dictionary entries.", 29 | url = "http://www.wjh.harvard.edu/~inquirer/spreadsheet_guide.htm", 30 | license = "Open, but email the creators for commercial use. Many more categories are available.", 31 | reference = "Stone, P.J., Dunphy, C.D., & Smith, M.S. (1966). _The General Inquirer: A Computer Approach to Content Analysis._ Cambridge, MA: MIT Press." 
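# A minimal, hypothetical sketch of the negation pre-processing described in the
# data_dictionary_Rauh metadata above: the five negators
# 'nicht', 'nichts', 'kein', 'keine', 'keinen' are harmonised into "NOT <word>"
# bi-grams so that they match the dictionary's negation entries created by the
# gsub() step in create-data_dictionary_Rauh.R. It assumes quanteda and
# quanteda.sentiment are attached and that txt is a character vector of German text:
#
#   toks <- tokens(txt)
#   toks <- tokens_replace(toks,
#                          pattern = c("nicht", "nichts", "kein", "keine", "keinen"),
#                          replacement = rep("NOT", 5))
#   toks <- tokens_compound(toks, pattern = phrase("NOT *"), concatenator = " ")
#   textstat_polarity(dfm(toks), dictionary = data_dictionary_Rauh)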
32 | ) 33 | 34 | polarity(data_dictionary_geninqposneg) <- 35 | list(pos = "positive", neg = "negative") 36 | 37 | 38 | usethis::use_data(data_dictionary_geninqposneg, overwrite = TRUE) 39 | -------------------------------------------------------------------------------- /sources/make_sentiment_dictionaries.R: -------------------------------------------------------------------------------- 1 | ## (re)make all sentiment dictionaries 2 | 3 | library("quanteda") 4 | 5 | source("AFINN/create-data_dictionary_AFINN.R") 6 | source("ANEW/create-data_dictionary_ANEW.R") 7 | source("geninquirer/create-data_dictionary_geninquirer.R") 8 | source("Hu-Liu/create_data_dictionary-HuLiu.R") 9 | source("Loughran-McDonald/create-data_dictionary_LoughranMcDonald.R") 10 | source("NRC/create_data_dictionary-NRC.R") 11 | source("Rauh/create-data_dictionary_Rauh.R") 12 | source("sentiws/create-data_dictionary_sentiws.R") 13 | 14 | ## not sentiment dictionaries 15 | # source("Laver-Garry/create-data_dictionary_LaverGarry.R") 16 | # source("MFD/create-data_dictionary_MFD.R") 17 | # source("RID/create-data_dictionary_RID.R") 18 | 19 | # LSD 20 | data("data_dictionary_LSD2015", package = "quanteda") 21 | polarity(data_dictionary_LSD2015) <- 22 | list(pos = c("positive", "neg_negative"), neg = c("negative", "neg_positive")) 23 | names(meta(data_dictionary_LSD2015))[which(names(meta(data_dictionary_LSD2015)) == "source")] <- "reference" 24 | usethis::use_data(data_dictionary_LSD2015, overwrite = TRUE) 25 | -------------------------------------------------------------------------------- /sources/sentiws/create-data_dictionary_sentiws.R: -------------------------------------------------------------------------------- 1 | # SentiWS Dictionary 2 | 3 | library("quanteda") 4 | library("dplyr") 5 | library("tidyr") 6 | library("stringr") 7 | 8 | read_senti_scores <- function(filename) { 9 | 10 | results <- read.delim(filename, header = FALSE, encoding="UTF-8") %>% 11 | cbind(str_split_fixed(.$V3, "[,-]",50),stringsAsFactors = FALSE) %>% 12 | mutate( 13 | V1 = str_sub(str_match(V1,".*\\|"),1,-2), 14 | nr = row_number() 15 | ) %>% 16 | select(-V3) %>% 17 | mutate(nr = as.character(nr)) %>% 18 | gather(wordstem,word,V1,1:48, -nr,-V2) %>% 19 | select(word,V2) %>% rename(score=V2) %>% 20 | filter(word != "") %>% 21 | arrange(word) 22 | 23 | } 24 | 25 | positive <- read_senti_scores("sentiws/SentiWS_v1.8c_Positive.txt") %>% 26 | mutate(sentiment = "positive") %>% 27 | unique() 28 | negative <- read_senti_scores("sentiws/SentiWS_v1.8c_Negative.txt") %>% 29 | mutate(sentiment = "negative") %>% 30 | unique() 31 | sentis <- bind_rows(positive, negative) 32 | 33 | data_dictionary_sentiws <- as.dictionary(sentis) 34 | 35 | polarity(data_dictionary_sentiws) <- 36 | list(pos = c("positive"), neg = c("negative")) 37 | valence(data_dictionary_sentiws) <- 38 | list(positive = positive[!duplicated(positive$word), "score"], 39 | negative = negative[!duplicated(negative$word), "score"]) 40 | 41 | meta(data_dictionary_sentiws) <- 42 | list( 43 | title = "SentimentWortschatz (SentiWS)", 44 | description = "A quanteda dictionary object containing SentimentWortschatz (SentiWS), a publicly available German-language resource for sentiment analysis. The current version of SentiWS contains 1,650 positive and 1,818 negative words, which sum up to 15,649 positive and 15,632 negative word forms including their inflections. It not only contains adjectives and adverbs explicitly expressing a sentiment, but also nouns and verbs implicitly containing one. 
The original dictionary weights within the interval of -1 to 1. Note that the version implemented in quanteda.dictionaries uses a binary classification into positive (weight > 0) and negative (weight < 0) features.", 45 | url = "http://wortschatz.uni-leipzig.de/en/download/", 46 | reference = "Remus, R., Quasthoff U., and Heyer, G. (2010). [SentiWS: a Publicly Available German-language Resource for Sentiment Analysis](http://www.lrec-conf.org/proceedings/lrec2010/pdf/490_Paper.pdf). In _Proceedings of the 7th International Language Ressources and Evaluation (LREC'10)_, 1168--1171.", 47 | license = "CC-BY-NC-SA 3.0" 48 | ) 49 | 50 | usethis::use_data(data_dictionary_sentiws, overwrite = TRUE) 51 | 52 | -------------------------------------------------------------------------------- /sources/test-misc.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/sources/test-misc.R -------------------------------------------------------------------------------- /tests/spelling.R: -------------------------------------------------------------------------------- 1 | if (requireNamespace("spelling", quietly = TRUE)) 2 | spelling::spell_check_test(vignettes = TRUE, error = FALSE, 3 | skip_on_cran = TRUE) 4 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library("testthat") 2 | library("quanteda") 3 | library("quanteda.sentiment") 4 | 5 | test_check("quanteda.sentiment") 6 | -------------------------------------------------------------------------------- /tests/testthat/test-data.R: -------------------------------------------------------------------------------- 1 | library("quanteda") 2 | data("data_dictionary_LSD2015", package = "quanteda.sentiment") 3 | 4 | test_that("dictionaries have polarities and valences set", { 5 | skip("skip until digits issue can be solved") 6 | expect_output( 7 | print(data_dictionary_AFINN, 0, 0), 8 | "Dictionary object with 1 key entry.\nValences set for keys: AFINN.", 9 | fixed = TRUE 10 | ) 11 | 12 | expect_output( 13 | print(data_dictionary_ANEW, 0, 0), 14 | "Dictionary object with 3 key entries.\nValences set for keys: pleasure, arousal, dominance ", 15 | fixed = TRUE 16 | ) 17 | 18 | expect_output( 19 | print(data_dictionary_geninqposneg, 0, 0), 20 | 'Dictionary object with 2 key entries.\nPolarities: pos = "positive"; neg = "negative" ', 21 | fixed = TRUE 22 | ) 23 | 24 | expect_output( 25 | print(data_dictionary_HuLiu, 0, 0), 26 | 'Dictionary object with 2 key entries.\nPolarities: pos = "positive"; neg = "negative" ', 27 | fixed = TRUE 28 | ) 29 | 30 | expect_output( 31 | print(data_dictionary_LoughranMcDonald, 0, 0), 32 | 'Dictionary object with 9 key entries.\nPolarities: pos = "POSITIVE"; neg = "NEGATIVE" ', 33 | fixed = TRUE 34 | ) 35 | 36 | expect_output( 37 | print(data_dictionary_LSD2015, 0, 0), 38 | 'Dictionary object with 4 key entries.\nPolarities: pos = "positive", "neg_negative"; neg = "negative", "neg_positive" ', 39 | fixed = TRUE 40 | ) 41 | 42 | expect_output( 43 | print(data_dictionary_NRC, 0, 0), 44 | 'Dictionary object with 10 key entries.\nPolarities: pos = "positive"; neg = "negative" ', 45 | fixed = TRUE 46 | ) 47 | 48 | expect_output( 49 | print(data_dictionary_Rauh, 0, 0), 50 | 'Dictionary object with 4 key entries.\nPolarities: pos = "positive", "neg_negative"; neg = 
"negative", "neg_positive" ', 51 | fixed = TRUE 52 | ) 53 | 54 | expect_output( 55 | print(data_dictionary_sentiws, 0, 0), 56 | 'Dictionary object with 2 key entries.\nPolarities: pos = "positive"; neg = "negative" \nValences set for keys: positive, negative ', 57 | fixed = TRUE 58 | ) 59 | }) 60 | 61 | test_that("dictionaries have metadata set", { 62 | meta_ok <- function(d) { 63 | fields <- c("title", "description", "url", "reference", "license") 64 | tmp <- fields %in% names(meta(d)) 65 | if (all(tmp)) { 66 | TRUE 67 | } else { 68 | warning("MISSING: ", paste(fields[!tmp], collapse = " "), call. = FALSE) 69 | FALSE 70 | } 71 | } 72 | expect_true(meta_ok(data_dictionary_AFINN)) 73 | expect_true(meta_ok(data_dictionary_ANEW)) 74 | expect_true(meta_ok(data_dictionary_geninqposneg)) 75 | expect_true(meta_ok(data_dictionary_HuLiu)) 76 | expect_true(meta_ok(data_dictionary_LoughranMcDonald)) 77 | expect_true(meta_ok(data_dictionary_LSD2015)) 78 | expect_true(meta_ok(data_dictionary_NRC)) 79 | expect_true(meta_ok(data_dictionary_Rauh)) 80 | expect_true(meta_ok(data_dictionary_sentiws)) 81 | }) 82 | 83 | -------------------------------------------------------------------------------- /tests/testthat/test-misc.R: -------------------------------------------------------------------------------- 1 | library("quanteda") 2 | 3 | test_that("printing augmented dictionary works", { 4 | skip("skip until digits issue can be solved") 5 | expect_output( 6 | print(data_dictionary_AFINN, 0, 0), 7 | "Dictionary object with 1 key entry.\nValences set for keys: AFINN ", 8 | fixed = TRUE 9 | ) 10 | 11 | dict <- quanteda::dictionary(list(one = list(oneA = c("a", "b"), 12 | oneB = "d"), 13 | two = c("x", "y"))) 14 | polarity(dict) <- list(pos = "one", neg = "two") 15 | expect_output( 16 | print(dict, 0, 0), 17 | 'Dictionary object with 2 primary key entries and 2 nested levels.\nPolarities: pos = "one"; neg = "two" ', 18 | fixed = TRUE 19 | ) 20 | }) 21 | 22 | test_that("friendly error messages work", { 23 | expect_error( 24 | textstat_polarity(0), 25 | "textstat_polarity() only works on character, corpus, dfm, tokens objects.", 26 | fixed = TRUE 27 | ) 28 | expect_error( 29 | textstat_valence(0), 30 | "textstat_valence() only works on character, corpus, dfm, tokens objects.", 31 | fixed = TRUE 32 | ) 33 | }) 34 | 35 | test_that("subsetting preserves valence and polarity", { 36 | # expect_output( 37 | # print(data_dictionary_ANEW[1], 0, 0), 38 | # "Dictionary object with 1 key entry.\nValences set for keys: pleasure, arousal, dominance ", 39 | # fixed = TRUE 40 | # ) 41 | # expect_output( 42 | # print(data_dictionary_ANEW["pleasure"], 0, 0), 43 | # "Dictionary object with 1 key entry.\nValences set for keys: pleasure, arousal, dominance ", 44 | # fixed = TRUE 45 | # ) 46 | 47 | dict <- quanteda::dictionary(list(one = c("a", "b"), 48 | two = c("c", "d"), 49 | three = c("e", "f"))) 50 | polarity(dict) <- list(pos = c("one", "two"), neg = "three") 51 | 52 | # expect_output( 53 | # print(dict[c(1, 3)], 0, 0), 54 | # 'Dictionary object with 2 key entries.\nPolarities: pos = "one", "two"; neg = "three"', 55 | # fixed = TRUE 56 | # ) 57 | }) 58 | -------------------------------------------------------------------------------- /tests/testthat/test-test.R: -------------------------------------------------------------------------------- 1 | test_that("investigate digits problem", { 2 | skip("skip until digits issue can be solved") 3 | 4 | data("data_dictionary_LSD2015", package = "quanteda") 5 | 
expect_output(print(data_dictionary_LSD2015, max_nkey = 0, max_nval = 0), 6 | "Dictionary object with 4 key entries.", 7 | fixed = TRUE) 8 | 9 | expect_output(print(data_dictionary_geninqposneg, 0, 0), 10 | "Dictionary object with 2 key entries.", 11 | fixed = TRUE) 12 | }) 13 | -------------------------------------------------------------------------------- /tests/testthat/test-textstat_polarity.R: -------------------------------------------------------------------------------- 1 | library("quanteda") 2 | test_that("textstat_polarity works on all object types", { 3 | txt <- c(d1 = "good good bad bad good word1 word1 word1 word2 word2", 4 | d2 = "good", 5 | d3 = "notsentiment", 6 | d4 = "Great!", 7 | d5 = "good good") 8 | 9 | smooth <- 0.5 10 | logit <- c(log(3 + smooth) - log(2 + smooth), 11 | log(1 + smooth) - log(0 + smooth), 12 | log(0 + smooth) - log(0 + smooth), 13 | log(1 + smooth) - log(0 + smooth), 14 | log(2 + smooth) - log(0 + smooth)) 15 | 16 | data(data_dictionary_LSD2015, package = "quanteda.sentiment") 17 | 18 | expect_equivalent( 19 | textstat_polarity(txt, dictionary = data_dictionary_LSD2015), 20 | data.frame(doc_id = names(txt), sentiment = logit, stringsAsFactors = FALSE) 21 | ) 22 | expect_identical( 23 | textstat_polarity(txt, dictionary = data_dictionary_LSD2015), 24 | textstat_polarity(corpus(txt), dictionary = data_dictionary_LSD2015) 25 | ) 26 | expect_identical( 27 | textstat_polarity(txt, dictionary = data_dictionary_LSD2015), 28 | textstat_polarity(tokens(txt), dictionary = data_dictionary_LSD2015) 29 | ) 30 | expect_identical( 31 | textstat_polarity(txt, dictionary = data_dictionary_LSD2015), 32 | textstat_polarity(dfm(tokens(txt)), dictionary = data_dictionary_LSD2015) 33 | ) 34 | }) 35 | 36 | test_that("different sentiment functions work as expected", { 37 | txt <- c(d1 = "good good bad bad good word1 word1 word1 word2 word2", 38 | d2 = "good", 39 | d3 = "notsentiment", 40 | d4 = "Great!", 41 | d5 = "good good") 42 | 43 | # logit scale 44 | smooth <- 0.5 45 | logit <- c(log(3 + smooth) - log(2 + smooth), 46 | log(1 + smooth) - log(0 + smooth), 47 | log(0 + smooth) - log(0 + smooth), 48 | log(1 + smooth) - log(0 + smooth), 49 | log(2 + smooth) - log(0 + smooth)) 50 | data(data_dictionary_LSD2015, package = "quanteda.sentiment") 51 | expect_equal( 52 | logit, 53 | textstat_polarity(txt, dictionary = data_dictionary_LSD2015)$sentiment 54 | ) 55 | 56 | # relative proportional difference 57 | rpd <- c(3 - 2, 58 | 1 - 0, 59 | 0 - 0, 60 | 1 - 0, 61 | 2 - 0) / c(5, 1, 0, 1, 2) 62 | expect_equal( 63 | rpd, 64 | textstat_polarity(txt, dictionary = data_dictionary_LSD2015, 65 | fun = sent_relpropdiff)$sentiment 66 | ) 67 | 68 | # absolute proportional difference 69 | apd <- c(3 - 2, 70 | 1 - 0, 71 | 0 - 0, 72 | 1 - 0, 73 | 2 - 0) / unname(ntoken(txt)) 74 | expect_equal( 75 | apd, 76 | textstat_polarity(txt, dictionary = data_dictionary_LSD2015, 77 | fun = sent_abspropdiff)$sentiment 78 | ) 79 | }) 80 | 81 | test_that("textstat_polarity error conditions work", { 82 | dict <- dictionary(list( 83 | happy = c("happy", "jubilant", "exuberant"), 84 | sad = c("sad", "morose", "down"), 85 | okay = "just okay" 86 | )) 87 | expect_error( 88 | textstat_polarity("Happy, sad, neutral.", dictionary = dict), 89 | "polarity is not set for this dictionary; see ?polarity", 90 | fixed = TRUE 91 | ) 92 | 93 | }) 94 | 95 | test_that("polarity functions work", { 96 | dict <- dictionary(list( 97 | happy = c("happy", "jubilant", "exuberant"), 98 | sad = c("sad", "morose", "down"), 99 | okay = 
"just okay" 100 | )) 101 | 102 | expect_equal(polarity(dict), NULL) 103 | 104 | polarity(dict) <- list(pos = "happy", neg = "sad") 105 | expect_identical( 106 | polarity(dict), 107 | list(pos = "happy", neg = "sad") 108 | ) 109 | 110 | polarity(dict) <- list(pos = "happy", neg = "sad", neut = "okay") 111 | expect_identical( 112 | polarity(dict), 113 | list(pos = "happy", neg = "sad", neut = "okay") 114 | ) 115 | 116 | polarity(dict) <- list(pos = c("happy", "okay"), neg = "sad") 117 | expect_identical( 118 | polarity(dict), 119 | list(pos = c("happy", "okay"), neg = "sad") 120 | ) 121 | 122 | expect_error( 123 | polarity(dict) <- list(blank = "happy", neg = "sad"), 124 | "value must be a list of 'pos', 'neg', and (optionally) 'neut'", 125 | fixed = TRUE 126 | ) 127 | expect_error( 128 | polarity(dict) <- list(pos = "happy", neg = "sad", neutr = "okay"), 129 | "value must be a list of 'pos', 'neg', and (optionally) 'neut'", 130 | fixed = TRUE 131 | ) 132 | 133 | # this should generate an error 134 | expect_error( 135 | polarity(dict) <- list(pos = "notfound", neg = "sad"), 136 | "'notfound' key not found in this dictionary" 137 | ) 138 | 139 | # should test that both pos and neg are assigned ? 140 | 141 | }) 142 | 143 | test_that("get_polarity_dictionary() works", { 144 | dict <- dictionary(list( 145 | happy = c("happy", "jubilant", "exuberant"), 146 | sad = c("sad", "morose", "down"), 147 | okay = "just okay" 148 | )) 149 | expect_equal(polarity(dict), NULL) 150 | 151 | polarity(dict) <- list(pos = "happy", neg = "sad", neut = "okay") 152 | 153 | expect_identical( 154 | quanteda.sentiment:::get_polarity_dictionary(dict) |> 155 | quanteda::as.list(), 156 | list(pos = c("happy", "jubilant", "exuberant"), 157 | neg = c("sad", "morose", "down"), 158 | neut = "just okay") 159 | ) 160 | 161 | expect_identical( 162 | quanteda.sentiment:::get_polarity_dictionary(dict) |> polarity(), 163 | list(pos = "pos", neg = "neg", neut = "neut") 164 | ) 165 | 166 | polarity(dict) <- list(pos = "happy", neg = "sad", neut = "okay") 167 | dict["okay"] <- NULL 168 | expect_error( 169 | quanteda.sentiment:::get_polarity_dictionary(dict), 170 | "'okay' key not found in this dictionary" 171 | ) 172 | }) 173 | 174 | test_that("nested scope works for textstatpolarity on tokens", { 175 | dict <- dictionary(list(positive = "good", negative = "not good")) 176 | polarity(dict) <- list(pos = "positive", neg = "negative") 177 | valence(dict) <- c(positive = 1, negative = -1) 178 | toks <- tokens("The test is not good") 179 | 180 | expect_equivalent( 181 | textstat_polarity(toks, dictionary = dict, fun = sent_abspropdiff), 182 | data.frame(doc_id = "text1", sentiment = -0.25, row.names = NULL) 183 | ) 184 | }) 185 | -------------------------------------------------------------------------------- /tests/testthat/test-textstat_valence.R: -------------------------------------------------------------------------------- 1 | library("quanteda") 2 | 3 | test_that("textstat_valence works for uniform valences within key", { 4 | dict <- dictionary(list(positive = c("good", "great"), 5 | negative = c("bad"), 6 | neg_positive = "not good", 7 | neg_negative = "not bad")) 8 | txt <- c(d1 = "good good bad bad good word1 word1 word1 word2 word2", 9 | d2 = "good", 10 | d3 = "notsentiment", 11 | d4 = "Great! 
Not bad.", 12 | d5 = "good good not good bad") 13 | 14 | # for two categories 15 | valence(dict) <- list(positive = 1, negative = -1) 16 | 17 | corp <- corpus(txt) 18 | toks <- tokens(corp) 19 | dfmat <- dfm(toks) 20 | 21 | expect_identical( 22 | textstat_valence(corp, dict), 23 | textstat_valence(toks, dict) 24 | ) 25 | expect_identical( 26 | textstat_valence(corp, dict), 27 | textstat_valence(dfmat, dict) 28 | ) 29 | 30 | expect_identical( 31 | textstat_valence(corp, dict)$sentiment, 32 | c((3 * 1 + 2 * -1) / (3 + 2), 33 | (1 * 1 + 0 * -1) / (1 + 0), 34 | (0 * 1 + 0 * -1) / (1), 35 | (1 * 1 + 1 * -1) / (1 + 1), 36 | (3 * 1 + 1 * -1) / (3 + 1)) 37 | ) 38 | 39 | # for multiple categories within one polarity 40 | valence(dict) <- list(positive = 1, negative = -1, 41 | neg_negative = 1, neg_positive = -1) 42 | expect_identical( 43 | textstat_valence(corp, dict), 44 | textstat_valence(toks, dict) 45 | ) 46 | expect_equal( 47 | all.equal(textstat_valence(corp, dict)$sentiment, 48 | textstat_valence(dfmat, dict)$sentiment), 49 | "Mean relative difference: 1.5" 50 | ) 51 | expect_identical( 52 | textstat_valence(corp, dict)$sentiment, 53 | c((3 * 1 + 2 * -1) / (5), 54 | (1 * 1 + 0 * -1) / (1), 55 | (0 * 1 + 0 * -1) / (1), 56 | (2 * 1 + 0 * -1) / (2), 57 | (2 * 1 + 2 * -1) / (4)) 58 | ) 59 | }) 60 | 61 | test_that("textstat_valence with individual value scores works", { 62 | dict <- dictionary(list( 63 | happy = c("happy", "jubilant", "exuberant"), 64 | sad = c("sad", "morose", "down"), 65 | okay = c("just okay", "okay") 66 | )) 67 | valence(dict) <- list( 68 | happy = c("happy" = 1, "jubilant" = 2, "exuberant" = 2), 69 | sad = c("sad" = -1, "morose" = -2, "down" = -1), 70 | okay = c("just okay" = 0.5, "okay" = 5) 71 | ) 72 | txt <- c(d1 = "sad word happy word exuberant", 73 | d2 = "down sad just okay", 74 | d3 = "sad happy word word") 75 | 76 | corp <- corpus(txt) 77 | toks <- tokens(corp) %>% 78 | tokens_compound(dict, concatenator = " ") 79 | dfmat <- dfm(toks) 80 | 81 | expect_identical( 82 | textstat_valence(corp, dict), 83 | textstat_valence(toks, dict) 84 | ) 85 | expect_identical( 86 | textstat_valence(corp, dict), 87 | textstat_valence(dfmat, dict) 88 | ) 89 | 90 | sent <- c((-1 + 1 + 2) / 3, # 5 91 | (-1 - 1 + 0.5) / 3, 92 | (-1 + 1) / 2) # 4 93 | expect_identical( 94 | textstat_valence(txt, dict), 95 | data.frame(doc_id = docnames(dfmat), 96 | sentiment = sent) 97 | ) 98 | }) 99 | 100 | test_that("textstat_valence error conditions work", { 101 | dict <- dictionary(list( 102 | happy = c("happy", "jubilant", "exuberant"), 103 | sad = c("sad", "morose", "down"), 104 | okay = "just okay" 105 | )) 106 | expect_error( 107 | textstat_valence("Happy, sad, neutral.", dictionary = dict), 108 | "no valenced keys found" 109 | ) 110 | }) 111 | 112 | test_that("valence assignment functions work", { 113 | dict <- dictionary(list( 114 | happy = c("happy", "jubilant", "exuberant"), 115 | sad = c("sad", "morose", "down"), 116 | okay = "just okay" 117 | )) 118 | 119 | expect_equal(valence(dict), NULL) 120 | 121 | expect_error( 122 | valence(dict) <- list(happy = "a", sad = -1), 123 | "valence values must be numeric" 124 | ) 125 | 126 | valence(dict) <- list(happy = 1, sad = -1, okay = 0) 127 | expect_identical( 128 | valence(dict), 129 | list(happy = c(happy = 1, jubilant = 1, exuberant = 1), 130 | sad = c(sad = -1, morose = -1, down = -1), 131 | okay = c(`just okay` = 0)) 132 | ) 133 | }) 134 | 135 | test_that("valence error checks work", { 136 | dict <- dictionary(list(top = c("top1", "top2"), 137 | 
nested = list(nest1 = c("a", "one"), 138 | nest2 = c("b", "two")))) 139 | expect_error( 140 | valence(dict) <- list(top = c(1, 2), nested = -5), 141 | "valenced dictionaries cannot be nested" 142 | ) 143 | }) 144 | 145 | test_that("dictionary print method shows valence and polarity", { 146 | dict <- dictionary(list( 147 | happy = c("happy", "jubilant", "exuberant"), 148 | sad = c("sad", "morose", "down") 149 | )) 150 | valence(dict) <- c(happy = 1, sad = -1) 151 | expect_output(print(dict), 152 | "Dictionary object with 2 key entries. 153 | Valences set for keys: happy, sad 154 | - [happy]: 155 | - happy, jubilant, exuberant 156 | - [sad]: 157 | - sad, morose, down", fixed = TRUE) 158 | 159 | dict <- dictionary(list( 160 | happiness = c("happy", "jubilant", "exuberant", "content"), 161 | anger = c("mad", "peeved", "irate", "furious", "livid") 162 | )) 163 | valence(dict) <- list(happiness = c(3, 4, 5, 2), 164 | anger = c(3.1, 2.4, 2.9, 4.1, 5.0)) 165 | expect_output(print(dict), 166 | "Dictionary object with 2 key entries. 167 | Valences set for keys: happiness, anger 168 | - [happiness]: 169 | - happy, jubilant, exuberant, content 170 | - [anger]: 171 | - mad, peeved, irate, furious, livid", fixed = TRUE) 172 | }) 173 | 174 | test_that("overlapping values work as expected", { 175 | dict <- dictionary(list( 176 | happy = c("okay", "exuberant"), 177 | sad = c("okay", "depressed") 178 | )) 179 | valence(dict) <- list(happy = c(okay = 1, exuberant = 3), 180 | sad = c(depressed = -4, okay = -2)) 181 | expect_identical( 182 | textstat_valence("Depressed not okay", dict)$sentiment, 183 | (-4 + 1 - 2) / 3 184 | ) 185 | expect_identical( 186 | textstat_valence("Depressed not okay", dict)$sentiment, 187 | textstat_valence(dfm(tokens("Depressed not okay")), dict)$sentiment 188 | ) 189 | }) 190 | 191 | test_that("normalization methods work for textstat_valence", { 192 | dict <- dict <- dictionary(list(positive = c("good", "great"), 193 | negative = c("bad", "awful"))) 194 | valence(dict) <- list(positive = 1, negative = -1) 195 | polarity(dict) <- list(pos = "positive", neg = "negative") 196 | 197 | txt <- c(d1 = "Good good bad other.", 198 | d2 = "Word word other bad!", 199 | d3 = "Great awful other £1.") 200 | toks <- tokens(txt) 201 | 202 | # relative proportional difference 203 | pol_rpd <- data.frame(doc_id = docnames(toks), 204 | sentiment = c( (2 - 1) / 3, 205 | (0 - 1) / 1, 206 | (1 - 1) / 2 )) 207 | expect_equivalent( 208 | textstat_polarity(toks, dict, sent_relpropdiff), 209 | pol_rpd 210 | ) 211 | expect_equivalent( 212 | textstat_valence(toks, dict, normalization = "dictionary"), 213 | textstat_polarity(toks, dict, sent_relpropdiff) 214 | ) 215 | 216 | # absolute proportional difference 217 | pol_apd <- data.frame(doc_id = docnames(toks), 218 | sentiment = c( (2 - 1) / 5, 219 | (0 - 1) / 5, 220 | (1 - 1) / 6 )) 221 | expect_equivalent( 222 | textstat_polarity(toks, dict, sent_abspropdiff), 223 | pol_apd 224 | ) 225 | expect_equivalent( 226 | textstat_valence(toks, dict, normalization = "all"), 227 | textstat_polarity(toks, dict, sent_abspropdiff) 228 | ) 229 | 230 | # no normalization 231 | expect_equivalent( 232 | textstat_valence(toks, dict, normalization = "none"), 233 | data.frame(doc_id = docnames(toks), sentiment = c(1, -1, 0)) 234 | ) 235 | 236 | # logit scale 237 | pol_log <- data.frame(doc_id = docnames(toks), 238 | sentiment = c( log(2 + .5) - log(1 + .5), 239 | log(0 + .5) - log(1 + .5), 240 | log(1 + .5) - log(1 + .5) )) 241 | expect_equivalent( 242 | textstat_polarity(toks, 
dict, sent_logit), 243 | pol_log 244 | ) 245 | # dfmat <- dfm(toks) %>% 246 | # dfm_lookup(dict) 247 | # dfm_weight(scheme = "logsmooth", base = exp(1)) 248 | # expect_equivalent( 249 | # textstat_valence(dfmat, dict), 250 | # textstat_polarity(toks, dict, sent_logit) 251 | # ) 252 | }) 253 | 254 | test_that("worker functions work", { 255 | dict <- dictionary(list(positive = "good", negative = "bad")) 256 | expect_error( 257 | quanteda.sentiment:::flip_valence(dict), 258 | "valence not set" 259 | ) 260 | }) 261 | 262 | test_that("valence error conditions work", { 263 | dict <- dictionary(list( 264 | happy = c("okay", "exuberant"), 265 | sad = c("okay", "depressed") 266 | )) 267 | 268 | expect_error( 269 | valence(dict) <- list(happy = c(okay = 1, exuberant = 3), 270 | c(depressed = -4, okay = -2)), 271 | "valence must be a fully named list" 272 | ) 273 | 274 | expect_error( 275 | valence(dict) <- list(happy = c(okay = 1, exuberant = 3), 276 | other = c(depressed = -4, okay = -2)), 277 | "'other' is not a dictionary key" 278 | ) 279 | 280 | expect_error( 281 | valence(dict) <- list(happy = c(1, 3, 2)), 282 | "valence value length not equal to number of values for key 'happy'" 283 | ) 284 | }) 285 | 286 | test_that("nested scope works for textstat_valence on tokens", { 287 | dict <- dictionary(list(positive = "good", negative = "not good")) 288 | polarity(dict) <- list(pos = "positive", neg = "negative") 289 | valence(dict) <- c(positive = 1, negative = -1) 290 | toks <- tokens("The test is not good") 291 | 292 | expect_equivalent( 293 | textstat_valence(toks, dictionary = dict), 294 | data.frame(doc_id = "text1", sentiment = -1, row.names = NULL) 295 | ) 296 | }) 297 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/sentiment_analysis.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Dictionary-based sentiment analysis using quanteda" 3 | author: "Kenneth Benoit" 4 | output: 5 | rmarkdown::html_vignette: 6 | toc: true 7 | vignette: > 8 | %\VignetteIndexEntry{Dictionary-based sentiment analysis using quanteda} 9 | %\VignetteEngine{knitr::rmarkdown} 10 | %\VignetteEncoding{UTF-8} 11 | --- 12 | 13 | ```{r, echo = FALSE} 14 | knitr::opts_chunk$set( 15 | collapse = TRUE, 16 | comment = "##" 17 | ) 18 | ``` 19 | 20 | ## Overview 21 | 22 | Sentiment analysis using dictionaries can be applied to any text, tokens, or dfm using `textstat_polarity()` or `textstat_valence()`. This function takes the **quanteda** object as an input, along with a dictionary whose valence or polarity has been set. The two ways of setting dictionary values allow a user to weight each _key_ with a polarity weight, or each _value_ within keys with a _valence_ weight. 23 | 24 | Dictionaries consist of keys and values, where the "key" is the canonical category such as "positive" or "negative", and the "values" consist of the patterns assigned to each key that will be counted as occurrences of those keys when its dictionary is applied using `tokens_lookup()` or `dfm_lookup()`. 
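To make the key/value distinction concrete, here is a minimal sketch using an invented two-key dictionary and a toy sentence; the object name `dict_example` and the word lists are purely illustrative, and the calls are namespace-qualified only because the packages are not attached until the next code chunk.

```{r}
# a hypothetical dictionary: the keys are the canonical categories, and the
# values are the word patterns counted as matches for each key
dict_example <- quanteda::dictionary(list(positive = c("good", "great", "fantastic"),
                                          negative = c("bad", "awful", "terrible")))
quanteda::tokens_lookup(quanteda::tokens("A good day, not an awful one."),
                        dictionary = dict_example)
```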
25 | 26 | In the Lexicoder Sentiment Dictionary 2015 (`data_dictionary_LSD2015`) that is distributed with the **quanteda** package, for instance, the dictionary has four keys, with between 1,721 and 2,860 values each: 27 | ```{r} 28 | library("quanteda", warn.conflicts = FALSE, verbose = FALSE) 29 | library("quanteda.sentiment", warn.conflicts = FALSE, verbose = FALSE) 30 | 31 | print(data_dictionary_LSD2015, max_nval = 5) 32 | lengths(data_dictionary_LSD2015) 33 | ``` 34 | As can be seen, these use "glob" pattern matches and may be multi-word values, such as "a lie" or "no damag*". 35 | 36 | 37 | ## Polarity and valence 38 | 39 | Dictionary-based sentiment analysis in **quanteda** can take place in two different forms, depending on whether dictionary keys are part of a _polarity_-based sentiment scheme -- such as positive versus negative dictionary categories (keys) -- or whether a continuous sentiment score is associated with individual word patterns, what we call a _valence_-based sentiment scheme. 40 | 41 | Dictionaries can have both polarity and valence weights, but these are not used in the same sentiment scoring scheme. "Polarity" is a category of one of two "poles" (such as negative and positive) applied to dictionary keys, whereas "valence" is a weight applied individually to each value within a key. 42 | 43 | ### Polarity weights 44 | 45 | Polarity weighting assigns the following categories to dictionary keys, to represent the "poles": 46 | * `pos` -- a "positive" end of the scale, although this notion does not need literally to mean positive sentiment. It could indicate any polar position, such as terms indicating confidence. 47 | * `neg` -- a "negative" end of the scale, although once again, this does not need literally to mean negative sentiment. In the example of "positive" indicating confidence, for instance, the "negative" pole could indicate tentative or uncertain language. 48 | * optionally, a `neut` category can also be identified, if this is desired. 49 | 50 | Dictionary keys are linked to each pole using the `polarity() <-` assignment function. The keys linked to each pole will be indicated in the summary information when the dictionary is printed, or returned as a list when calling the function `polarity()`. 51 | 52 | ```{r} 53 | polarity(data_dictionary_LSD2015) 54 | polarity(data_dictionary_LSD2015) <- list(pos = "positive", neg = "negative") 55 | ``` 56 | 57 | Poles can be linked to multiple dictionary keys. For instance, in the Lexicoder 2015 dictionary, there are also two "negation" keys, 58 | `neg_positive` and `neg_negative`, meant to negate the positive terms and to negate the negative terms. To add these to our polarities, we would simply assign them as a list. 59 | 60 | ```{r} 61 | polarity(data_dictionary_LSD2015) 62 | polarity(data_dictionary_LSD2015) <- 63 | list(pos = c("positive", "neg_negative"), neg = c("negative", "neg_positive")) 64 | print(data_dictionary_LSD2015, 0, 0) 65 | ``` 66 | 67 | 68 | ### Valence weights 69 | 70 | Valence weighting is value-based, allowing individual numeric weights to be assigned to word patterns ("values"), rather than being a single pole attached to all of the values in a dictionary key. This allows different weights to be assigned within dictionary keys, for instance with different strengths of positivity or negativity. 
71 | 72 | If we wanted to nuance this dictionary, for instance, we could assign valences to each key: 73 | ```{r} 74 | dict <- dictionary(list(quality = c("bad", "awful", "horrific", 75 | "good", "great", "amazing"))) 76 | dict 77 | ``` 78 | This dictionary has no valences until they are set. To assign valences, we use the `valence()` replacement function, assigning it a list whose elements correspond to the dictionary structure. The names of the list elements should match the dictionary keys whose valences are being set, and the element for each key should be a vector of valences. When this numeric vector is named, order does not matter; otherwise, the order used will be that of the dictionary's values. 79 | ```{r} 80 | valence(dict) <- list(quality = c(amazing = 2.2, awful = -1.5, bad = -1, 81 | horrific = -2, good = 1, great = 1.7)) 82 | ``` 83 | Now, we can see that the valences are set: 84 | ```{r} 85 | dict 86 | valence(dict) 87 | ``` 88 | Because valences are set within each key, different keys can have different valences, even when the word values are the same. So we could add a second key like this: 89 | ```{r} 90 | dict["location"] <- dict["quality"] 91 | valence(dict)["location"] <- list(location = c(amazing = 2.2, awful = -1.5, bad = -1, 92 | horrific = -2, good = 1, great = 1.7)) 93 | print(dict, 0, 0) 94 | ``` 95 | 96 | 97 | This allows sentiment to be counted for dictionaries like the [Affective Norms for English Words (ANEW)](https://csea.phhp.ufl.edu/media.html#bottommedia) dictionary, which has numerical weights from 1.0 to 9.0 for word values in each of three categories: pleasure, arousal, and dominance. As a **quanteda** dictionary, this would consist of three dictionary keys (one for each of pleasure, arousal, and dominance) and each word pattern would form a value in each key. Each word value, furthermore, would have a valence. This allows a single dictionary to contain multiple categories of valence, which can be combined or examined separately using `textstat_valence()`. We return to the example of the ANEW dictionary below. 98 | 99 | Valence can also be assigned to provide the same weight to every value within a key, making it equivalent to polarity. For instance: 100 | ```{r} 101 | dict <- dictionary(list(neg = c("bad", "awful", "horrific"), 102 | pos = c("good", "great", "amazing"))) 103 | valence(dict) <- list(neg = -1, pos = 1) 104 | print(dict) 105 | valence(dict) 106 | ``` 107 | 108 | ### Effects of polarity and valence weights on other functions 109 | 110 | These weights are not currently used by any function other than `textstat_polarity()` and `textstat_valence()`. When using dictionaries with a polarity or valence in any other function, these have no effect. Dictionaries with polarity or valence set operate in every other respect just like regular **quanteda** dictionaries with no polarity or valence. 111 | 112 | 113 | ## Computing sentiment with polarities 114 | 115 | ### Simple example with the LSD 2015 dictionary 116 | 117 | Let's take a simple example of a text with some positive and negative words found in the LSD2015 dictionary. The polarities of this dictionary are assigned by default, so we will erase our local copy and use the one found in the **quanteda.sentiment** package. 
118 | 119 | ```{r} 120 | txt <- c(doc1 = "This is a fantastic, wonderful example.", 121 | doc2 = "The settlement was not amiable.", 122 | doc3 = "The good, the bad, and the ugly.") 123 | toks <- tokens(txt) 124 | 125 | data("data_dictionary_LSD2015", package = "quanteda.sentiment") 126 | polarity(data_dictionary_LSD2015) 127 | ``` 128 | 129 | First, let's see what will be matched. 130 | ```{r} 131 | tokens_lookup(toks, data_dictionary_LSD2015, nested_scope = "dictionary", 132 | exclusive = FALSE) 133 | ``` 134 | Notice the `nested_scope = "dictionary"` argument. This tells the lookup function to consider the scope at which to stop "nesting" the value matches across the dictionary, rather than the default which is within keys. Otherwise, the tokens "not", "amiable" in `doc2` would be matched twice: once for the positive key, matched from the value `"amiab*"`; and once for the `neg_positive` key, matched from the value `"not amiab*"`. With the entire dictionary as the `nested_scope`, however, the (`neg_positive`) `"not amiab*"` is matched first, and then the shorter value from the other (`positive`) key `"amiab*"` is not also matched. 135 | 136 | To compute a polarity-based sentiment score, we need a formula specifying how the categories will be combined. This is supplied through the `fun` argument, which names a function for scoring sentiment through a combination of `pos`, `neg`, and optionally `neut` and `N`, where `N` is short for the total number of tokens or features. 137 | 138 | The **quanteda.sentiment** package includes three functions for converting polarities into a continuous index of sentiment, from Lowe et al. (2011). These are: 139 | 140 | * `sent_logit`, a logit scale computed as $\mathrm{log}(pos + 0.5) - \mathrm{log}(neg + 0.5)$, also the default method; 141 | 142 | * `sent_abspropdiff`, the "absolute proportional difference" scale comparing the difference between positive and negative mentions as a proportion of all counts: computed as $\frac{pos - neg}{N}$; and 143 | 144 | * `sent_relpropdiff`, the "relative proportional difference" scale comparing the difference between positive and negative mentions as a proportion of only the total positive and negative mentions, computed as $\frac{pos - neg}{pos + neg}$. 145 | 146 | Additional custom functions, including those making use of the $neut$ category or using custom weights, can be supplied through the `fun` argument in `textstat_polarity()`, with additional arguments to `fun` supplied through `...` (for instance, the `smooth` argument in `sent_logit`). A small hand computation of all three scales appears below, just before the corpus example. 147 | 148 | So to compute sentiment for the example, we simply need to call `textstat_polarity()`: 149 | ```{r} 150 | textstat_polarity(toks, data_dictionary_LSD2015) 151 | ``` 152 | Or for an alternative scale: 153 | ```{r} 154 | textstat_polarity(toks, data_dictionary_LSD2015, fun = sent_relpropdiff) 155 | ``` 156 | ## Example on real texts 157 | 158 | Let's apply the LSD 2015 to political speeches, namely the inaugural addresses of the US presidents since 1970. We'll use the negation categories too. Notice that we don't even need to tokenize the text here, since the `textstat_polarity()` function can take a corpus as input (and will take care of the appropriate tokenization on its own). 
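Before running this on the corpus, here is a small hand computation of the three scales above, for a hypothetical document containing three positive matches, two negative matches, and ten tokens in total (illustrative counts only, not taken from the corpus):

```{r}
# worked check of the three polarity scales for hypothetical counts
pos <- 3; neg <- 2; N <- 10; smooth <- 0.5
log(pos + smooth) - log(neg + smooth)  # logit scale, as in sent_logit
(pos - neg) / N                        # absolute proportional difference, as in sent_abspropdiff
(pos - neg) / (pos + neg)              # relative proportional difference, as in sent_relpropdiff
```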
159 | 160 | ```{r} 161 | polarity(data_dictionary_LSD2015) <- 162 | list(pos = c("positive", "neg_negative"), neg = c("negative", "neg_positive")) 163 | 164 | sent_pres <- data_corpus_inaugural %>% 165 | corpus_subset(Year > 1970) %>% 166 | textstat_polarity(data_dictionary_LSD2015) 167 | sent_pres 168 | ``` 169 | We can plot this: 170 | ```{r} 171 | library("ggplot2") 172 | ggplot(sent_pres) + 173 | geom_point(aes(x = sentiment, y = reorder(doc_id, sentiment))) + 174 | ylab("") 175 | ``` 176 | 177 | ## Computing sentiment with valences 178 | 179 | Valences provide a more flexible method for computing sentiment, based on numeric values, or valences, attached to specific word patterns. 180 | 181 | ### Simple example with user-supplied valences 182 | 183 | For a dictionary whose polarity or valence has been set, computing sentiment is simple: `textstat_polarity()` or `textstat_valence()` is applied to the object along with the dictionary. Here, we demonstrate this for the LSD2015. 184 | 185 | ```{r} 186 | txt <- c(doc1 = "This is a fantastic, wonderful example.", 187 | doc2 = "The settlement was not amiable.", 188 | doc3 = "The good, the bad, and the ugly.") 189 | toks <- tokens(txt) 190 | 191 | valence(data_dictionary_LSD2015) <- list(positive = 1, negative = -1) 192 | ``` 193 | 194 | 195 | To compute sentiment, `textstat_valence()` will count the two positive and zero negative matches from the first example, and average these across all matches, for a score of 1.0. In the second document, the positive match will generate a score of 1.0, and in the third document, the score will be `sum(1, -1, -1) / 3 = -0.33`. 196 | ```{r} 197 | textstat_valence(toks, data_dictionary_LSD2015) 198 | ``` 199 | 200 | Note that if we include the other dictionary keys, however, then "not amiable" will be matched in the `neg_positive` count, rather than the word "amiable" being counted as positive. Because many dictionary values may be multi-word patterns, we always recommend using `textstat_valence()` on tokens, rather than on `dfm` objects, whose features are individual tokens and therefore cannot match multi-word values. 201 | ```{r} 202 | valence(data_dictionary_LSD2015) <- list(positive = 1, negative = -1, 203 | neg_negative = 1, neg_positive = -1) 204 | textstat_valence(toks, data_dictionary_LSD2015) 205 | ``` 206 | 207 | Here, document 2 is now computed as -1 because its dictionary match is actually to the "neg_positive" category that has a valence of -1. The sentiment function ignored the keys whose valence was not set before, but applies them with `nested_scope = "dictionary"` when it is set, to ensure that only the longer phrase is matched. 208 | ```{r} 209 | tokens_lookup(toks, data_dictionary_LSD2015, exclusive = FALSE, 210 | nested_scope = "dictionary") 211 | ``` 212 | 213 | ### Using the AFINN dictionary 214 | 215 | We can build this dictionary from scratch using the source data: 216 | ```{r} 217 | afinn <- read.delim(system.file("extdata/afinn/AFINN-111.txt", 218 | package = "quanteda.sentiment"), 219 | header = FALSE, col.names = c("word", "valence")) 220 | head(afinn) 221 | ``` 222 | 223 | To make this into a **quanteda** dictionary: 224 | ```{r} 225 | data_dictionary_afinn <- dictionary(list(afinn = afinn$word)) 226 | valence(data_dictionary_afinn) <- list(afinn = afinn$valence) 227 | data_dictionary_afinn 228 | ``` 229 | This dictionary has a single key we have called "afinn", with the valences set from the original `afinn` data.frame/tibble. 
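Because the valences here were supplied as an unnamed numeric vector, they are matched to the dictionary's values by position, so it can be worth confirming that the counts line up. A quick check, using only the objects created above:

```{r}
# number of values in the single "afinn" key vs. number of valence scores;
# these should be equal, since unnamed valences are assigned by position
lengths(data_dictionary_afinn)
lengths(valence(data_dictionary_afinn))
```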
230 | 231 | We can now use this to apply `textstat_valence()`: 232 | ```{r} 233 | textstat_valence(toks, data_dictionary_afinn) 234 | ``` 235 | How was this computed? We can use the dictionary to examine the words, and also to get their sentiment. 236 | ```{r} 237 | tokssel <- tokens_select(toks, data_dictionary_afinn) 238 | tokssel 239 | 240 | valence(data_dictionary_afinn)$afinn[as.character(tokssel)] 241 | ``` 242 | So here, doc1 had a score of `(4 + 4) / 2 = 4`, doc2 had no score because none of its tokens matched values in the AFINN dictionary, and doc3 had a score of `(3 + -3 + -3) / 3 = -1`. 243 | 244 | ### Using the ANEW dictionary with multiple keys 245 | 246 | The ANEW, or Affective Norms for English Words (Bradley and Lang 2017), provides a lexicon of 2,471 distinct fixed word matches that are associated with three valenced categories: pleasure, arousal, and dominance. Reading in the original format, we have to convert this into a **quanteda** dictionary format, and add the valence values. Because this format requires a list of separate keys, we need to create a dictionary key for each of the three categories, and assign the lexicon to each key. With the ANEW, it just so happens that the lexicon entries -- or "values" in **quanteda** parlance -- are the same for each key, but this is not a necessary feature of valenced dictionaries. 247 | 248 | ```{r} 249 | anew <- read.delim(url("https://bit.ly/2zZ44w0")) 250 | anew <- anew[!duplicated(anew$Word), ] # because some words repeat 251 | data_dictionary_anew <- dictionary(list(pleasure = anew$Word, 252 | arousal = anew$Word, 253 | dominance = anew$Word)) 254 | valence(data_dictionary_anew) <- list(pleasure = anew$ValMn, 255 | arousal = anew$AroMn, 256 | dominance = anew$DomMn) 257 | ``` 258 | 259 | Now we can see that we have the dictionary in **quanteda** format with the valences attached. We also see that the values are the same in each key. 260 | ```{r} 261 | print(data_dictionary_anew, max_nval = 5) 262 | ``` 263 | 264 | The best way to compute sentiment is to choose a key and use it separately, because each key here contains the same values. 265 | ```{r} 266 | textstat_valence(toks, data_dictionary_anew["pleasure"]) 267 | textstat_valence(toks, data_dictionary_anew["arousal"]) 268 | ``` 269 | 270 | If we don't subset the dictionary keys, it will combine them, which is probably not what we want: 271 | ```{r} 272 | textstat_valence(toks, data_dictionary_anew) 273 | 274 | tokssel <- tokens_select(toks, data_dictionary_anew) 275 | vals <- lapply(valence(data_dictionary_anew), 276 | function(x) x[as.character(tokssel)]) 277 | vals 278 | ``` 279 | Without selection, the average is across all three keys: 280 | ```{r} 281 | mean(unlist(vals)) 282 | ``` 283 | 284 | ## Equivalences between polarity and valence approaches 285 | 286 | Valences can be set to produce results equivalent to the polarity-based approach, if this is desired. Considering our brief example above, and making sure we have both polarity and valence set for the LSD2015, we can show this for the two non-logit scale polarity functions. 
287 | ```{r} 288 | corpus(txt) 289 | valence(data_dictionary_LSD2015) <- list(positive = 1, negative = -1, 290 | neg_negative = 1, neg_positive = -1) 291 | print(data_dictionary_LSD2015, 0, 0) 292 | ``` 293 | Computing this by absolute proportional difference: 294 | ```{r} 295 | textstat_polarity(txt, data_dictionary_LSD2015, fun = sent_abspropdiff) 296 | ``` 297 | is the same as computing it this way using valences: 298 | ```{r} 299 | textstat_valence(txt, data_dictionary_LSD2015, norm = "all") 300 | ``` 301 | For the relative proportional difference: 302 | ```{r} 303 | textstat_polarity(txt, data_dictionary_LSD2015, fun = sent_relpropdiff) 304 | textstat_valence(txt, dictionary = data_dictionary_LSD2015, norm = "dict") 305 | ``` 306 | 307 | 308 | ## References 309 | 310 | Bradley, M.M. & Lang, P.J. (2017). [Affective Norms for English Words (ANEW): Instruction manual and affective ratings](https://pdodds.w3.uvm.edu/teaching/courses/2009-08UVM-300/docs/others/everything/bradley1999a.pdf). _Technical Report C-3_. Gainesville, FL: UF Center for the Study of Emotion and Attention. 311 | 312 | Liu, B. (2015). _Sentiment analysis: Mining opinions, sentiments, and emotions_. Cambridge University Press. 313 | 314 | Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011). Scaling Policy Preferences from Coded Political Texts. _Legislative Studies Quarterly_, 36(1), 123–155. \doi{10.1111/j.1939-9162.2010.00006.x}. 315 | --------------------------------------------------------------------------------