├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ └── test-coverage.yaml ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── aaa.R ├── data-documentation.R ├── print.R ├── quanteda.sentiment-package.R ├── re-exports.R ├── textstat_polarity.R └── textstat_valence.R ├── README.Rmd ├── README.md ├── codecov.yml ├── data ├── data_dictionary_AFINN.rda ├── data_dictionary_ANEW.rda ├── data_dictionary_HuLiu.rda ├── data_dictionary_LSD2015.rda ├── data_dictionary_LoughranMcDonald.rda ├── data_dictionary_NRC.rda ├── data_dictionary_Rauh.rda ├── data_dictionary_geninqposneg.rda └── data_dictionary_sentiws.rda ├── inst ├── WORDLIST └── extdata │ └── afinn │ ├── AFINN-111.txt │ ├── AFINN-96.txt │ └── AFINN-README.txt ├── man ├── data_dictionary_AFINN.Rd ├── data_dictionary_ANEW.Rd ├── data_dictionary_HuLiu.Rd ├── data_dictionary_LSD2015.Rd ├── data_dictionary_LoughranMcDonald.Rd ├── data_dictionary_NRC.Rd ├── data_dictionary_Rauh.Rd ├── data_dictionary_geninqposneg.Rd ├── data_dictionary_sentiws.Rd ├── get_polarity_dictionary.Rd ├── images │ ├── unnamed-chunk-5-1.png │ └── unnamed-chunk-6-1.png ├── polarity.Rd ├── quanteda.sentiment-package.Rd ├── sentiment-functions.Rd ├── textstat_polarity.Rd ├── textstat_valence.Rd └── valence.Rd ├── sources ├── AFINN │ ├── AFINN-111.txt │ ├── AFINN-96.txt │ ├── AFINN-README.txt │ └── create-data_dictionary_AFINN.R ├── ANEW │ └── create-data_dictionary_ANEW.R ├── Hu-Liu │ ├── create_data_dictionary-HuLiu.R │ ├── negative-words-UTF8.txt │ └── positive-words.txt ├── Laver-Garry │ ├── Laver_and_Garry_2000.cat │ └── create-data_dictionary_LaverGarry.R ├── Loughran-McDonald │ ├── Loughran_and_McDonald_2014.cat │ └── create-data_dictionary_LoughranMcDonald.R ├── MFD │ ├── create-data_dictionary_MFD.R │ ├── mfd2.0.dic │ └── moral_foundations_dictionary.dic ├── NRC │ └── create_data_dictionary-NRC.R ├── RID │ ├── RID.CAT │ └── create-data_dictionary_RID.R ├── Rauh │ ├── Rauh_SentDictionaryGerman.Rdata │ ├── Rauh_SentDictionaryGerman_Negation.Rdata │ └── create-data_dictionary_Rauh.R ├── geninquirer │ ├── create-data_dictionary_geninquirer.R │ └── inquireraugmented.csv ├── make_sentiment_dictionaries.R ├── sentiws │ ├── create-data_dictionary_sentiws.R │ ├── sentiws_v1.8c_negative.txt │ └── sentiws_v1.8c_positive.txt ├── test-misc.R └── uk_us_english │ └── data_dict_usbr.csv ├── tests ├── spelling.R ├── testthat.R └── testthat │ ├── test-data.R │ ├── test-misc.R │ ├── test-test.R │ ├── test-textstat_polarity.R │ └── test-textstat_valence.R └── vignettes ├── .gitignore └── sentiment_analysis.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.travis\.yml$ 4 | ^appveyor\.yml$ 5 | ^codecov\.yml$ 6 | ^README\.Rmd$ 7 | ^\.github$ 8 | ^doc$ 9 | ^Meta$ 10 | sources 11 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macos-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v4 33 | 34 | - uses: r-lib/actions/setup-pandoc@v2 35 | 36 | - uses: r-lib/actions/setup-r@v2 37 | with: 38 | r-version: ${{ matrix.config.r }} 39 | http-user-agent: ${{ matrix.config.http-user-agent }} 40 | use-public-rspm: true 41 | 42 | - uses: r-lib/actions/setup-r-dependencies@v2 43 | with: 44 | extra-packages: any::rcmdcheck 45 | needs: check 46 | 47 | - uses: r-lib/actions/check-r-package@v2 48 | with: 49 | upload-snapshots: true 50 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - uses: r-lib/actions/setup-r@v2 21 | with: 22 | use-public-rspm: true 23 | 24 | - uses: r-lib/actions/setup-r-dependencies@v2 25 | with: 26 | extra-packages: any::covr 27 | needs: coverage 28 | 29 | - name: Test coverage 30 | run: | 31 | covr::codecov( 32 | quiet = FALSE, 33 | clean = FALSE, 34 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 35 | ) 36 | shell: Rscript {0} 37 | 38 | - name: Show testthat output 39 | if: always() 40 | run: | 41 | ## -------------------------------------------------------------------- 42 | find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true 43 | shell: bash 44 | 45 | - name: Upload test results 46 | if: failure() 47 | uses: actions/upload-artifact@v4 48 | with: 49 | name: coverage-test-failures 50 | path: ${{ runner.temp }}/package 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | # Session Data files 5 | .RData 6 | # Example code in package build process 7 | *-Ex.R 8 | # Output files from R CMD build 9 | /*.tar.gz 10 | # Output files from R CMD check 11 | /*.Rcheck/ 12 | # RStudio files 13 | .Rproj.user/ 14 | # produced vignettes 15 | vignettes/*.html 16 | vignettes/*.pdf 17 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 18 | .httr-oauth 19 | # knitr and R markdown default cache directories 20 | /*_cache/ 21 | /cache/ 22 | # Temporary files created by R markdown 23 | *.utf8.md 24 | *.knit.md 25 | .Rproj.user 26 | *.Rproj 27 | 
inst/doc 28 | doc 29 | Meta 30 | .DS_Store 31 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: quanteda.sentiment 2 | Title: Sentiment Analysis using 'quanteda' 3 | Version: 0.31 4 | Authors@R: c( person("Kenneth", "Benoit", email = "kbenoit@lse.ac.uk", role = 5 | c("aut", "cre", "cph")) ) 6 | Description: Adds functions and dictionaries for computing sentiment using the 'quanteda' package. 7 | Depends: 8 | R (>= 4.1.0), 9 | quanteda (>= 3.2.1), 10 | methods 11 | Imports: 12 | Matrix, 13 | stringi 14 | License: GPL-3 15 | Encoding: UTF-8 16 | LazyData: true 17 | RoxygenNote: 7.3.1 18 | Suggests: 19 | covr, 20 | ggplot2, 21 | knitr, 22 | rmarkdown, 23 | spelling, 24 | testthat 25 | Roxygen: list(markdown = TRUE) 26 | Language: en-GB 27 | VignetteBuilder: knitr 28 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method("polarity<-",dictionary2) 4 | S3method("valence<-",dictionary2) 5 | S3method(polarity,dictionary2) 6 | S3method(textstat_polarity,character) 7 | S3method(textstat_polarity,corpus) 8 | S3method(textstat_polarity,default) 9 | S3method(textstat_polarity,dfm) 10 | S3method(textstat_polarity,tokens) 11 | S3method(textstat_valence,character) 12 | S3method(textstat_valence,corpus) 13 | S3method(textstat_valence,default) 14 | S3method(textstat_valence,dfm) 15 | S3method(textstat_valence,tokens) 16 | S3method(valence,dictionary2) 17 | export("polarity<-") 18 | export("valence<-") 19 | export(polarity) 20 | export(sent_abspropdiff) 21 | export(sent_logit) 22 | export(sent_relpropdiff) 23 | export(textstat_polarity) 24 | export(textstat_valence) 25 | export(valence) 26 | import(methods) 27 | importFrom(Matrix,rowSums) 28 | importFrom(quanteda,as.dfm) 29 | importFrom(quanteda,convert) 30 | importFrom(quanteda,corpus) 31 | importFrom(quanteda,dfm) 32 | importFrom(quanteda,dfm_lookup) 33 | importFrom(quanteda,dictionary) 34 | importFrom(quanteda,tokens) 35 | importFrom(quanteda,tokens_lookup) 36 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # quanteda.sentiment 0.2x 2 | 3 | * Usability improvements 4 | * nested_scope = "dictionary" is now standard for applications on tokens, for textstat_polarity() (#12) 5 | * Minor changes in preparation for forthcoming v3 release. 6 | 7 | # quanteda.sentiment 0.1 8 | 9 | * Added a `NEWS.md` file to track changes to the package. 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /R/aaa.R: -------------------------------------------------------------------------------- 1 | build_dictionary2 <- quanteda:::build_dictionary2 2 | validate_dictionary <- quanteda:::validate_dictionary 3 | build_dictionary2 <- quanteda:::build_dictionary2 4 | -------------------------------------------------------------------------------- /R/data-documentation.R: -------------------------------------------------------------------------------- 1 | #' Lexicoder Sentiment Dictionary (2015) 2 | #' 3 | #' The 2015 Lexicoder Sentiment Dictionary in \pkg{quanteda} [dictionary] 4 | #' format. 
5 | #' 6 | #' @details 7 | #' The dictionary consists of 2,858 "negative" sentiment words and 1,709 8 | #' "positive" sentiment words. A further set of 2,860 and 1,721 negations of 9 | #' negative and positive words, respectively, is also included. While many users 10 | #' will find the non-negation sentiment forms of the LSD adequate for sentiment 11 | #' analysis, Young and Soroka (2012) did find a small, but non-negligible 12 | #' increase in performance when accounting for negations. Users wishing to test 13 | #' this or include the negations are encouraged to subtract negated positive 14 | #' words from the count of positive words, and subtract the negated negative 15 | #' words from the negative count. 16 | #' 17 | #' Young and Soroka (2012) also suggest the use of a pre-processing script to 18 | #' remove specific cases of some words (i.e., "good bye", or "nobody better", 19 | #' which should not be counted as positive). Pre-processing scripts are 20 | #' available at . 21 | #' @section License and Conditions: 22 | #' The LSD is available for non-commercial academic purposes only. By using 23 | #' `data_dictionary_LSD2015`, you accept these terms. 24 | #' 25 | #' Please cite the references below when using the dictionary. 26 | #' @format 27 | #' A [dictionary] of four keys containing glob-style [pattern 28 | #' matches][valuetype]. 29 | #' \describe{ 30 | #' \item{`negative`}{2,858 word patterns indicating negative sentiment} 31 | #' \item{`positive`}{1,709 word patterns indicating positive sentiment} 32 | #' \item{`neg_positive`}{1,721 word patterns indicating a positive word preceded 33 | #' by a negation (used to convey negative sentiment)} 34 | #' \item{`neg_negative`}{2,860 word patterns indicating a negative word preceded 35 | #' by a negation (used to convey positive sentiment)} 36 | #' } 37 | #' @references 38 | #' The objectives, development and reliability of the dictionary are discussed 39 | #' in detail in Young and Soroka (2012). Please cite this article when using 40 | #' the Lexicoder Sentiment Dictionary and related resources. 41 | # 42 | #' Young, L. & Soroka, S. (2012). *Lexicoder Sentiment 43 | #' Dictionary*. Available at . 44 | #' 45 | #' Young, L. & Soroka, S. (2012). Affective News: The Automated Coding of 46 | #' Sentiment in Political Texts. *Political Communication*, 29(2), 205--231. 47 | #' \doi{10.1080/10584609.2012.671234} 48 | #' @keywords data 49 | #' @examples 50 | #' # checking polarity 51 | #' polarity(data_dictionary_LSD2015) 52 | #' 53 | #' # simple example 54 | #' library("quanteda") 55 | #' txt <- "This aggressive policy will not win friends." 56 | #' 57 | #' tokens_lookup(tokens(txt), dictionary = data_dictionary_LSD2015, 58 | #' exclusive = FALSE) 59 | #' ## tokens from 1 document. 60 | #' ## text1 : 61 | #' ## [1] "This" "NEGATIVE" "policy" "will" "NEG_POSITIVE" "POSITIVE" "POSITIVE" "." 62 | #' 63 | #' # notice that double-counting of negated and non-negated terms is avoided 64 | #' # when using nested_scope = "dictionary" 65 | #' tokens_lookup(tokens(txt), dictionary = data_dictionary_LSD2015, 66 | #' exclusive = FALSE, nested_scope = "dictionary") 67 | #' ## tokens from 1 document. 68 | #' ## text1 : 69 | #' ## [1] "This" "NEGATIVE" "policy" "will" "NEG_POSITIVE" "POSITIVE." 
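#'
#' # netting out negated from direct counts, as suggested in Details
#' # (an illustrative sketch; the column names are the dictionary keys)
#' counts <- convert(dfm(tokens_lookup(tokens(txt), data_dictionary_LSD2015)),
#'                   to = "data.frame")
#' counts$positive - counts$neg_positive   # net positive count
#' counts$negative - counts$neg_negative   # net negative count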
70 | #' 71 | #' # on larger examples - notice that few negations are used 72 | #' tail(data_corpus_inaugural) |> 73 | #' tokens() |> 74 | #' tokens_lookup(dictionary = data_dictionary_LSD2015) |> 75 | #' dfm() 76 | "data_dictionary_LSD2015" 77 | 78 | #' NRC Word-Emotion Association Lexicon 79 | #' 80 | #' @description 81 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing Mohammad and 82 | #' Charron's (2010, 2013) English version of the NRC Word-Emotion Association 83 | #' Lexicon (aka NRC Emotion Lexicon aka EmoLex): association of words with eight 84 | #' emotions (anger, fear, anticipation, trust, surprise, sadness, joy, and disgust) 85 | #' and two sentiments (negative and positive) manually annotated on Amazon's 86 | #' Mechanical Turk. 87 | #' 88 | #' @description 89 | #' The Sentiment and Emotion Lexicons is a collection of lexicons that was 90 | #' entirely created by the experts of the National Research Council of Canada. 91 | #' Developed with a wide range of applications, this lexicon collection can be 92 | #' used in a multitude of contexts such as sentiment analysis, product 93 | #' marketing, consumer behaviour and even political campaign analysis. 94 | #' 95 | #' @description 96 | #' The technology uses a list of words that help identify emotions, sentiment, 97 | #' as well as analyzing hashtags, emoticons and word-colour associations. The 98 | #' lexicons contain entries for English words, and can be used to analyze 99 | #' English texts. 100 | #' @references 101 | #' Mohammad, S. & Turney, P. (2013). [Crowdsourcing a Word-Emotion Association 102 | #' Lexicon](https://arxiv.org/abs/1308.6297). *Computational Intelligence*, 103 | #' 29(3), 436--465. 104 | #' 105 | #' Mohammad, S. & Turney, P. (2010). [Emotions Evoked by Common Words and 106 | #' Phrases: Using Mechanical Turk to Create an Emotion 107 | #' Lexicon](https://dl.acm.org/doi/10.5555/1860631.1860635). In *Proceedings of 108 | #' the NAACL-HLT 2010 Workshop on Computational Approaches to Analysis and 109 | #' Generation of Emotion in Text*, June 2010, LA, California. 110 | #' @source 111 | #' 112 | #' See also 113 | #' @section License and Terms of Use: 114 | #' Free for research purposes. 115 | #' 116 | #' For questions about the commercial license, email Pierre Charron (Client 117 | #' Relationship Leader at NRC): `Pierre.Charron@nrc-cnrc.gc.ca`. 118 | #' 119 | #' Terms of Use: 120 | #' * Cite the papers associated with the lexicons in your research papers and 121 | #' articles that make use of them. (The papers associated with each lexicon 122 | #' are listed below, and also in the READMEs for individual lexicons.) 123 | #' * In news articles and online posts on work using these lexicons, cite the 124 | #' appropriate lexicons. For example: "This application/product/tool makes 125 | #' use of the `resource name`, created by `author(s)` at the National 126 | #' Research Council Canada." (The creators of each lexicon are listed below. 127 | #' Also, if you send us an email, we will be thrilled to know about how you 128 | #' have used the lexicon.) If possible hyperlink to this page: 129 | #' . 130 | #' * If you use a lexicon in a product or application, then acknowledge this in 131 | #' the 'About' page and other relevant documentation of the application by 132 | #' stating the name of the resource, the authors, and NRC. For example: "This 133 | #' application/product/tool makes use of the `resource name`, created by 134 | #' `author(s)` at the National Research Council Canada." 
(The creators of 135 | #' each lexicon are listed below. Also, if you send us an email, we will be 136 | #' thrilled to know about how you have used the lexicon.) If possible 137 | #' hyperlink to this page: . 138 | #' * Do not redistribute the data. Direct interested parties to this page: 139 | #' . 140 | #' * National Research Council Canada (NRC) disclaims any responsibility for 141 | #' the use of the lexicons listed here and does not provide technical 142 | #' support. However, the contact listed above will be happy to respond to 143 | #' queries and clarifications. 144 | #' @note Technical and research-related questions can be addressed to Saif M. 145 | #' Mohammad (Senior Research Scientist at NRC): 146 | #' `Saif.Mohammad@nrc-cnrc.gc.ca`. 147 | #' @keywords data 148 | "data_dictionary_NRC" 149 | 150 | #' Positive and negative words from Hu and Liu (2004) 151 | #' 152 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing 2,006 153 | #' positive and 4,783 negative words from Hu and Liu (2004, 2005). 154 | #' @format 155 | #' A [dictionary] of fixed word patterns with two keys: 156 | #' * `positive`: 2,006 words with positive polarity 157 | #' * `negative`: 4,783 words with negative polarity 158 | #' @references 159 | #' Hu, M. & Liu, B. (2004). [Mining and Summarizing Customer 160 | #' Reviews](https://www.cs.uic.edu/~liub/publications/kdd04-revSummary.pdf). In 161 | #' Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery 162 | #' and Data Mining (KDD-2004), Aug 22--25, 2004, Seattle, Washington, USA. 163 | #' 164 | #' Liu, M., Hu, M., & Cheng, J. (2005). [Opinion Observer: Analyzing and 165 | #' Comparing Opinions on the 166 | #' Web](https://www.cs.uic.edu/~liub/publications/www05-p536.pdf). In 167 | #' Proceedings of the 14th International World Wide Web conference (WWW-2005), 168 | #' May 10--14, 2005, Chiba, Japan. 169 | #' @section License: 170 | #' Unknown. 171 | #' @source 172 | #' @keywords data 173 | "data_dictionary_HuLiu" 174 | 175 | #' Augmented General Inquirer *Positiv* and *Negativ* dictionary 176 | #' 177 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing the 178 | #' *Positiv* and *Negativ* dictionary entries from the augmented 179 | #' General Inquirer. These are new valence categories described at 180 | #' `http://www.wjh.harvard.edu/~inquirer/homecat.htm` but also including the 181 | #' terms from the "yes" "no" dictionary entries. 182 | #' @format 183 | #' A [dictionary] of fixed word patterns with two keys: 184 | #' * `positive`: 1,653 words with positive polarity 185 | #' * `negative`: 2,010 words with negative polarity 186 | #' @references Stone, P.J., Dunphy, C.D., & Smith, M.S. (1966). 187 | #' *The General Inquirer: A Computer Approach to Content Analysis.* 188 | #' Cambridge, MA: MIT Press. 189 | #' @source `http://www.wjh.harvard.edu/~inquirer/spreadsheet_guide.htm` -- 190 | #' although this site ceased operating some time in 2021 191 | #' @keywords data 192 | "data_dictionary_geninqposneg" 193 | 194 | #' SentimentWortschatz (SentiWS) 195 | #' 196 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing 197 | #' SentimentWortschatz (SentiWS), a publicly available German-language resource 198 | #' for sentiment analysis. The current version of SentiWS contains 1,650 199 | #' positive and 1,818 negative words, which sum up to 15,649 positive and 15,632 200 | #' negative word forms including their inflections. 
It not only contains 201 | #' adjectives and adverbs explicitly expressing a sentiment, but also nouns and 202 | #' verbs implicitly containing one. The original dictionary weights within the 203 | #' interval of -1 to 1. Note that the version implemented in 204 | #' \pkg{quanteda.dictionaries} uses a binary classification into positive 205 | #' (weight > 0) and negative (weight < 0) features. 206 | #' @source 207 | #' @references 208 | #' Remus, R., Quasthoff U., and Heyer, G. (2010). [SentiWS: a Publicly 209 | #' Available German-language Resource for Sentiment 210 | #' Analysis](http://www.lrec-conf.org/proceedings/lrec2010/pdf/490_Paper.pdf). 211 | #' In _Proceedings of the 7th International Language Ressources and Evaluation 212 | #' (LREC'10)_, 1168--1171. 213 | #' 214 | #' @keywords data 215 | "data_dictionary_sentiws" 216 | 217 | #' Nielsen's (2011) 'new ANEW' valenced word list 218 | #' 219 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing Finn Årup 220 | #' Nielsen's (2011) 'new ANEW' valenced word list, a publicly available list of 221 | #' English words rated for valence with values between -5 (negative) and +5 222 | #' (positive). AFINN-111, the latest version, contains 2,477 words and phrases. 223 | #' @source 224 | #' @references 225 | #' Nielsen, F. Å. (2011). [A new ANEW: Evaluation of a Word List for Sentiment 226 | #' Analysis in Microblogs.](https://arxiv.org/abs/1103.2903) In *Proceedings 227 | #' of the ESWC2011 Workshop on 'Making Sense of Microposts': Big Things Come 228 | #' in Small Packages*, 93--98. 229 | #' @format 230 | #' A [dictionary] with one key, `AFINN`, with valences from -5 (negative) to +5 231 | #' (positive). 232 | #' @section License: 233 | #' [Open Database License (ODbL) v1.0](https://opendatacommons.org/licenses/odbl/1-0/) 234 | #' @keywords data 235 | "data_dictionary_AFINN" 236 | 237 | #' Affective Norms for English Words (ANEW) 238 | #' 239 | #' A quanteda dictionary object containing the ANEW, or Affective Norms for 240 | #' English Words (Bradley and Lang 2017) valenced lexicon. The ANEW provides a 241 | #' lexicon of 2,471 distinct fixed word matches that are associated with three 242 | #' valenced categories: pleasure, arousal, and dominance. 243 | #' @format 244 | #' A [dictionary] with three valenced keys: `pleasure`, `arousal`, and 245 | #' `dominance`, each with valences from 1 to 9 and containing the same 2,471 246 | #' fixed word values. 247 | #' @section License: 248 | #' ANEW Statement of Use 249 | #' 250 | #' In accepting the ANEW materials, I agree not to make the ANEW available to 251 | #' the media (television, magazines, etc.) or to place them on any internet or 252 | #' computer-accessible websites. I also agree not to publish the ANEW in any 253 | #' print format – including JOURNALS, newspapers, etc. I also agree that I will 254 | #' not provide the ANEW materials to profit making companies or organizations 255 | #' and I agree not to distribute my username and password to unauthorized 256 | #' parties. 257 | #' @keywords data 258 | "data_dictionary_ANEW" 259 | 260 | #' Rauh's German Political Sentiment Dictionary 261 | #' 262 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing the 263 | #' dictionaries provided in Rauh (forthcoming). Rauh assesses its performance 264 | #' against human intuition of sentiment in German political language 265 | #' (parliamentary speeches, party manifestos, and media coverage). 
The resource 266 | #' builds on, harmonizes and extends the SentiWS (Remus et al. 2010) and 267 | #' GermanPolarityClues (Waltinger 2010) dictionaries. In order to use the 268 | #' negation correction provided by the dictionary, currently a combination of 269 | #' [tokens_replace][quanteda::tokens_replace] and [tokens_compound][quanteda::tokens_compound] is 270 | #' required to harmonize the five covered bi-gram patterns prior to scoring. The 271 | #' example below shows how to conduct this transformation. Note that the process 272 | #' changes the terms "nicht|nichts|kein|keine|keinen" to a joint term altering 273 | #' some of the features of the original corpus. 274 | #' @format The dictionary has four keys. 275 | #' \describe{ 276 | #' \item{`negative`}{19,750 terms indicating negative sentiment} 277 | #' \item{`positive`}{17,330 terms indicating positive sentiment} 278 | #' \item{`neg_positive`}{17,330 terms indicating a positive word preceded 279 | #' by a negation (used to convey negative sentiment)} 280 | #' \item{`neg_negative`}{19,750 terms indicating a negative word preceded 281 | #' by a negation (used to convey positive sentiment)} 282 | #' } 283 | #' @source \doi{10.7910/DVN/BKBXWD} 284 | #' @references 285 | #' Rauh, C. (2018). Validating a Sentiment Dictionary for German Political 286 | #' Language: A Workbench Note. 287 | #' *Journal of Information Technology & Politics*, 15(4), 319--343. 288 | #' \doi{10.1080/19331681.2018.1485608} 289 | #' 290 | #' Remus, R., Quasthoff U., & Heyer, G. (2010). "[SentiWS - a Publicly 291 | #' Available German-language Resource for Sentiment 292 | #' Analysis.](http://www.lrec-conf.org/proceedings/lrec2010/pdf/490_Paper.pdf)" 293 | #' In *Proceedings of the 7th International Language Resources and Evaluation 294 | #' (LREC'10)*, 1168--1171. 295 | #' 296 | #' Waltinger, U. (2010). "[GermanPolarityClues: A Lexical Resource for German 297 | #' Sentiment Analysis](http://www.ulliwaltinger.de/pdf/91_Paper.pdf)." In 298 | #' *International Conference on Language Resources and Evaluation*, 17--23 May 299 | #' 2010 LREC'10. 300 | #' @examples 301 | #' \donttest{ 302 | #' # tokenize example text 303 | #' toks <- tokens("nicht schlecht dieses wunderschöne Wörterbuch") 304 | #' # replace negation markers with "not" 305 | #' toks1 <- tokens_replace(toks, pattern = c("nicht", "nichts", "kein", 306 | #' "keine", "keinen"), 307 | #' replacement = rep("not", 5)) 308 | #' # compound bi-gram negation patterns 309 | #' toks2 <- tokens_compound(toks1, data_dictionary_Rauh, concatenator = " ") 310 | #' 311 | #' # apply dictionary 312 | #' tokens_lookup(toks2, dictionary = data_dictionary_Rauh) |> 313 | #' dfm() 314 | #' } 315 | #' @keywords data 316 | "data_dictionary_Rauh" 317 | 318 | #' Loughran and McDonald Sentiment Word Lists 319 | #' 320 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing 321 | #' the 2014 version of the Loughran and McDonald Sentiment Word Lists. The 322 | #' categories are "negative" (2355 features), "positive" (354), "uncertainty" (297), "litigious" (903), 323 | #' "constraining" (184), "superfluous" (56), "interesting" (68), "modal words strong" (68) 324 | #' and "modal words weak" (0). 325 | #' @source 326 | #' @references 327 | #' Loughran, T. & McDonald, B. (2011). When is a Liability not a Liability? 328 | #' Textual Analysis, Dictionaries, and 10-Ks. 329 | #' *Journal of Finance*, 66(1), 35--65. 
\doi{10.1111/j.1540-6261.2010.01625.x} 330 | #' @keywords data 331 | "data_dictionary_LoughranMcDonald" 332 | 333 | -------------------------------------------------------------------------------- /R/print.R: -------------------------------------------------------------------------------- 1 | print_dictionary <- quanteda:::print_dictionary 2 | 3 | # new method for printing sentiment dictionaries 4 | #' @import methods 5 | setMethod("print", signature(x = "dictionary2"), 6 | function(x, 7 | max_nkey = quanteda::quanteda_options("print_dictionary_max_nkey"), 8 | max_nval = quanteda::quanteda_options("print_dictionary_max_nval"), 9 | show_summary = quanteda::quanteda_options("print_dictionary_summary"), 10 | ...) { 11 | x <- quanteda::as.dictionary(x) 12 | if (show_summary) { 13 | depth <- dictionary_depth(x) 14 | lev <- if (depth > 1L) " primary" else "" 15 | nkey <- length(names(x)) 16 | cat("Dictionary object with ", as.character(nkey), lev, " key entr", 17 | if (nkey == 1L) "y" else "ies", sep = "") 18 | if (lev != "") cat(" and ", as.character(depth), " nested levels", sep = "") 19 | cat(".\n") 20 | if (!is.null(polarity(x))) { 21 | cat("Polarities: ") 22 | poles <- lapply(polarity(x), function(y) paste0("\"", y, "\"")) 23 | cat(mapply(paste, names(poles), "=", 24 | unname(sapply(poles, paste, collapse = ", "))) |> 25 | paste(collapse = "; "), 26 | "\n") 27 | } 28 | if (!is.null(valence(x))) { 29 | cat("Valences set for keys: ") 30 | cat(paste(names(valence(x)), collapse = ", "), "\n") 31 | } 32 | } 33 | invisible(print_dictionary(x, 1, max_nkey, max_nval, ...)) 34 | }) 35 | -------------------------------------------------------------------------------- /R/quanteda.sentiment-package.R: -------------------------------------------------------------------------------- 1 | #' @keywords internal 2 | "_PACKAGE" 3 | 4 | # The following block is used by usethis to automatically manage 5 | # roxygen namespace tags. Modify with care! 6 | ## usethis namespace: start 7 | ## usethis namespace: end 8 | NULL 9 | -------------------------------------------------------------------------------- /R/re-exports.R: -------------------------------------------------------------------------------- 1 | friendly_class_undefined_message <- quanteda:::friendly_class_undefined_message 2 | dictionary_depth <- quanteda:::dictionary_depth 3 | print_dictionary <- quanteda:::print_dictionary 4 | -------------------------------------------------------------------------------- /R/textstat_polarity.R: -------------------------------------------------------------------------------- 1 | # textstat_polarity ---------------- 2 | 3 | #' Compute sentiment from key polarities 4 | #' 5 | #' Compute sentiment scores using a polarity approach, based on assigned 6 | #' categories (types or features) of positive, negative, and neutral sentiment. 7 | #' Several formulas for combining the polar categories are available, or the 8 | #' user can supply a custom function. 9 | #' @param x a character, [corpus], [tokens], or [dfm] object containing 10 | #' text, tokens, or features whose sentiment will be scored 11 | #' @param dictionary a [dictionary] that has [polarity] set, indicating which 12 | #' keys are associated with positive, negative, and (optionally) neutral 13 | #' sentiment 14 | #' @param fun function; the formula for computing sentiment, which must refer to 15 | #' `pos`, `neg`, and (optionally) `neut`. The default is the "logit" scale 16 | #' (Lowe et al 2011) which is the log of (positive / negative) counts. 
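A smoothing constant (0.5 by default) is added to the positive and negative counts before the logarithms are taken.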
See 17 | #' [sentiment-functions] for details and for additional available functions, 18 | #' as well as details on how to supply custom functions. 19 | #' @param ... additional arguments passed to `fun` 20 | #' @return a [data.frame] of sentiment scores 21 | #' @export 22 | #' @references Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011). 23 | #' Scaling Policy Preferences from Coded Political Texts. _Legislative Studies 24 | #' Quarterly_, 36(1), 123–155. \doi{10.1111/j.1939-9162.2010.00006.x} 25 | #' @examples 26 | #' library("quanteda") 27 | #' corp <- tail(data_corpus_inaugural, n = 5) 28 | #' toks <- tokens(corp) 29 | #' dfmat <- dfm(toks) 30 | #' polar1 <- list(pos = "positive", neg = "negative") 31 | #' polar2 <- list(pos = c("positive", "neg_negative"), 32 | #' neg = c("negative", "neg_positive")) 33 | #' 34 | #' polarity(data_dictionary_LSD2015) <- polar1 35 | #' textstat_polarity(corp, dictionary = data_dictionary_LSD2015) 36 | #' textstat_polarity(toks, dictionary = data_dictionary_LSD2015) 37 | #' textstat_polarity(dfmat, dictionary = data_dictionary_LSD2015) 38 | #' 39 | #' polarity(data_dictionary_LSD2015) <- polar2 40 | #' textstat_polarity(corp, dictionary = data_dictionary_LSD2015) 41 | #' textstat_polarity(toks, dictionary = data_dictionary_LSD2015) 42 | #' textstat_polarity(corp, dictionary = data_dictionary_LSD2015) 43 | #' textstat_polarity(dfmat, dictionary = data_dictionary_LSD2015) 44 | #' 45 | #' # with a user-supplied function 46 | #' sent_fn <- function(x) (x[, "pos"] - x[, "neg"]) / rowSums(x) * 100 47 | #' textstat_polarity(toks, data_dictionary_LSD2015, fun = sent_fn) 48 | textstat_polarity <- function(x, dictionary, fun = sent_logit, ...) { 49 | UseMethod("textstat_polarity") 50 | } 51 | 52 | #' @export 53 | textstat_polarity.default <- function(x, dictionary, fun = sent_logit, ...) { 54 | stop(friendly_class_undefined_message(class(x), "textstat_polarity")) 55 | } 56 | 57 | #' @importFrom quanteda corpus 58 | #' @export 59 | textstat_polarity.character <- function(x, ...) { 60 | textstat_polarity(corpus(x), ...) 61 | } 62 | 63 | #' @importFrom quanteda tokens 64 | #' @export 65 | textstat_polarity.corpus <- function(x, ...) { 66 | textstat_polarity(tokens(x), ...) 67 | } 68 | 69 | #' @importFrom quanteda dictionary tokens_lookup dfm 70 | #' @export 71 | textstat_polarity.tokens <- function(x, dictionary, ...) { 72 | dict <- get_polarity_dictionary(dictionary) 73 | poldict <- dictionary(polarity(dict)) 74 | polarity(poldict) <- polarity(dict) 75 | 76 | tokens(x) |> 77 | tokens_lookup(dictionary = dict, nomatch = "other", nested_scope = "dictionary") |> 78 | dfm() |> 79 | textstat_polarity(dictionary = poldict, ...) 80 | } 81 | 82 | #' @importFrom quanteda convert dfm_lookup as.dfm 83 | #' @export 84 | textstat_polarity.dfm <- function(x, dictionary, fun = sent_logit, ...) { 85 | dict <- get_polarity_dictionary(dictionary) 86 | 87 | result <- fun(dfm_lookup(x, dict, nomatch = "other"), ...) 88 | result <- convert(as.dfm(result), to = "data.frame") 89 | names(result)[2] <- "sentiment" 90 | 91 | class(result) <- c("sentiment", "textstat", "data.frame") 92 | attr(result, "fun") <- fun 93 | attr(result, "fun_name") <- as.character(substitute(fun)) 94 | 95 | result 96 | } 97 | 98 | 99 | # polarity setting and checking functions -------------- 100 | 101 | #' Set or get the sentiment polarity of a dictionary 102 | #' 103 | #' Set or retrieve the polarity of a [dictionary] object for the purposes of 104 | #' sentiment analysis. 
Polarity consists of a set of dictionary keys that are 105 | #' associated with positive, negative, and (optionally) neutral categories for 106 | #' use in [textstat_polarity()]. 107 | #' 108 | #' A dictionary may have only one set of polarities at a time, but may be 109 | #' changed as needed. 110 | #' @param x a [dictionary] object 111 | #' @return `polarity()` returns the polarity as a list. 112 | #' @keywords dictionary textstat utility 113 | #' @export 114 | #' 115 | #' @examples 116 | #' library("quanteda") 117 | #' simpledict <- dictionary(list( 118 | #' happy = c("happy", "jubilant", "exuberant"), 119 | #' sad = c("sad", "morose", "down") 120 | #' )) 121 | #' polarity(simpledict) 122 | #' polarity(simpledict) <- list(pos = "happy", neg = "sad") 123 | #' polarity(simpledict) 124 | #' 125 | #' # can list multiple keys 126 | #' polarity(data_dictionary_LSD2015) <- list( 127 | #' pos = c("positive", "neg_negative"), 128 | #' neg = c("negative", "neg_positive") 129 | #' ) 130 | #' polarity(data_dictionary_LSD2015) 131 | polarity <- function(x) { 132 | UseMethod("polarity") 133 | } 134 | 135 | #' @export 136 | polarity.dictionary2 <- function(x) { 137 | x@meta$object$polarity 138 | } 139 | 140 | #' @rdname polarity 141 | #' @param value list consisting of named character vectors `pos`, `neg`, and 142 | #' (optionally) `neut` corresponding to positive, negative, and neutral 143 | #' sentiment categories respectively. Each element may contain multiple 144 | #' key names. The `neut` category is optional but `pos` and `neg` must be 145 | #' supplied. 146 | #' @return `polarity<-` sets the dictionary's polarity. 147 | #' @export 148 | "polarity<-" <- function(x, value) { 149 | UseMethod("polarity<-") 150 | } 151 | 152 | #' @export 153 | "polarity<-.dictionary2" <- function(x, value) { 154 | if (!is.null(value)) { 155 | if (!setequal(union(c("pos", "neg", "neut"), names(value)), 156 | c("pos", "neg", "neut")) || 157 | !is.list(value)) { 158 | stop("value must be a list of 'pos', 'neg', and (optionally) 'neut'", 159 | call. = FALSE) 160 | } 161 | check_that_poles_exist(x, value) 162 | } else { 163 | if (is.null(valence(x))) class(x) <- "dictionary2" 164 | } 165 | 166 | x@meta$object$polarity <- value 167 | x 168 | } 169 | 170 | #' Get a standard polarity dictionary for sentiment analysis 171 | #' 172 | #' Checks and standardizes a [dictionary] object with its [polarity] set, so 173 | #' that the polarity categories are standardized into the keys `pos`, `neg`, and 174 | #' (optionally) `neut`. Also checks that the dictionary contains all of the 175 | #' keys named in the polarity object. (It is necessary to check here since the 176 | #' dictionary could have been subset after creation.) 177 | #' @param dictionary a \pkg{quanteda} [dictionary] 178 | #' @return a single-level [dictionary] with keys `pos`, `neg`, and (optionally) 179 | #' `neut`. 180 | #' @keywords internal 181 | get_polarity_dictionary <- function(dictionary) { 182 | poles <- polarity(dictionary) 183 | 184 | # check the poles 185 | if (is.null(poles)) { 186 | stop("polarity is not set for this dictionary; see ?polarity", 187 | call. 
= FALSE) 188 | } 189 | check_that_poles_exist(dictionary, poles) 190 | 191 | # standardize the dictionary 192 | dictlist <- list( 193 | pos = unlist(dictionary[poles$pos], use.names = FALSE), 194 | neg = unlist(dictionary[poles$neg], use.names = FALSE), 195 | neut = unlist(dictionary[poles$neut], use.names = FALSE) 196 | ) 197 | dict <- dictionary(dictlist[!sapply(dictlist, is.null)]) 198 | 199 | # set the polarity to the keys 200 | newpoles <- list(pos = "pos", neg = "neg") 201 | if (!is.null(dictlist$neut)) newpoles <- c(newpoles, list(neut = "neut")) 202 | polarity(dict) <- newpoles 203 | 204 | return(dict) 205 | } 206 | 207 | 208 | check_that_poles_exist <- function(dictionary, poles) { 209 | poles <- unlist(poles, use.names = FALSE) 210 | polematch <- poles %in% names(dictionary) 211 | if (!all(polematch)) { 212 | stop("'", poles[!polematch], "' key not found in this dictionary", 213 | call. = FALSE) 214 | } 215 | } 216 | 217 | # sentiment formula functions -------------- 218 | 219 | #' Sentiment functions 220 | #' 221 | #' Functions for computing sentiment, for [textstat_polarity()]. Each function 222 | #' takes an input [dfm] with fixed feature names (see Details), and returns a 223 | #' sparse Matrix with a single column representing the results of the sentiment 224 | #' calculation. 225 | #' 226 | #' @details 227 | #' User supplied functions must take `x` and optional additional arguments, such 228 | #' as `smooth` for a smoothing constant for the logit scaling function. feature 229 | #' names for the sentiment categories `pos`, `neg`, `neut`, and `other`. (The 230 | #' `other` category is only required when a scaling function needs the count of 231 | #' non-sentiment associated features.) 232 | #' 233 | #' Additional arguments may be passed via `...`, such as `smooth` for the logit 234 | #' scale. 235 | #' 236 | #' @param x a [dfm] that has the following required feature names: `pos`, 237 | #' `neg`, `neut`, and `other` 238 | #' @return a sparse \pkg{Matrix} object of documents by sentiment score, where 239 | #' the sentiment score is the only column. (Its name is unimportant as this 240 | #' will not be used by [textstat_polarity()].) 241 | #' @keywords textstat internal 242 | #' @references Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011). 243 | #' Scaling Policy Preferences from Coded Political Texts. _Legislative Studies 244 | #' Quarterly_, 36(1), 123–155. 245 | #' \doi{10.1111/j.1939-9162.2010.00006.x} 246 | #' @name sentiment-functions 247 | #' @examples 248 | #' library("quanteda") 249 | #' dfmat <- c("pos pos pos neg pos pos", "neg neg pos pos pos") |> 250 | #' tokens() |> 251 | #' dfm() 252 | #' sent_logit(dfmat) 253 | #' sent_abspropdiff(dfmat) 254 | #' 255 | #' # user-supplied function 256 | #' my_sent_fn <- function(x) (x[, "pos"] - x[, "neg"]) / rowSums(x) * 100 257 | #' my_sent_fn(dfmat) 258 | #' 259 | #' # user supplied function with fixed weights and using neutral category 260 | #' dfmat2 <- c("pos pos neut neg neut pos", "neg neg neut neut pos") |> 261 | #' tokens() |> 262 | #' dfm() 263 | #' my_sent_fn2 <- function(x) (x[, "pos"]*3 + x[, "neut"]*2 + x[, "neg"]*1)/3 264 | #' my_sent_fn2(dfmat2) 265 | NULL 266 | 267 | #' @description `sent_logit` is \eqn{log(\frac{pos}{neg})}. 
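For example, with the default `smooth = 0.5`, a document with 5 positive and 2 negative matches scores \eqn{log(5.5) - log(2.5) \approx 0.79}.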
268 | #' @rdname sentiment-functions 269 | #' @param smooth additional smoothing function added to `pos` and `neg` before 270 | #' logarithmic transformation 271 | #' @export 272 | sent_logit <- function(x, smooth = 0.5) { 273 | log(x[, "pos"] + smooth) - log(x[, "neg"] + smooth) 274 | } 275 | 276 | #' @description `sent_abspropdiff` is \eqn{\frac{pos - neg}{N}}, where \eqn{N} 277 | #' is the total number of all features in a document. 278 | #' @rdname sentiment-functions 279 | #' @importFrom Matrix rowSums 280 | #' @export 281 | sent_abspropdiff <- function(x) { 282 | (x[, "pos"] - x[, "neg"]) / Matrix::rowSums(x) 283 | } 284 | 285 | #' @description `sent_relpropdiff` is \eqn{\frac{pos - neg}{pos + neg}}. 286 | #' @rdname sentiment-functions 287 | #' @export 288 | sent_relpropdiff <- function(x) { 289 | (x[, "pos"] - x[, "neg"]) / (x[, "pos"] + x[, "neg"]) 290 | } 291 | -------------------------------------------------------------------------------- /R/textstat_valence.R: -------------------------------------------------------------------------------- 1 | # textstat_valence ---------------- 2 | 3 | #' Compute sentiment from word valences 4 | #' 5 | #' Compute sentiment scores from tokens or document-feature matrices, based on 6 | #' the valences of dictionary keys and values. 7 | #' @param x a character, [corpus], [tokens], or [dfm] object containing 8 | #' text, tokens, or features whose sentiment will be scored. 9 | #' @param dictionary a \pkg{quanteda} [dictionary] that has [valence] set, in 10 | #' the form of numerical valences associated with sentiment 11 | #' @param normalization the baseline for normalizing the sentiment counts after 12 | #' scoring. Sentiment scores within keys are weighted means of the tokens 13 | #' matched to dictionary values, weighted by their valences. The default 14 | #' `"dictionary"` is to average over only the valenced words. `"all"` 15 | #' averages across all tokens, and `"none"` does no normalization. 16 | #' @param ... not used here 17 | #' @return a data.frame of sentiment scores 18 | #' @note 19 | #' If the input item is a [dfm], then multi-word values will not be matched 20 | #' unless the features of the [dfm] have been compounded previously. The input 21 | #' objects should not have had dictionaries applied previously. 22 | #' @export 23 | #' @references 24 | #' For a discussion of how to aggregate sentiment scores to the document 25 | #' level, see: 26 | #' 27 | #' Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011). 28 | #' Scaling Policy Preferences from Coded Political Texts. _Legislative Studies 29 | #' Quarterly_, 36(1), 123–155. 
30 | #' \doi{10.1111/j.1939-9162.2010.00006.x} 31 | #' @seealso [valence()] 32 | #' @examples 33 | #' library("quanteda") 34 | #' \dontrun{ 35 | #' 36 | #' # AFINN 37 | #' afinn <- read.delim(system.file("extdata/afinn/AFINN-111.txt", 38 | #' package = "quanteda.sentiment"), 39 | #' header = FALSE, col.names = c("word", "valence")) 40 | #' data_dictionary_afinn <- dictionary(list(afinn = afinn$word)) 41 | #' valence(data_dictionary_afinn) <- list(afinn = afinn$valence) 42 | #' textstat_valence(toks, dictionary = data_dictionary_afinn) 43 | #' 44 | #' # ANEW 45 | #' anew <- read.delim(url("https://bit.ly/2zZ44w0")) 46 | #' anew <- anew[!duplicated(anew$Word), ] # because some words repeat 47 | #' data_dictionary_anew <- dictionary(list(pleasure = anew$Word, 48 | #' arousal = anew$Word, 49 | #' dominance = anew$Word)) 50 | #' valence(data_dictionary_anew) <- list(pleasure = anew$ValMn, 51 | #' arousal = anew$AroMn, 52 | #' dominance = anew$DomMn) 53 | #' textstat_valence(toks, data_dictionary_anew["pleasure"]) 54 | #' textstat_valence(toks, data_dictionary_anew["arousal"])} 55 | #' 56 | textstat_valence <- function(x, dictionary, 57 | normalization = c("dictionary", "all", "none"), ...) { 58 | UseMethod("textstat_valence") 59 | } 60 | 61 | #' @export 62 | textstat_valence.default <- function(x, dictionary, 63 | normalization = c("dictionary", "all", "none"), ...) { 64 | stop(friendly_class_undefined_message(class(x), "textstat_valence")) 65 | } 66 | 67 | #' @export 68 | textstat_valence.character <- function(x, ...) { 69 | textstat_valence(corpus(x), ...) 70 | } 71 | 72 | #' @export 73 | textstat_valence.corpus <- function(x, ...) { 74 | textstat_valence(tokens(x), ...) 75 | } 76 | 77 | #' @export 78 | textstat_valence.tokens <- function(x, dictionary, 79 | normalization = c("dictionary", "all", "none"), ...) { 80 | normalization <- match.arg(normalization) 81 | valence(dictionary) <- set_valences(dictionary, valence(dictionary)) 82 | numdict <- dictionary(as.list(flip_valence(dictionary))) 83 | quanteda::as.tokens(x) |> 84 | tokens_lookup(dictionary = numdict, nomatch = "other", 85 | nested_scope = "dictionary") |> 86 | dfm() |> 87 | aggregate_valence(norm = normalization) 88 | } 89 | 90 | #' @export 91 | textstat_valence.dfm <- function(x, dictionary, 92 | normalization = c("dictionary", "all", "none"), ...) 
{ 93 | normalization <- match.arg(normalization) 94 | valence(dictionary) <- set_valences(dictionary, valence(dictionary)) 95 | numdict <- dictionary(as.list(flip_valence(dictionary))) 96 | as.dfm(x) |> 97 | dfm_lookup(dictionary = numdict, nomatch = "other") |> 98 | aggregate_valence(norm = normalization) 99 | } 100 | 101 | # internal sentiment calculation functions ----------- 102 | 103 | # uses Kohei's approach to make the valence values into the keys, and 104 | # then groups all values together under that score 105 | flip_valence <- function(dictionary) { 106 | v <- valence(dictionary) 107 | if (is.null(v)) stop("valence not set") 108 | 109 | structure(unlist(sapply(v, names), use.names = FALSE), 110 | names = unlist(v, use.names = FALSE)) 111 | } 112 | 113 | aggregate_valence <- function(x, norm = c("dictionary", "all", "none")) { 114 | norm <- match.arg(norm) 115 | other_index <- match("other", colnames(x)) 116 | if (norm == "dictionary") { 117 | denom <- rowSums(x[, -other_index]) 118 | } else if (norm == "all") { 119 | denom <- rowSums(x) 120 | } else if (norm == "none") { 121 | denom <- 1 122 | } 123 | x <- x[, -other_index] 124 | result <- data.frame(doc_id = quanteda::docnames(x), 125 | sentiment = as.vector(x %*% as.numeric(colnames(x)) 126 | / denom)) 127 | result$sentiment[is.na(result$sentiment)] <- 0 128 | result 129 | } 130 | 131 | # valence setting and checking functions -------------- 132 | 133 | #' Set or get the valences of dictionary values or keys 134 | #' 135 | #' Set or retrieve the valences of a [dictionary] object for the purposes of 136 | #' sentiment analysis. Valences consist of numerical values attached to each 137 | #' dictionary "value". For dictionaries with a more "polarity"-based approach, 138 | #' see [textstat_polarity()] 139 | #' 140 | #' Valences are used only in [textstat_valence()]. 141 | #' 142 | #' A dictionary may have only one set of valences at a time, but may be 143 | #' changed as needed. 144 | #' @param x a \pkg{quanteda} [dictionary][quanteda::dictionary] object 145 | #' @return `valences()` returns the valences as a list named numeric vectors, 146 | #' where each list element corresponds to a key in the dictionary, and each 147 | #' numeric element matches a value within that key. 148 | #' @keywords dictionary textstat utility 149 | #' @seealso [textstat_valence()], [valence()] 150 | #' @export 151 | #' 152 | #' @examples 153 | #' library("quanteda") 154 | #' 155 | #' # setting valences 156 | #' dict <- dictionary(list( 157 | #' happiness = c("happy", "jubilant", "exuberant", "content"), 158 | #' anger = c("mad", "peeved", "irate", "furious", "livid") 159 | #' )) 160 | #' valence(dict) 161 | #' # using a 5-point scale: 1:1 match 162 | #' valence(dict) <- list(happiness = c(3, 4, 5, 2), 163 | #' anger = c(3.1, 2.4, 2.9, 4.1, 5.0)) 164 | #' valence(dict) 165 | #' # with single valences applied to all values within the keys 166 | #' valence(dict) <- c(happiness = 1, anger = -1) 167 | #' valence(dict) 168 | #' # with named elements - order does not matter 169 | #' valence(dict) <- list( 170 | #' happiness = c(exuberant = 5, jubilant = 4, happy = 3, content = 2) 171 | #' ) 172 | #' valence(dict) 173 | #' 174 | valence <- function(x) { 175 | UseMethod("valence") 176 | } 177 | 178 | #' @export 179 | valence.dictionary2 <- function(x) { 180 | x@meta$object$valence 181 | } 182 | 183 | #' @rdname valence 184 | #' @param value named list consisting of numerical value. The names of the 185 | #' elements must correspond to a dictionary key. 
Each element must be: 186 | #' * a single numeric value that will be applied to all of the dictionary 187 | #' values in that key; or 188 | #' * a vector of numeric values that matches the length and order of the 189 | #' dictionary values in that key; or 190 | #' * a named numeric vector where each element name matches dictionary values 191 | #' in the key. 192 | #' @return `valence<-` sets the dictionary's valences. 193 | #' @export 194 | "valence<-" <- function(x, value) { 195 | UseMethod("valence<-") 196 | } 197 | 198 | #' @export 199 | "valence<-.dictionary2" <- function(x, value) { 200 | if (!is.null(value)) { 201 | value <- as.list(value) 202 | check_valences(x, value) 203 | x@meta$object$valence <- set_valences(x, value) 204 | } else { 205 | x@meta$object$valence <- NULL 206 | if (!is.null(polarity(x))) class(x) <- "dictionary2" 207 | } 208 | x 209 | } 210 | 211 | dictionary_depth <- quanteda:::dictionary_depth 212 | 213 | check_valences <- function(dictionary, valences) { 214 | if (dictionary_depth(dictionary) > 1) 215 | stop("valenced dictionaries cannot be nested", call. = FALSE) 216 | if (!is.list(valences) || any(names(valences) == "")) 217 | stop("valence must be a fully named list", call. = FALSE) 218 | for (key in names(valences)) { 219 | if (!key %in% names(dictionary)) 220 | stop("'", key, "' is not a dictionary key", call. = FALSE) 221 | if (!is.numeric(valences[[key]])) 222 | stop("valence values must be numeric", call. = FALSE) 223 | if (length(valences[[key]]) != 1 && 224 | length(valences[[key]]) != length(dictionary[[key]])) 225 | stop("valence value length not equal to number of values for key '", 226 | key, "'", call. = FALSE) 227 | } 228 | } 229 | 230 | set_valences <- function(dictionary, valences) { 231 | # only use valences for keys in dictionary 232 | valences <- valences[names(valences) %in% names(dictionary)] 233 | if (!length(valences)) 234 | stop("no valenced keys found") 235 | 236 | for (key in names(valences)) { 237 | # repeat valences if only a single value is supplied 238 | if (length(valences[[key]]) == 1) 239 | valences[[key]] <- rep(valences[[key]], length(dictionary[[key]])) 240 | # use dictionary values as names if none supplied 241 | if (length(names(valences[[key]])) != length(valences[[key]])) 242 | names(valences[[key]]) <- dictionary[[key]] 243 | } 244 | valences 245 | } 246 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | ```{r, echo = FALSE} 6 | knitr::opts_chunk$set( 7 | collapse = TRUE, 8 | comment = "##", 9 | fig.path = "man/images/" 10 | ) 11 | ``` 12 | ```{r echo = FALSE, results = "hide", message = FALSE} 13 | library("badger") 14 | ``` 15 | 16 | # quanteda.sentiment 17 | 18 | 19 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/quanteda.sentiment)](https://cran.r-project.org/package=quanteda.sentiment) 20 | `r badge_devel("quanteda/quanteda.sentiment", "royalblue")` 21 | [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) 22 | [![Codecov test coverage](https://codecov.io/gh/quanteda/quanteda.sentiment/branch/master/graph/badge.svg)](https://app.codecov.io/gh/quanteda/quanteda.sentiment?branch=master) 23 | 
[![R-CMD-check](https://github.com/quanteda/quanteda.sentiment/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/quanteda/quanteda.sentiment/actions/workflows/R-CMD-check.yaml) 24 | <!-- badges: end --> 25 | 26 | ## Installation 27 | 28 | You can install **quanteda.sentiment** from GitHub with: 29 | 30 | ```{r eval = FALSE} 31 | remotes::install_github("quanteda/quanteda.sentiment") 32 | ``` 33 | 34 | The package is not yet on CRAN. 35 | 36 | ## About 37 | 38 | **quanteda.sentiment** extends the **quanteda** package with functions for computing sentiment on text. It has two main functions, one for each of two types of sentiment. These follow the structure of a **quanteda** dictionary, which consists of _key_ entries expressing the canonical concept, and _value_ patterns (such as "good", "sad*", etc.) to be matched in a text and counted as occurrences of that key. 39 | 40 | The package approaches sentiment computation in two ways, depending on whether sentiment is considered a key attribute, in which case the keys are assigned a _polarity_ such as _positive_ or _negative_, or whether individual values are assigned a _valence_, in the form of some continuous value indicating a degree of sentiment. Each is implemented in a separate function: 41 | 42 | * **Polarity-based sentiment.** This is implemented via `textstat_polarity()`, for computing sentiment based on keys set as "poles" of positive versus negative sentiment. Polarity is set through the `polarity()<-` function and can be assigned to any keys of any dictionary. "Sentiment" here can be broadly construed as any contrasting pair of poles, such as "Democrat" versus "Republican", for instance. More than one key can be associated with the same pole. 43 | 44 | Polar values are converted into sentiment scores using a flexible function, such as $\mathrm{log}(pos / neg)$, or $(pos - neg)/(pos + neg)$. **quanteda.sentiment** offers three built-in functions, but the user can supply any function for combining polarities. 45 | 46 | * **Valence-based sentiment.** This is implemented via `textstat_valence()`, for computing sentiment as the average valence of a document's words, based on a dictionary whose values have numeric valence scores. Valence scores are set using the `valence()<-` function. Each key in a dictionary may have values with different valences. Both approaches are sketched briefly below.
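
As a quick sketch of both approaches, here is a small dictionary with a polarity and a set of valences assigned to it (the words and scores are invented purely for illustration):

```{r eval = FALSE}
library("quanteda")
library("quanteda.sentiment")

# a tiny illustrative dictionary
dict <- dictionary(list(happy = c("happy", "glad", "delighted"),
                        sad   = c("sad", "gloomy", "dejected")))

# polarity-based: declare which keys form the positive and negative poles
polarity(dict) <- list(pos = "happy", neg = "sad")
textstat_polarity(c(d1 = "happy and glad", d2 = "sad and gloomy"), dict)

# valence-based: attach a numeric valence to each value within a key
valence(dict) <- list(happy = c(1, 1, 2), sad = c(-1, -1, -2))
textstat_valence(c(d1 = "happy and glad", d2 = "sad and gloomy"), dict)
```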
47 | 48 | The package comes with the following built-in dictionaries: 49 | 50 | | Name | Description | Polarity | Valence | 51 | |:---------------------------------|:--------------------------------------------------------------|:--------:|:-------:| 52 | | data_dictionary_AFINN | Nielsen's (2011) 'new ANEW' valenced word list | | ✔ | 53 | | data_dictionary_ANEW | Affective Norms for English Words (ANEW) | | ✔ | 54 | | data_dictionary_geninqposneg | Augmented General Inquirer _Positiv_ and _Negativ_ dictionary | ✔ | | 55 | | data_dictionary_HuLiu | Positive and negative words from Hu and Liu (2004) | ✔ | | 56 | | data_dictionary_LoughranMcDonald | Loughran and McDonald Sentiment Word Lists | ✔ | | 57 | | data_dictionary_LSD2015 | Lexicoder Sentiment Dictionary (2015) | ✔ | | 58 | | data_dictionary_NRC | NRC Word-Emotion Association Lexicon | ✔ | | 59 | | data_dictionary_Rauh | Rauh's German Political Sentiment Dictionary | ✔ | | 60 | | data_dictionary_sentiws | SentimentWortschatz (SentiWS) | ✔ | ✔ | 61 | 62 | 63 | ## Examples 64 | 65 | For a polarity dictionary, we can use the positive and negative key categories from the General Inquirer dictionary: 66 | ```{r} 67 | library("quanteda.sentiment") 68 | 69 | # inspect the dictionary and its polarities 70 | print(data_dictionary_geninqposneg, max_nval = 8) 71 | 72 | # compute sentiment 73 | tail(data_corpus_inaugural) |> 74 | textstat_polarity(dictionary = data_dictionary_geninqposneg) 75 | ``` 76 | 77 | For a valence dictionary, we can compute this for the "pleasure" category of the Affective Norms for English Words (ANEW): 78 | ```{r} 79 | library("quanteda", warn.conflicts = FALSE, quietly = TRUE) 80 | library("quanteda.sentiment") 81 | 82 | # inspect the dictionary and its valences 83 | print(data_dictionary_ANEW, max_nval = 8) 84 | lapply(valence(data_dictionary_ANEW), head, 8) 85 | 86 | # compute the sentiment 87 | tail(data_corpus_inaugural) |> 88 | textstat_valence(dictionary = data_dictionary_ANEW["pleasure"]) 89 | ``` 90 | 91 | We can compare two measures computed in different ways (although they are not comparable, really, since they are different lexicons): 92 | ```{r} 93 | # ensure we have this package's version of the dictionary 94 | data("data_dictionary_LSD2015", package = "quanteda.sentiment") 95 | 96 | sent_pol <- tail(data_corpus_inaugural, 25) |> 97 | textstat_polarity(dictionary = data_dictionary_LSD2015) 98 | sent_pol <- dplyr::mutate(sent_pol, polarity = sentiment) 99 | sent_val <- tail(data_corpus_inaugural, 25) |> 100 | textstat_valence(dictionary = data_dictionary_AFINN) 101 | 102 | library("ggplot2") 103 | 104 | ggplot(data.frame(sent_pol, valence = sent_val$sentiment), 105 | aes(x = polarity, y = valence)) + 106 | geom_point() 107 | ``` 108 | 109 | Good enough for government work! 110 | 111 | ## Where to learn more 112 | 113 | Each dictionary and function has extensive documentation, including references to social scientific research articles where each sentiment concept is described in detail. There is also a package vignette with more detailed examples. 
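
The vignette can be opened from R once the package is installed (assuming vignettes were built):

```{r eval = FALSE}
vignette("sentiment_analysis", package = "quanteda.sentiment")
```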
114 | --------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | # quanteda.sentiment
3 | 
4 | 
5 | 
6 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/quanteda.sentiment)](https://cran.r-project.org/package=quanteda.sentiment)
7 | [![](https://img.shields.io/badge/devel%20version-0.31-royalblue.svg)](https://github.com/quanteda/quanteda.sentiment)
8 | [![Lifecycle:
9 | experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental)
10 | [![Codecov test
11 | coverage](https://codecov.io/gh/quanteda/quanteda.sentiment/branch/master/graph/badge.svg)](https://app.codecov.io/gh/quanteda/quanteda.sentiment?branch=master)
12 | [![R-CMD-check](https://github.com/quanteda/quanteda.sentiment/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/quanteda/quanteda.sentiment/actions/workflows/R-CMD-check.yaml)
13 | 
14 | 
15 | ## Installation
16 | 
17 | You can install **quanteda.sentiment** from GitHub with:
18 | 
19 | ``` r
20 | remotes::install_github("quanteda/quanteda.sentiment")
21 | ```
22 | 
23 | The package is not yet on CRAN.
24 | 
25 | ## About
26 | 
27 | **quanteda.sentiment** extends the **quanteda** package with functions
28 | for computing sentiment on text. It has two main functions, for
29 | computing two types of sentiment. These follow the structure of a
30 | **quanteda** dictionary, which consists of *key* entries expressing the
31 | canonical concept, and *value* patterns (such as “good”, “sad\*“, etc.)
32 | to be matched in a text and counted as occurrences of that key.
33 | 
34 | This package approaches sentiment computation in two ways, depending on
35 | whether sentiment is considered a key attribute, in which case the keys
36 | are assigned a *polarity* such as *positive* or *negative*, or whether
37 | individual values are assigned a *valence*, in the form of some
38 | continuous value indicating a degree of sentiment. Each is implemented
39 | in a separate function:
40 | 
41 | - **Polarity-based sentiment.** This is implemented via
42 | `textstat_polarity()`, for computing sentiment based on keys set as
43 | “poles” of positive versus negative sentiment. Polarity is set
44 | through the `polarity()<-` function and can be set for any keys of
45 | any dictionary. “Sentiment” here can be broadly construed as any
46 | contrasting pair of poles, such as “Democrat” versus “Republican”.
47 | More than one key can be associated with the same pole.
48 | 
49 | Polar values are converted into sentiment scores using a flexible
50 | function, such as $\mathrm{log}(pos / neg)$, or
51 | $(pos - neg)/(pos + neg)$. **quanteda.sentiment** offers three
52 | built-in functions, but the user can supply any function for
53 | combining polarities (see the sketch below).
54 | 
55 | - **Valence-based sentiment.** This is implemented via
56 | `textstat_valence()`, for computing sentiment as the average valence
57 | of a document’s words, based on a dictionary whose values have numeric
58 | valence scores. Valence scores are set using the `valence()<-`
59 | function. Each key in a dictionary may have values with different
60 | valences.
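
Here is a minimal sketch of both workflows on a toy dictionary. The
dictionary, the example texts, and the scoring function `my_fun` are
invented for illustration only (none of them ships with the package); the
built-in dictionaries used in the Examples section below work the same
way.

``` r
library("quanteda")
library("quanteda.sentiment")

# a toy dictionary whose two keys will serve as opposing poles
dict <- dictionary(list(happy = c("happy", "jubilant", "exuberant"),
                        sad   = c("sad", "morose", "down")))
polarity(dict) <- list(pos = "happy", neg = "sad")

toks <- tokens(c(d1 = "A happy, jubilant crowd.",
                 d2 = "A sad and morose anniversary."))

# polarity sentiment with a user-supplied combination function:
# the percentage difference of polar counts relative to all tokens
my_fun <- function(x) (x[, "pos"] - x[, "neg"]) / rowSums(x) * 100
textstat_polarity(toks, dictionary = dict, fun = my_fun)

# the same dictionary can instead carry numeric valences for its values
valence(dict) <- list(happy = c(happy = 1, jubilant = 2, exuberant = 3),
                      sad   = c(sad = -1, morose = -2, down = -3))
textstat_valence(toks, dictionary = dict)
```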
61 | 62 | The package comes with the following built-in dictionaries: 63 | 64 | | Name | Description | Polarity | Valence | 65 | |:---------------------------------|:--------------------------------------------------------------|:--------:|:-------:| 66 | | data_dictionary_AFINN | Nielsen’s (2011) ‘new ANEW’ valenced word list | | ✔ | 67 | | data_dictionary_ANEW | Affective Norms for English Words (ANEW) | | ✔ | 68 | | data_dictionary_geninqposneg | Augmented General Inquirer *Positiv* and *Negativ* dictionary | ✔ | | 69 | | data_dictionary_HuLiu | Positive and negative words from Hu and Liu (2004) | ✔ | | 70 | | data_dictionary_LoughranMcDonald | Loughran and McDonald Sentiment Word Lists | ✔ | | 71 | | data_dictionary_LSD2015 | Lexicoder Sentiment Dictionary (2015) | ✔ | | 72 | | data_dictionary_NRC | NRC Word-Emotion Association Lexicon | ✔ | | 73 | | data_dictionary_Rauh | Rauh’s German Political Sentiment Dictionary | ✔ | | 74 | | data_dictionary_sentiws | SentimentWortschatz (SentiWS) | ✔ | ✔ | 75 | 76 | ## Examples 77 | 78 | For a polarity dictionary, we can use the positive and negative key 79 | categories from the General Inquirer dictionary: 80 | 81 | ``` r 82 | library("quanteda.sentiment") 83 | ## Loading required package: quanteda 84 | ## Package version: 4.0.0 85 | ## Unicode version: 14.0 86 | ## ICU version: 71.1 87 | ## Parallel computing: 10 of 10 threads used. 88 | ## See https://quanteda.io for tutorials and examples. 89 | ## 90 | ## Attaching package: 'quanteda.sentiment' 91 | ## The following object is masked from 'package:quanteda': 92 | ## 93 | ## data_dictionary_LSD2015 94 | 95 | # inspect the dictionary and its polarities 96 | print(data_dictionary_geninqposneg, max_nval = 8) 97 | ## Dictionary object with 2 key entries. 98 | ## Polarities: pos = "positive"; neg = "negative" 99 | ## - [positive]: 100 | ## - abide, ability, able, abound, absolve, absorbent, absorption, abundance [ ... and 1,645 more ] 101 | ## - [negative]: 102 | ## - abandon, abandonment, abate, abdicate, abhor, abject, abnormal, abolish [ ... and 2,002 more ] 103 | 104 | # compute sentiment 105 | tail(data_corpus_inaugural) |> 106 | textstat_polarity(dictionary = data_dictionary_geninqposneg) 107 | ## doc_id sentiment 108 | ## 1 2001-Bush 0.9233579 109 | ## 2 2005-Bush 0.9829457 110 | ## 3 2009-Obama 0.5666378 111 | ## 4 2013-Obama 0.7597420 112 | ## 5 2017-Trump 0.7724428 113 | ## 6 2021-Biden 0.6018714 114 | ``` 115 | 116 | For a valence dictionary, we can compute this for the “pleasure” 117 | category of the Affective Norms for English Words (ANEW): 118 | 119 | ``` r 120 | library("quanteda", warn.conflicts = FALSE, quietly = TRUE) 121 | library("quanteda.sentiment") 122 | 123 | # inspect the dictionary and its valences 124 | print(data_dictionary_ANEW, max_nval = 8) 125 | ## Dictionary object with 3 key entries. 126 | ## Valences set for keys: pleasure, arousal, dominance 127 | ## - [pleasure]: 128 | ## - abduction, able, abortion, absent, absurd, abundance, abuse, accept [ ... and 2,463 more ] 129 | ## - [arousal]: 130 | ## - abduction, able, abortion, absent, absurd, abundance, abuse, accept [ ... and 2,463 more ] 131 | ## - [dominance]: 132 | ## - abduction, able, abortion, absent, absurd, abundance, abuse, accept [ ... 
and 2,463 more ] 133 | lapply(valence(data_dictionary_ANEW), head, 8) 134 | ## $pleasure 135 | ## abduction able abortion absent absurd abundance abuse accept 136 | ## 2.76 6.74 3.50 3.69 4.26 6.59 1.80 6.80 137 | ## 138 | ## $arousal 139 | ## abduction able abortion absent absurd abundance abuse accept 140 | ## 5.53 4.30 5.39 4.73 4.36 5.51 6.83 5.53 141 | ## 142 | ## $dominance 143 | ## abduction able abortion absent absurd abundance abuse accept 144 | ## 3.49 6.83 4.59 4.35 4.73 5.80 3.69 5.41 145 | 146 | # compute the sentiment 147 | tail(data_corpus_inaugural) |> 148 | textstat_valence(dictionary = data_dictionary_ANEW["pleasure"]) 149 | ## doc_id sentiment 150 | ## 1 2001-Bush 6.091330 151 | ## 2 2005-Bush 6.308839 152 | ## 3 2009-Obama 5.841437 153 | ## 4 2013-Obama 6.045129 154 | ## 5 2017-Trump 6.223944 155 | ## 6 2021-Biden 6.018528 156 | ``` 157 | 158 | We can compare two measures computed in different ways (although they 159 | are not comparable, really, since they are different lexicons): 160 | 161 | ``` r 162 | # ensure we have this package's version of the dictionary 163 | data("data_dictionary_LSD2015", package = "quanteda.sentiment") 164 | 165 | sent_pol <- tail(data_corpus_inaugural, 25) |> 166 | textstat_polarity(dictionary = data_dictionary_LSD2015) 167 | sent_pol <- dplyr::mutate(sent_pol, polarity = sentiment) 168 | sent_val <- tail(data_corpus_inaugural, 25) |> 169 | textstat_valence(dictionary = data_dictionary_AFINN) 170 | 171 | library("ggplot2") 172 | 173 | ggplot(data.frame(sent_pol, valence = sent_val$sentiment), 174 | aes(x = polarity, y = valence)) + 175 | geom_point() 176 | ``` 177 | 178 | ![](man/images/unnamed-chunk-6-1.png) 179 | 180 | Good enough for government work! 181 | 182 | ## Where to learn more 183 | 184 | Each dictionary and function has extensive documentation, including 185 | references to social scientific research articles where each sentiment 186 | concept is described in detail. There is also a package vignette with 187 | more detailed examples. 
188 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /data/data_dictionary_AFINN.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_AFINN.rda -------------------------------------------------------------------------------- /data/data_dictionary_ANEW.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_ANEW.rda -------------------------------------------------------------------------------- /data/data_dictionary_HuLiu.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_HuLiu.rda -------------------------------------------------------------------------------- /data/data_dictionary_LSD2015.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_LSD2015.rda -------------------------------------------------------------------------------- /data/data_dictionary_LoughranMcDonald.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_LoughranMcDonald.rda -------------------------------------------------------------------------------- /data/data_dictionary_NRC.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_NRC.rda -------------------------------------------------------------------------------- /data/data_dictionary_Rauh.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_Rauh.rda -------------------------------------------------------------------------------- /data/data_dictionary_geninqposneg.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_geninqposneg.rda -------------------------------------------------------------------------------- /data/data_dictionary_sentiws.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_sentiws.rda -------------------------------------------------------------------------------- /inst/WORDLIST: 
-------------------------------------------------------------------------------- 1 | AFINN 2 | ANEW’ 3 | Analyzing 4 | Codecov 5 | ESWC 6 | EmoLex 7 | FL 8 | GermanPolarityClues 9 | HLT 10 | Heyer 11 | Hu 12 | HuLiu 13 | KDD 14 | Ks 15 | LREC 16 | Lexicoder 17 | Lifecycle 18 | Loughran 19 | LoughranMcDonald 20 | Microblogs 21 | Microposts 22 | Mikhaylov 23 | Mohammad 24 | NAACL 25 | NRC 26 | Negativ 27 | ODbL 28 | Positiv 29 | Quasthoff 30 | READMEs 31 | Rauh 32 | Rauh's 33 | Rauh’s 34 | Remus 35 | Ressources 36 | SIGKDD 37 | Saif 38 | SentiWS 39 | SentimentWortschatz 40 | Soroka 41 | Turney 42 | UF 43 | Waltinger 44 | afinn 45 | analyze 46 | analyzing 47 | damag 48 | dfm 49 | doi 50 | dones 51 | etc 52 | frac 53 | geninqposneg 54 | kein 55 | keine 56 | keinen 57 | mathrm 58 | neut 59 | nicht 60 | nichts 61 | pos 62 | quanteda 63 | sentiws 64 | textstat 65 | th 66 | tibble 67 | tokenization 68 | valenced 69 | Å 70 | Årup 71 | -------------------------------------------------------------------------------- /inst/extdata/afinn/AFINN-README.txt: -------------------------------------------------------------------------------- 1 | AFINN is a list of English words rated for valence with an integer 2 | between minus five (negative) and plus five (positive). The words have 3 | been manually labeled by Finn Årup Nielsen in 2009-2011. The file 4 | is tab-separated. There are two versions: 5 | 6 | AFINN-111: Newest version with 2477 words and phrases. 7 | 8 | AFINN-96: 1468 unique words and phrases on 1480 lines. Note that there 9 | are 1480 lines, as some words are listed twice. The word list in not 10 | entirely in alphabetic ordering. 11 | 12 | An evaluation of the word list is available in: 13 | 14 | Finn Årup Nielsen, "A new ANEW: Evaluation of a word list for 15 | sentiment analysis in microblogs", http://arxiv.org/abs/1103.2903 16 | 17 | The list was used in: 18 | 19 | Lars Kai Hansen, Adam Arvidsson, Finn Årup Nielsen, Elanor Colleoni, 20 | Michael Etter, "Good Friends, Bad News - Affect and Virality in 21 | Twitter", The 2011 International Workshop on Social Computing, 22 | Network, and Services (SocialComNet 2011). 23 | 24 | 25 | This database of words is copyright protected and distributed under 26 | "Open Database License (ODbL) v1.0" 27 | https://www.opendatacommons.org/licenses/odbl/1.0/ or a similar 28 | copyleft license. 29 | 30 | See comments on the word list here: 31 | http://fnielsen.posterous.com/old-anew-a-sentiment-about-sentiment-analysis 32 | 33 | 34 | In Python the file may be read into a dictionary with: 35 | 36 | >>> afinn = dict(map(lambda (k,v): (k,int(v)), 37 | [ line.split('\t') for line in open("AFINN-111.txt") ])) 38 | >>> afinn["Good".lower()] 39 | 3 40 | >>> sum(map(lambda word: afinn.get(word, 0), "Rainy day but still in a good mood".lower().split())) 41 | 2 42 | 43 | 44 | -------------------------------------------------------------------------------- /man/data_dictionary_AFINN.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-documentation.R 3 | \docType{data} 4 | \name{data_dictionary_AFINN} 5 | \alias{data_dictionary_AFINN} 6 | \title{Nielsen's (2011) 'new ANEW' valenced word list} 7 | \format{ 8 | A \link{dictionary} with one key, \code{AFINN}, with valences from -5 (negative) to +5 9 | (positive). 
10 | } 11 | \source{ 12 | \url{http://www2.imm.dtu.dk/pubdb/pubs/6010-full.html} 13 | } 14 | \usage{ 15 | data_dictionary_AFINN 16 | } 17 | \description{ 18 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing Finn Årup 19 | Nielsen's (2011) 'new ANEW' valenced word list, a publicly available list of 20 | English words rated for valence with values between -5 (negative) and +5 21 | (positive). AFINN-111, the latest version, contains 2,477 words and phrases. 22 | } 23 | \section{License}{ 24 | 25 | \href{https://opendatacommons.org/licenses/odbl/1-0/}{Open Database License (ODbL) v1.0} 26 | } 27 | 28 | \references{ 29 | Nielsen, F. Å. (2011). \href{https://arxiv.org/abs/1103.2903}{A new ANEW: Evaluation of a Word List for Sentiment Analysis in Microblogs.} In \emph{Proceedings 30 | of the ESWC2011 Workshop on 'Making Sense of Microposts': Big Things Come 31 | in Small Packages}, 93--98. 32 | } 33 | \keyword{data} 34 | -------------------------------------------------------------------------------- /man/data_dictionary_ANEW.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-documentation.R 3 | \docType{data} 4 | \name{data_dictionary_ANEW} 5 | \alias{data_dictionary_ANEW} 6 | \title{Affective Norms for English Words (ANEW)} 7 | \format{ 8 | A \link{dictionary} with three valenced keys: \code{pleasure}, \code{arousal}, and 9 | \code{dominance}, each with valences from 1 to 9 and containing the same 2,471 10 | fixed word values. 11 | } 12 | \usage{ 13 | data_dictionary_ANEW 14 | } 15 | \description{ 16 | A quanteda dictionary object containing the ANEW, or Affective Norms for 17 | English Words (Bradley and Lang 2017) valenced lexicon. The ANEW provides a 18 | lexicon of 2,471 distinct fixed word matches that are associated with three 19 | valenced categories: pleasure, arousal, and dominance. 20 | } 21 | \section{License}{ 22 | 23 | ANEW Statement of Use 24 | 25 | In accepting the ANEW materials, I agree not to make the ANEW available to 26 | the media (television, magazines, etc.) or to place them on any internet or 27 | computer-accessible websites. I also agree not to publish the ANEW in any 28 | print format – including JOURNALS, newspapers, etc. I also agree that I will 29 | not provide the ANEW materials to profit making companies or organizations 30 | and I agree not to distribute my username and password to unauthorized 31 | parties. 32 | } 33 | 34 | \keyword{data} 35 | -------------------------------------------------------------------------------- /man/data_dictionary_HuLiu.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-documentation.R 3 | \docType{data} 4 | \name{data_dictionary_HuLiu} 5 | \alias{data_dictionary_HuLiu} 6 | \title{Positive and negative words from Hu and Liu (2004)} 7 | \format{ 8 | A \link{dictionary} of fixed word patterns with two keys: 9 | \itemize{ 10 | \item \code{positive}: 2,006 words with positive polarity 11 | \item \code{negative}: 4,783 words with negative polarity 12 | } 13 | } 14 | \source{ 15 | \url{https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html} 16 | } 17 | \usage{ 18 | data_dictionary_HuLiu 19 | } 20 | \description{ 21 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing 2,006 22 | positive and 4,783 negative words from Hu and Liu (2004, 2005). 
23 | } 24 | \section{License}{ 25 | 26 | Unknown. 27 | } 28 | 29 | \references{ 30 | Hu, M. & Liu, B. (2004). \href{https://www.cs.uic.edu/~liub/publications/kdd04-revSummary.pdf}{Mining and Summarizing Customer Reviews}. In 31 | Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery 32 | and Data Mining (KDD-2004), Aug 22--25, 2004, Seattle, Washington, USA. 33 | 34 | Liu, M., Hu, M., & Cheng, J. (2005). \href{https://www.cs.uic.edu/~liub/publications/www05-p536.pdf}{Opinion Observer: Analyzing and Comparing Opinions on the Web}. In 35 | Proceedings of the 14th International World Wide Web conference (WWW-2005), 36 | May 10--14, 2005, Chiba, Japan. 37 | } 38 | \keyword{data} 39 | -------------------------------------------------------------------------------- /man/data_dictionary_LSD2015.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-documentation.R 3 | \docType{data} 4 | \name{data_dictionary_LSD2015} 5 | \alias{data_dictionary_LSD2015} 6 | \title{Lexicoder Sentiment Dictionary (2015)} 7 | \format{ 8 | A \link{dictionary} of four keys containing glob-style \link[=valuetype]{pattern matches}. 9 | \describe{ 10 | \item{\code{negative}}{2,858 word patterns indicating negative sentiment} 11 | \item{\code{positive}}{1,709 word patterns indicating positive sentiment} 12 | \item{\code{neg_positive}}{1,721 word patterns indicating a positive word preceded 13 | by a negation (used to convey negative sentiment)} 14 | \item{\code{neg_negative}}{2,860 word patterns indicating a negative word preceded 15 | by a negation (used to convey positive sentiment)} 16 | } 17 | } 18 | \usage{ 19 | data_dictionary_LSD2015 20 | } 21 | \description{ 22 | The 2015 Lexicoder Sentiment Dictionary in \pkg{quanteda} \link{dictionary} 23 | format. 24 | } 25 | \details{ 26 | The dictionary consists of 2,858 "negative" sentiment words and 1,709 27 | "positive" sentiment words. A further set of 2,860 and 1,721 negations of 28 | negative and positive words, respectively, is also included. While many users 29 | will find the non-negation sentiment forms of the LSD adequate for sentiment 30 | analysis, Young and Soroka (2012) did find a small, but non-negligible 31 | increase in performance when accounting for negations. Users wishing to test 32 | this or include the negations are encouraged to subtract negated positive 33 | words from the count of positive words, and subtract the negated negative 34 | words from the negative count. 35 | 36 | Young and Soroka (2012) also suggest the use of a pre-processing script to 37 | remove specific cases of some words (i.e., "good bye", or "nobody better", 38 | which should not be counted as positive). Pre-processing scripts are 39 | available at \url{https://www.snsoroka.com/data-lexicoder/}. 40 | } 41 | \section{License and Conditions}{ 42 | 43 | The LSD is available for non-commercial academic purposes only. By using 44 | \code{data_dictionary_LSD2015}, you accept these terms. 45 | 46 | Please cite the references below when using the dictionary. 47 | } 48 | 49 | \examples{ 50 | # checking polarity 51 | polarity(data_dictionary_LSD2015) 52 | 53 | # simple example 54 | library("quanteda") 55 | txt <- "This aggressive policy will not win friends." 56 | 57 | tokens_lookup(tokens(txt), dictionary = data_dictionary_LSD2015, 58 | exclusive = FALSE) 59 | ## tokens from 1 document. 
60 | ## text1 : 61 | ## [1] "This" "NEGATIVE" "policy" "will" "NEG_POSITIVE" "POSITIVE" "POSITIVE" "." 62 | 63 | # notice that double-counting of negated and non-negated terms is avoided 64 | # when using nested_scope = "dictionary" 65 | tokens_lookup(tokens(txt), dictionary = data_dictionary_LSD2015, 66 | exclusive = FALSE, nested_scope = "dictionary") 67 | ## tokens from 1 document. 68 | ## text1 : 69 | ## [1] "This" "NEGATIVE" "policy" "will" "NEG_POSITIVE" "POSITIVE." 70 | 71 | # on larger examples - notice that few negations are used 72 | tail(data_corpus_inaugural) |> 73 | tokens() |> 74 | tokens_lookup(dictionary = data_dictionary_LSD2015) |> 75 | dfm() 76 | } 77 | \references{ 78 | The objectives, development and reliability of the dictionary are discussed 79 | in detail in Young and Soroka (2012). Please cite this article when using 80 | the Lexicoder Sentiment Dictionary and related resources. 81 | Young, L. & Soroka, S. (2012). \emph{Lexicoder Sentiment 82 | Dictionary}. Available at \url{https://www.snsoroka.com/data-lexicoder/}. 83 | 84 | Young, L. & Soroka, S. (2012). Affective News: The Automated Coding of 85 | Sentiment in Political Texts. \emph{Political Communication}, 29(2), 205--231. 86 | \doi{10.1080/10584609.2012.671234} 87 | } 88 | \keyword{data} 89 | -------------------------------------------------------------------------------- /man/data_dictionary_LoughranMcDonald.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-documentation.R 3 | \docType{data} 4 | \name{data_dictionary_LoughranMcDonald} 5 | \alias{data_dictionary_LoughranMcDonald} 6 | \title{Loughran and McDonald Sentiment Word Lists} 7 | \format{ 8 | An object of class \code{dictionary2} of length 9. 9 | } 10 | \source{ 11 | \url{https://sraf.nd.edu/loughranmcdonald-master-dictionary/} 12 | } 13 | \usage{ 14 | data_dictionary_LoughranMcDonald 15 | } 16 | \description{ 17 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing 18 | the 2014 version of the Loughran and McDonald Sentiment Word Lists. The 19 | categories are "negative" (2355 features), "positive" (354), "uncertainty" (297), "litigious" (903), 20 | "constraining" (184), "superfluous" (56), "interesting" (68), "modal words strong" (68) 21 | and "modal words weak" (0). 22 | } 23 | \references{ 24 | Loughran, T. & McDonald, B. (2011). When is a Liability not a Liability? 25 | Textual Analysis, Dictionaries, and 10-Ks. 26 | \emph{Journal of Finance}, 66(1), 35--65. \doi{10.1111/j.1540-6261.2010.01625.x} 27 | } 28 | \keyword{data} 29 | -------------------------------------------------------------------------------- /man/data_dictionary_NRC.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-documentation.R 3 | \docType{data} 4 | \name{data_dictionary_NRC} 5 | \alias{data_dictionary_NRC} 6 | \title{NRC Word-Emotion Association Lexicon} 7 | \format{ 8 | An object of class \code{dictionary2} of length 10. 
9 | } 10 | \source{ 11 | \url{https://nrc.canada.ca/en/research-development/products-services/technical-advisory-services/sentiment-emotion-lexicons} 12 | 13 | See also \url{http://saifmohammad.com/WebPages/AccessResource.htm} 14 | } 15 | \usage{ 16 | data_dictionary_NRC 17 | } 18 | \description{ 19 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing Mohammad and 20 | Charron's (2010, 2013) English version of the NRC Word-Emotion Association 21 | Lexicon (aka NRC Emotion Lexicon aka EmoLex): association of words with eight 22 | emotions (anger, fear, anticipation, trust, surprise, sadness, joy, and disgust) 23 | and two sentiments (negative and positive) manually annotated on Amazon's 24 | Mechanical Turk. 25 | 26 | The Sentiment and Emotion Lexicons is a collection of lexicons that was 27 | entirely created by the experts of the National Research Council of Canada. 28 | Developed with a wide range of applications, this lexicon collection can be 29 | used in a multitude of contexts such as sentiment analysis, product 30 | marketing, consumer behaviour and even political campaign analysis. 31 | 32 | The technology uses a list of words that help identify emotions, sentiment, 33 | as well as analyzing hashtags, emoticons and word-colour associations. The 34 | lexicons contain entries for English words, and can be used to analyze 35 | English texts. 36 | } 37 | \note{ 38 | Technical and research-related questions can be addressed to Saif M. 39 | Mohammad (Senior Research Scientist at NRC): 40 | \code{Saif.Mohammad@nrc-cnrc.gc.ca}. 41 | } 42 | \section{License and Terms of Use}{ 43 | 44 | Free for research purposes. 45 | 46 | For questions about the commercial license, email Pierre Charron (Client 47 | Relationship Leader at NRC): \code{Pierre.Charron@nrc-cnrc.gc.ca}. 48 | 49 | Terms of Use: 50 | \itemize{ 51 | \item Cite the papers associated with the lexicons in your research papers and 52 | articles that make use of them. (The papers associated with each lexicon 53 | are listed below, and also in the READMEs for individual lexicons.) 54 | \item In news articles and online posts on work using these lexicons, cite the 55 | appropriate lexicons. For example: "This application/product/tool makes 56 | use of the \verb{resource name}, created by \code{author(s)} at the National 57 | Research Council Canada." (The creators of each lexicon are listed below. 58 | Also, if you send us an email, we will be thrilled to know about how you 59 | have used the lexicon.) If possible hyperlink to this page: 60 | \url{http://saifmohammad.com/WebPages/lexicons.html}. 61 | \item If you use a lexicon in a product or application, then acknowledge this in 62 | the 'About' page and other relevant documentation of the application by 63 | stating the name of the resource, the authors, and NRC. For example: "This 64 | application/product/tool makes use of the \verb{resource name}, created by 65 | \code{author(s)} at the National Research Council Canada." (The creators of 66 | each lexicon are listed below. Also, if you send us an email, we will be 67 | thrilled to know about how you have used the lexicon.) If possible 68 | hyperlink to this page: \url{http://saifmohammad.com/WebPages/lexicons.html}. 69 | \item Do not redistribute the data. Direct interested parties to this page: 70 | \url{http://saifmohammad.com/WebPages/AccessResource.htm}. 
71 | \item National Research Council Canada (NRC) disclaims any responsibility for 72 | the use of the lexicons listed here and does not provide technical 73 | support. However, the contact listed above will be happy to respond to 74 | queries and clarifications. 75 | } 76 | } 77 | 78 | \references{ 79 | Mohammad, S. & Turney, P. (2013). \href{https://arxiv.org/abs/1308.6297}{Crowdsourcing a Word-Emotion Association Lexicon}. \emph{Computational Intelligence}, 80 | 29(3), 436--465. 81 | 82 | Mohammad, S. & Turney, P. (2010). \href{https://dl.acm.org/doi/10.5555/1860631.1860635}{Emotions Evoked by Common Words and Phrases: Using Mechanical Turk to Create an Emotion Lexicon}. In \emph{Proceedings of 83 | the NAACL-HLT 2010 Workshop on Computational Approaches to Analysis and 84 | Generation of Emotion in Text}, June 2010, LA, California. 85 | } 86 | \keyword{data} 87 | -------------------------------------------------------------------------------- /man/data_dictionary_Rauh.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-documentation.R 3 | \docType{data} 4 | \name{data_dictionary_Rauh} 5 | \alias{data_dictionary_Rauh} 6 | \title{Rauh's German Political Sentiment Dictionary} 7 | \format{ 8 | The dictionary has four keys. 9 | \describe{ 10 | \item{\code{negative}}{19,750 terms indicating negative sentiment} 11 | \item{\code{positive}}{17,330 terms indicating positive sentiment} 12 | \item{\code{neg_positive}}{17,330 terms indicating a positive word preceded 13 | by a negation (used to convey negative sentiment)} 14 | \item{\code{neg_negative}}{19,750 terms indicating a negative word preceded 15 | by a negation (used to convey positive sentiment)} 16 | } 17 | } 18 | \source{ 19 | \doi{10.7910/DVN/BKBXWD} 20 | } 21 | \usage{ 22 | data_dictionary_Rauh 23 | } 24 | \description{ 25 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing the 26 | dictionaries provided in Rauh (forthcoming). Rauh assesses its performance 27 | against human intuition of sentiment in German political language 28 | (parliamentary speeches, party manifestos, and media coverage). The resource 29 | builds on, harmonizes and extends the SentiWS (Remus et al. 2010) and 30 | GermanPolarityClues (Waltinger 2010) dictionaries. In order to use the 31 | negation correction provided by the dictionary, currently a combination of 32 | \link[quanteda:tokens_replace]{tokens_replace} and \link[quanteda:tokens_compound]{tokens_compound} is 33 | required to harmonize the five covered bi-gram patterns prior to scoring. The 34 | example below shows how to conduct this transformation. Note that the process 35 | changes the terms "nicht|nichts|kein|keine|keinen" to a joint term altering 36 | some of the features of the original corpus. 37 | } 38 | \examples{ 39 | \donttest{ 40 | # tokenize example text 41 | toks <- tokens("nicht schlecht dieses wunderschöne Wörterbuch") 42 | # replace negation markers with "not" 43 | toks1 <- tokens_replace(toks, pattern = c("nicht", "nichts", "kein", 44 | "keine", "keinen"), 45 | replacement = rep("not", 5)) 46 | # compound bi-gram negation patterns 47 | toks2 <- tokens_compound(toks1, data_dictionary_Rauh, concatenator = " ") 48 | 49 | # apply dictionary 50 | tokens_lookup(toks2, dictionary = data_dictionary_Rauh) |> 51 | dfm() 52 | } 53 | } 54 | \references{ 55 | Rauh, C. (2018). 
Validating a Sentiment Dictionary for German Political 56 | Language: A Workbench Note. 57 | \emph{Journal of Information Technology & Politics}, 15(4), 319--343. 58 | \doi{10.1080/19331681.2018.1485608} 59 | 60 | Remus, R., Quasthoff U., & Heyer, G. (2010). "\href{http://www.lrec-conf.org/proceedings/lrec2010/pdf/490_Paper.pdf}{SentiWS - a Publicly Available German-language Resource for Sentiment Analysis.}" 61 | In \emph{Proceedings of the 7th International Language Resources and Evaluation 62 | (LREC'10)}, 1168--1171. 63 | 64 | Waltinger, U. (2010). "\href{http://www.ulliwaltinger.de/pdf/91_Paper.pdf}{GermanPolarityClues: A Lexical Resource for German Sentiment Analysis}." In 65 | \emph{International Conference on Language Resources and Evaluation}, 17--23 May 66 | 2010 LREC'10. 67 | } 68 | \keyword{data} 69 | -------------------------------------------------------------------------------- /man/data_dictionary_geninqposneg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-documentation.R 3 | \docType{data} 4 | \name{data_dictionary_geninqposneg} 5 | \alias{data_dictionary_geninqposneg} 6 | \title{Augmented General Inquirer \emph{Positiv} and \emph{Negativ} dictionary} 7 | \format{ 8 | A \link{dictionary} of fixed word patterns with two keys: 9 | \itemize{ 10 | \item \code{positive}: 1,653 words with positive polarity 11 | \item \code{negative}: 2,010 words with negative polarity 12 | } 13 | } 14 | \source{ 15 | \verb{http://www.wjh.harvard.edu/~inquirer/spreadsheet_guide.htm} -- 16 | although this site ceased operating some time in 2021 17 | } 18 | \usage{ 19 | data_dictionary_geninqposneg 20 | } 21 | \description{ 22 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing the 23 | \emph{Positiv} and \emph{Negativ} dictionary entries from the augmented 24 | General Inquirer. These are new valence categories described at 25 | \verb{http://www.wjh.harvard.edu/~inquirer/homecat.htm} but also including the 26 | terms from the "yes" "no" dictionary entries. 27 | } 28 | \references{ 29 | Stone, P.J., Dunphy, C.D., & Smith, M.S. (1966). 30 | \emph{The General Inquirer: A Computer Approach to Content Analysis.} 31 | Cambridge, MA: MIT Press. 32 | } 33 | \keyword{data} 34 | -------------------------------------------------------------------------------- /man/data_dictionary_sentiws.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-documentation.R 3 | \docType{data} 4 | \name{data_dictionary_sentiws} 5 | \alias{data_dictionary_sentiws} 6 | \title{SentimentWortschatz (SentiWS)} 7 | \format{ 8 | An object of class \code{dictionary2} of length 2. 9 | } 10 | \source{ 11 | \url{https://wortschatz.uni-leipzig.de/en/download/} 12 | } 13 | \usage{ 14 | data_dictionary_sentiws 15 | } 16 | \description{ 17 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing 18 | SentimentWortschatz (SentiWS), a publicly available German-language resource 19 | for sentiment analysis. The current version of SentiWS contains 1,650 20 | positive and 1,818 negative words, which sum up to 15,649 positive and 15,632 21 | negative word forms including their inflections. It not only contains 22 | adjectives and adverbs explicitly expressing a sentiment, but also nouns and 23 | verbs implicitly containing one. 
The original dictionary weights within the 24 | interval of -1 to 1. Note that the version implemented in 25 | \pkg{quanteda.dictionaries} uses a binary classification into positive 26 | (weight > 0) and negative (weight < 0) features. 27 | } 28 | \references{ 29 | Remus, R., Quasthoff U., and Heyer, G. (2010). \href{http://www.lrec-conf.org/proceedings/lrec2010/pdf/490_Paper.pdf}{SentiWS: a Publicly Available German-language Resource for Sentiment Analysis}. 30 | In \emph{Proceedings of the 7th International Language Ressources and Evaluation 31 | (LREC'10)}, 1168--1171. 32 | } 33 | \keyword{data} 34 | -------------------------------------------------------------------------------- /man/get_polarity_dictionary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/textstat_polarity.R 3 | \name{get_polarity_dictionary} 4 | \alias{get_polarity_dictionary} 5 | \title{Get a standard polarity dictionary for sentiment analysis} 6 | \usage{ 7 | get_polarity_dictionary(dictionary) 8 | } 9 | \arguments{ 10 | \item{dictionary}{a \pkg{quanteda} \link{dictionary}} 11 | } 12 | \value{ 13 | a single-level \link{dictionary} with keys \code{pos}, \code{neg}, and (optionally) 14 | \code{neut}. 15 | } 16 | \description{ 17 | Checks and standardizes a \link{dictionary} object with its \link{polarity} set, so 18 | that the polarity categories are standardized into the keys \code{pos}, \code{neg}, and 19 | (optionally) \code{neut}. Also checks that the dictionary contains all of the 20 | keys named in the polarity object. (It is necessary to check here since the 21 | dictionary could have been subset after creation.) 22 | } 23 | \keyword{internal} 24 | -------------------------------------------------------------------------------- /man/images/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/man/images/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /man/images/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/man/images/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /man/polarity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/textstat_polarity.R 3 | \name{polarity} 4 | \alias{polarity} 5 | \alias{polarity<-} 6 | \title{Set or get the sentiment polarity of a dictionary} 7 | \usage{ 8 | polarity(x) 9 | 10 | polarity(x) <- value 11 | } 12 | \arguments{ 13 | \item{x}{a \link{dictionary} object} 14 | 15 | \item{value}{list consisting of named character vectors \code{pos}, \code{neg}, and 16 | (optionally) \code{neut} corresponding to positive, negative, and neutral 17 | sentiment categories respectively. Each element may contain multiple 18 | key names. The \code{neut} category is optional but \code{pos} and \code{neg} must be 19 | supplied.} 20 | } 21 | \value{ 22 | \code{polarity()} returns the polarity as a list. 23 | 24 | \verb{polarity<-} sets the dictionary's polarity. 
25 | } 26 | \description{ 27 | Set or retrieve the polarity of a \link{dictionary} object for the purposes of 28 | sentiment analysis. Polarity consists of a set of dictionary keys that are 29 | associated with positive, negative, and (optionally) neutral categories for 30 | use in \code{\link[=textstat_polarity]{textstat_polarity()}}. 31 | } 32 | \details{ 33 | A dictionary may have only one set of polarities at a time, but may be 34 | changed as needed. 35 | } 36 | \examples{ 37 | library("quanteda") 38 | simpledict <- dictionary(list( 39 | happy = c("happy", "jubilant", "exuberant"), 40 | sad = c("sad", "morose", "down") 41 | )) 42 | polarity(simpledict) 43 | polarity(simpledict) <- list(pos = "happy", neg = "sad") 44 | polarity(simpledict) 45 | 46 | # can list multiple keys 47 | polarity(data_dictionary_LSD2015) <- list( 48 | pos = c("positive", "neg_negative"), 49 | neg = c("negative", "neg_positive") 50 | ) 51 | polarity(data_dictionary_LSD2015) 52 | } 53 | \keyword{dictionary} 54 | \keyword{textstat} 55 | \keyword{utility} 56 | -------------------------------------------------------------------------------- /man/quanteda.sentiment-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/quanteda.sentiment-package.R 3 | \docType{package} 4 | \name{quanteda.sentiment-package} 5 | \alias{quanteda.sentiment} 6 | \alias{quanteda.sentiment-package} 7 | \title{quanteda.sentiment: Sentiment Analysis using 'quanteda'} 8 | \description{ 9 | Adds functions and dictionaries for computing sentiment using the 'quanteda' package. 10 | } 11 | \author{ 12 | \strong{Maintainer}: Kenneth Benoit \email{kbenoit@lse.ac.uk} [copyright holder] 13 | 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/sentiment-functions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/textstat_polarity.R 3 | \name{sentiment-functions} 4 | \alias{sentiment-functions} 5 | \alias{sent_logit} 6 | \alias{sent_abspropdiff} 7 | \alias{sent_relpropdiff} 8 | \title{Sentiment functions} 9 | \usage{ 10 | sent_logit(x, smooth = 0.5) 11 | 12 | sent_abspropdiff(x) 13 | 14 | sent_relpropdiff(x) 15 | } 16 | \arguments{ 17 | \item{x}{a \link{dfm} that has the following required feature names: \code{pos}, 18 | \code{neg}, \code{neut}, and \code{other}} 19 | 20 | \item{smooth}{additional smoothing function added to \code{pos} and \code{neg} before 21 | logarithmic transformation} 22 | } 23 | \value{ 24 | a sparse \pkg{Matrix} object of documents by sentiment score, where 25 | the sentiment score is the only column. (Its name is unimportant as this 26 | will not be used by \code{\link[=textstat_polarity]{textstat_polarity()}}.) 27 | } 28 | \description{ 29 | Functions for computing sentiment, for \code{\link[=textstat_polarity]{textstat_polarity()}}. Each function 30 | takes an input \link{dfm} with fixed feature names (see Details), and returns a 31 | sparse Matrix with a single column representing the results of the sentiment 32 | calculation. 33 | 34 | \code{sent_logit} is \eqn{log(\frac{pos}{neg})}. 35 | 36 | \code{sent_abspropdiff} is \eqn{\frac{pos - neg}{N}}, where \eqn{N} 37 | is the total number of all features in a document. 38 | 39 | \code{sent_relpropdiff} is \eqn{\frac{pos - neg}{pos + neg}}. 
40 | } 41 | \details{ 42 | User supplied functions must take \code{x} and optional additional arguments, such 43 | as \code{smooth} for a smoothing constant for the logit scaling function. feature 44 | names for the sentiment categories \code{pos}, \code{neg}, \code{neut}, and \code{other}. (The 45 | \code{other} category is only required when a scaling function needs the count of 46 | non-sentiment associated features.) 47 | 48 | Additional arguments may be passed via \code{...}, such as \code{smooth} for the logit 49 | scale. 50 | } 51 | \examples{ 52 | library("quanteda") 53 | dfmat <- c("pos pos pos neg pos pos", "neg neg pos pos pos") |> 54 | tokens() |> 55 | dfm() 56 | sent_logit(dfmat) 57 | sent_abspropdiff(dfmat) 58 | 59 | # user-supplied function 60 | my_sent_fn <- function(x) (x[, "pos"] - x[, "neg"]) / rowSums(x) * 100 61 | my_sent_fn(dfmat) 62 | 63 | # user supplied function with fixed weights and using neutral category 64 | dfmat2 <- c("pos pos neut neg neut pos", "neg neg neut neut pos") |> 65 | tokens() |> 66 | dfm() 67 | my_sent_fn2 <- function(x) (x[, "pos"]*3 + x[, "neut"]*2 + x[, "neg"]*1)/3 68 | my_sent_fn2(dfmat2) 69 | } 70 | \references{ 71 | Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011). 72 | Scaling Policy Preferences from Coded Political Texts. \emph{Legislative Studies 73 | Quarterly}, 36(1), 123–155. 74 | \doi{10.1111/j.1939-9162.2010.00006.x} 75 | } 76 | \keyword{internal} 77 | \keyword{textstat} 78 | -------------------------------------------------------------------------------- /man/textstat_polarity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/textstat_polarity.R 3 | \name{textstat_polarity} 4 | \alias{textstat_polarity} 5 | \title{Compute sentiment from key polarities} 6 | \usage{ 7 | textstat_polarity(x, dictionary, fun = sent_logit, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a character, \link{corpus}, \link{tokens}, or \link{dfm} object containing 11 | text, tokens, or features whose sentiment will be scored} 12 | 13 | \item{dictionary}{a \link{dictionary} that has \link{polarity} set, indicating which 14 | keys are associated with positive, negative, and (optionally) neutral 15 | sentiment} 16 | 17 | \item{fun}{function; the formula for computing sentiment, which must refer to 18 | \code{pos}, \code{neg}, and (optionally) \code{neut}. The default is the "logit" scale 19 | (Lowe et al 2011) which is the log of (positive / negative) counts. See 20 | \link{sentiment-functions} for details and for additional available functions, 21 | as well as details on how to supply custom functions.} 22 | 23 | \item{...}{additional arguments passed to \code{fun}} 24 | } 25 | \value{ 26 | a \link{data.frame} of sentiment scores 27 | } 28 | \description{ 29 | Compute sentiment scores using a polarity approach, based on assigned 30 | categories (types or features) of positive, negative, and neutral sentiment. 31 | Several formulas for combining the polar categories are available, or the 32 | user can supply a custom function. 
33 | } 34 | \examples{ 35 | library("quanteda") 36 | corp <- tail(data_corpus_inaugural, n = 5) 37 | toks <- tokens(corp) 38 | dfmat <- dfm(toks) 39 | polar1 <- list(pos = "positive", neg = "negative") 40 | polar2 <- list(pos = c("positive", "neg_negative"), 41 | neg = c("negative", "neg_positive")) 42 | 43 | polarity(data_dictionary_LSD2015) <- polar1 44 | textstat_polarity(corp, dictionary = data_dictionary_LSD2015) 45 | textstat_polarity(toks, dictionary = data_dictionary_LSD2015) 46 | textstat_polarity(dfmat, dictionary = data_dictionary_LSD2015) 47 | 48 | polarity(data_dictionary_LSD2015) <- polar2 49 | textstat_polarity(corp, dictionary = data_dictionary_LSD2015) 50 | textstat_polarity(toks, dictionary = data_dictionary_LSD2015) 51 | textstat_polarity(corp, dictionary = data_dictionary_LSD2015) 52 | textstat_polarity(dfmat, dictionary = data_dictionary_LSD2015) 53 | 54 | # with a user-supplied function 55 | sent_fn <- function(x) (x[, "pos"] - x[, "neg"]) / rowSums(x) * 100 56 | textstat_polarity(toks, data_dictionary_LSD2015, fun = sent_fn) 57 | } 58 | \references{ 59 | Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011). 60 | Scaling Policy Preferences from Coded Political Texts. \emph{Legislative Studies 61 | Quarterly}, 36(1), 123–155. \doi{10.1111/j.1939-9162.2010.00006.x} 62 | } 63 | -------------------------------------------------------------------------------- /man/textstat_valence.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/textstat_valence.R 3 | \name{textstat_valence} 4 | \alias{textstat_valence} 5 | \title{Compute sentiment from word valences} 6 | \usage{ 7 | textstat_valence( 8 | x, 9 | dictionary, 10 | normalization = c("dictionary", "all", "none"), 11 | ... 12 | ) 13 | } 14 | \arguments{ 15 | \item{x}{a character, \link{corpus}, \link{tokens}, or \link{dfm} object containing 16 | text, tokens, or features whose sentiment will be scored.} 17 | 18 | \item{dictionary}{a \pkg{quanteda} \link{dictionary} that has \link{valence} set, in 19 | the form of numerical valences associated with sentiment} 20 | 21 | \item{normalization}{the baseline for normalizing the sentiment counts after 22 | scoring. Sentiment scores within keys are weighted means of the tokens 23 | matched to dictionary values, weighted by their valences. The default 24 | \code{"dictionary"} is to average over only the valenced words. \code{"all"} 25 | averages across all tokens, and \code{"none"} does no normalization.} 26 | 27 | \item{...}{not used here} 28 | } 29 | \value{ 30 | a data.frame of sentiment scores 31 | } 32 | \description{ 33 | Compute sentiment scores from tokens or document-feature matrices, based on 34 | the valences of dictionary keys and values. 35 | } 36 | \note{ 37 | If the input item is a \link{dfm}, then multi-word values will not be matched 38 | unless the features of the \link{dfm} have been compounded previously. The input 39 | objects should not have had dictionaries applied previously. 
40 | } 41 | \examples{ 42 | library("quanteda") 43 | \dontrun{ 44 | 45 | # AFINN 46 | afinn <- read.delim(system.file("extdata/afinn/AFINN-111.txt", 47 | package = "quanteda.sentiment"), 48 | header = FALSE, col.names = c("word", "valence")) 49 | data_dictionary_afinn <- dictionary(list(afinn = afinn$word)) 50 | valence(data_dictionary_afinn) <- list(afinn = afinn$valence) 51 | textstat_valence(toks, dictionary = data_dictionary_afinn) 52 | 53 | # ANEW 54 | anew <- read.delim(url("https://bit.ly/2zZ44w0")) 55 | anew <- anew[!duplicated(anew$Word), ] # because some words repeat 56 | data_dictionary_anew <- dictionary(list(pleasure = anew$Word, 57 | arousal = anew$Word, 58 | dominance = anew$Word)) 59 | valence(data_dictionary_anew) <- list(pleasure = anew$ValMn, 60 | arousal = anew$AroMn, 61 | dominance = anew$DomMn) 62 | textstat_valence(toks, data_dictionary_anew["pleasure"]) 63 | textstat_valence(toks, data_dictionary_anew["arousal"])} 64 | 65 | } 66 | \references{ 67 | For a discussion of how to aggregate sentiment scores to the document 68 | level, see: 69 | 70 | Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011). 71 | Scaling Policy Preferences from Coded Political Texts. \emph{Legislative Studies 72 | Quarterly}, 36(1), 123–155. 73 | \doi{10.1111/j.1939-9162.2010.00006.x} 74 | } 75 | \seealso{ 76 | \code{\link[=valence]{valence()}} 77 | } 78 | -------------------------------------------------------------------------------- /man/valence.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/textstat_valence.R 3 | \name{valence} 4 | \alias{valence} 5 | \alias{valence<-} 6 | \title{Set or get the valences of dictionary values or keys} 7 | \usage{ 8 | valence(x) 9 | 10 | valence(x) <- value 11 | } 12 | \arguments{ 13 | \item{x}{a \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object} 14 | 15 | \item{value}{named list consisting of numerical value. The names of the 16 | elements must correspond to a dictionary key. Each element must be: 17 | \itemize{ 18 | \item a single numeric value that will be applied to all of the dictionary 19 | values in that key; or 20 | \item a vector of numeric values that matches the length and order of the 21 | dictionary values in that key; or 22 | \item a named numeric vector where each element name matches dictionary values 23 | in the key. 24 | }} 25 | } 26 | \value{ 27 | \code{valences()} returns the valences as a list named numeric vectors, 28 | where each list element corresponds to a key in the dictionary, and each 29 | numeric element matches a value within that key. 30 | 31 | \verb{valence<-} sets the dictionary's valences. 32 | } 33 | \description{ 34 | Set or retrieve the valences of a \link{dictionary} object for the purposes of 35 | sentiment analysis. Valences consist of numerical values attached to each 36 | dictionary "value". For dictionaries with a more "polarity"-based approach, 37 | see \code{\link[=textstat_polarity]{textstat_polarity()}} 38 | } 39 | \details{ 40 | Valences are used only in \code{\link[=textstat_valence]{textstat_valence()}}. 41 | 42 | A dictionary may have only one set of valences at a time, but may be 43 | changed as needed. 
44 | } 45 | \examples{ 46 | library("quanteda") 47 | 48 | # setting valences 49 | dict <- dictionary(list( 50 | happiness = c("happy", "jubilant", "exuberant", "content"), 51 | anger = c("mad", "peeved", "irate", "furious", "livid") 52 | )) 53 | valence(dict) 54 | # using a 5-point scale: 1:1 match 55 | valence(dict) <- list(happiness = c(3, 4, 5, 2), 56 | anger = c(3.1, 2.4, 2.9, 4.1, 5.0)) 57 | valence(dict) 58 | # with single valences applied to all values within the keys 59 | valence(dict) <- c(happiness = 1, anger = -1) 60 | valence(dict) 61 | # with named elements - order does not matter 62 | valence(dict) <- list( 63 | happiness = c(exuberant = 5, jubilant = 4, happy = 3, content = 2) 64 | ) 65 | valence(dict) 66 | 67 | } 68 | \seealso{ 69 | \code{\link[=textstat_valence]{textstat_valence()}}, \code{\link[=valence]{valence()}} 70 | } 71 | \keyword{dictionary} 72 | \keyword{textstat} 73 | \keyword{utility} 74 | -------------------------------------------------------------------------------- /sources/AFINN/AFINN-README.txt: -------------------------------------------------------------------------------- 1 | AFINN is a list of English words rated for valence with an integer 2 | between minus five (negative) and plus five (positive). The words have 3 | been manually labeled by Finn Årup Nielsen in 2009-2011. The file 4 | is tab-separated. There are two versions: 5 | 6 | AFINN-111: Newest version with 2477 words and phrases. 7 | 8 | AFINN-96: 1468 unique words and phrases on 1480 lines. Note that there 9 | are 1480 lines, as some words are listed twice. The word list in not 10 | entirely in alphabetic ordering. 11 | 12 | An evaluation of the word list is available in: 13 | 14 | Finn Årup Nielsen, "A new ANEW: Evaluation of a word list for 15 | sentiment analysis in microblogs", http://arxiv.org/abs/1103.2903 16 | 17 | The list was used in: 18 | 19 | Lars Kai Hansen, Adam Arvidsson, Finn Årup Nielsen, Elanor Colleoni, 20 | Michael Etter, "Good Friends, Bad News - Affect and Virality in 21 | Twitter", The 2011 International Workshop on Social Computing, 22 | Network, and Services (SocialComNet 2011). 23 | 24 | 25 | This database of words is copyright protected and distributed under 26 | "Open Database License (ODbL) v1.0" 27 | https://www.opendatacommons.org/licenses/odbl/1.0/ or a similar 28 | copyleft license. 
29 | 30 | See comments on the word list here: 31 | http://fnielsen.posterous.com/old-anew-a-sentiment-about-sentiment-analysis 32 | 33 | 34 | In Python the file may be read into a dictionary with: 35 | 36 | >>> afinn = dict(map(lambda (k,v): (k,int(v)), 37 | [ line.split('\t') for line in open("AFINN-111.txt") ])) 38 | >>> afinn["Good".lower()] 39 | 3 40 | >>> sum(map(lambda word: afinn.get(word, 0), "Rainy day but still in a good mood".lower().split())) 41 | 2 42 | 43 | 44 | -------------------------------------------------------------------------------- /sources/AFINN/create-data_dictionary_AFINN.R: -------------------------------------------------------------------------------- 1 | # AFINN Dictionary 2 | 3 | library("quanteda") 4 | 5 | afinn111 <- read.delim("AFINN/AFINN-111.txt", header = FALSE, col.names = c("word", "valence")) 6 | afinn96 <- read.delim("AFINN/AFINN-96.txt", header = FALSE, col.names = c("word", "valence")) 7 | 8 | afinn111 <- dplyr::arrange(afinn111, word) 9 | afinn96 <- dplyr::arrange(afinn96, word) 10 | dplyr::filter(afinn96, duplicated(afinn96$word)) 11 | 12 | data_dictionary_AFINN <- dictionary(list("AFINN" = afinn111$word)) 13 | valence(data_dictionary_AFINN) <- list("AFINN" = afinn111$valence) 14 | 15 | meta(data_dictionary_AFINN) <- list( 16 | title = "Finn Årup Nielsen's (2011) 'new ANEW' valenced word list", 17 | description = "AFINN is a list of English words rated for valence with an integer between minus five (negative) and plus five (positive), manually labeled by Finn Årup Nielsen in 2009-2011. This dictionary is the newer AFINN-111 version with 2,477 words and phrases.", 18 | url = "http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=6010", 19 | reference = "Nielsen, F. Å. (2011). A new ANEW: Evaluation of a Word List for Sentiment Analysis in Microblogs. In Proceedings of the ESWC2011 Workshop on 'Making Sense of Microposts': Big Things Come in Small Packages, 93--98.", 20 | license = "This database of words is copyright protected and distributed under the Open Database License (ODbL) v1.0, https://www.opendatacommons.org/licenses/odbl/1.0/" 21 | ) 22 | 23 | meta(data_dictionary_AFINN) <- 24 | lapply(meta(data_dictionary_AFINN), function(x) Encoding(x) <- "UTF-8") 25 | 26 | usethis::use_data(data_dictionary_AFINN, overwrite = TRUE) 27 | 28 | -------------------------------------------------------------------------------- /sources/ANEW/create-data_dictionary_ANEW.R: -------------------------------------------------------------------------------- 1 | # ANEW 2 | 3 | library("quanteda") 4 | 5 | anew <- read.delim(url("https://bit.ly/2zZ44w0")) 6 | anew <- anew[!duplicated(anew$Word), ] # because some words repeat 7 | data_dictionary_ANEW <- dictionary(list(pleasure = anew$Word, 8 | arousal = anew$Word, 9 | dominance = anew$Word)) 10 | valence(data_dictionary_ANEW) <- list(pleasure = anew$ValMn, 11 | arousal = anew$AroMn, 12 | dominance = anew$DomMn) 13 | 14 | meta(data_dictionary_ANEW) <- 15 | list( 16 | title = "Affective Norms for English Words (ANEW)", 17 | description = "A quanteda dictionary object containing the ANEW, or Affective Norms for English Words (Bradley and Lang 2017) valenced lexicon. The ANEW provides a lexicon of 2,471 distinct fixed word matches that are associated with three valenced categories: pleasure, arousal, and dominance.", 18 | url = "https://csea.phhp.ufl.edu/media.html#bottommedia", 19 | reference = "Bradley, M.M. & Lang, P.J. (2017). 
Affective Norms for English Words (ANEW): Instruction manual and affective ratings. Technical Report C-3. Gainesville, FL:UF Center for the Study of Emotion and Attention.", 20 | license = "For non-profit academic research purposes." 21 | ) 22 | 23 | usethis::use_data(data_dictionary_ANEW, overwrite = TRUE) 24 | 25 | -------------------------------------------------------------------------------- /sources/Hu-Liu/create_data_dictionary-HuLiu.R: -------------------------------------------------------------------------------- 1 | library("quanteda") 2 | 3 | data_dictionary_HuLiu <- 4 | dictionary(list(positive = scan(file = "Hu-Liu/positive-words.txt", 5 | what = "character", comment.char = ";"), 6 | negative = scan(file = "Hu-Liu/negative-words-UTF8.txt", 7 | what = "character", comment.char = ";"))) 8 | 9 | meta(data_dictionary_HuLiu) <- 10 | list( 11 | title = "Positive and negative words from Hu and Liu (2004)", 12 | url = "http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html", 13 | description = "A quanteda dictionary object containing 2,006 positive and 4,783 negative words from Hu and Liu (2004, 2005).", 14 | reference = "Hu, M. & Liu, B. (2004). Mining and Summarizing Customer Reviews. In Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD-2004), Aug 22--25, 2004, Seattle, Washington, USA. https://www.cs.uic.edu/~liub/publications/kdd04-revSummary.pdf 15 | 16 | Liu, M., Hu, M., & Cheng, J. (2005). Opinion Observer: Analyzing and Comparing Opinions on the Web. In Proceedings of the 14th International World Wide Web conference (WWW-2005), May 10--14, 2005, Chiba, Japan. https://www.cs.uic.edu/~liub/publications/www05-p536.pdf", 17 | license = "Unknown" 18 | ) 19 | polarity(data_dictionary_HuLiu) <- list(pos = "positive", neg = "negative") 20 | 21 | usethis::use_data(data_dictionary_HuLiu, overwrite = TRUE) 22 | -------------------------------------------------------------------------------- /sources/Laver-Garry/Laver_and_Garry_2000.cat: -------------------------------------------------------------------------------- 1 | CULTURE 2 | CULTURE-HIGH 3 | ART (1) 4 | ARTISTIC (1) 5 | DANCE (1) 6 | GALLER* (1) 7 | MUSEUM* (1) 8 | MUSIC* (1) 9 | OPERA* (1) 10 | THEATRE* (1) 11 | CULTURE-POPULAR 12 | MEDIA (1) 13 | SPORT 14 | ANGLER* (1) 15 | PEOPLE (1) 16 | WAR_IN_IRAQ (1) 17 | CIVIL_WAR (1) 18 | ECONOMY 19 | +STATE+ 20 | ACCOMMODATION (1) 21 | AGE (1) 22 | AMBULANCE (1) 23 | ASSIST (1) 24 | BENEFIT (1) 25 | CARE (1) 26 | CARER* (1) 27 | CHILD* (1) 28 | CLASS (1) 29 | CLASSES (1) 30 | CLINICS (1) 31 | COLLECTIVE* (1) 32 | CONTRIBUTION* (1) 33 | COOPERATIVE* (1) 34 | CO-OPERATIVE* (1) 35 | DEPRIVATION (1) 36 | DISABILITIES (1) 37 | DISADVANTAGED (1) 38 | EDUCAT* (1) 39 | ELDERLY (1) 40 | EQUAL* (1) 41 | ESTABLISH (1) 42 | FAIR* (1) 43 | GUARANTEE* (1) 44 | HARDSHIP (1) 45 | HEALTH* (1) 46 | HOMELESS* (1) 47 | HOSPITAL* (1) 48 | HUNGER (1) 49 | INEQUAL* (1) 50 | INVEST (1) 51 | INVESTING (1) 52 | INVESTMENT (1) 53 | MEANS-TEST* (1) 54 | NURSE* (1) 55 | PATIENTS (1) 56 | PENSION (1) 57 | POOR (1) 58 | POORER (1) 59 | POOREST (1) 60 | POVERTY (1) 61 | REHOUSE* (1) 62 | RE-HOUSE* (1) 63 | SCHOOL (1) 64 | TEACH* (1) 65 | TRANSPORT (1) 66 | UNDERFUND* (1) 67 | UNEMPLOY* (1) 68 | VULNERABLE (1) 69 | WIDOW* (1) 70 | =STATE= 71 | ACCOUNTANT (1) 72 | ACCOUNTING (1) 73 | ACCOUNTS (1) 74 | ADVERT* (1) 75 | AIRLINE* (1) 76 | AIRPORT* (1) 77 | AUDIT* (1) 78 | BANK* (1) 79 | BARGAINING (1) 80 | BREADWINNER* (1) 81 | BUDGET* (1) 82 | BUY* (1) 83 | CARTEL* (1) 84 
| CASH* (1) 85 | CHARGE* (1) 86 | COMMERCE* (1) 87 | COMPENSAT* (1) 88 | CONSUM* (1) 89 | COST* (1) 90 | CREDIT* (1) 91 | CUSTOMER* (1) 92 | DEBT* (1) 93 | DEFICIT* (1) 94 | DWELLING* (1) 95 | EARN* (1) 96 | ECON* (1) 97 | ELECTRICITY (1) 98 | ESTATE* (1) 99 | EXPORT* (1) 100 | FEE (1) 101 | FEES (1) 102 | FINANC* (1) 103 | HOUS* (1) 104 | IMPORT (1) 105 | IMPORTS (1) 106 | INDUSTR* (1) 107 | JOBS (1) 108 | LEASE* (1) 109 | LOAN* (1) 110 | MANUFACTUR* (1) 111 | MORTGAGE* (1) 112 | NEGOTIAT* (1) 113 | OPPORTUNITY (1) 114 | PARTNERSHIP* (1) 115 | PASSENGER* (1) 116 | PAY* (1) 117 | PERFORMANCE (1) 118 | PORT* (1) 119 | PRODUCTIVITY (1) 120 | PROFESSION* (1) 121 | PURCHAS* (1) 122 | RAILWAY* (1) 123 | REBATE* (1) 124 | RECESSION* (1) 125 | RESEARCH* (1) 126 | REVENUE* (1) 127 | SALAR* (1) 128 | SELL* (1) 129 | SETTLEMENT (1) 130 | SOFTWARE (1) 131 | SUPPLIER* (1) 132 | SUPPLY (1) 133 | TELECOM* (1) 134 | TELEPHON* (1) 135 | TENAN* (1) 136 | TOURIS* (1) 137 | TRADE (1) 138 | TRAIN* (1) 139 | WAGE* (1) 140 | WELFARE (1) 141 | WORK* (1) 142 | -STATE- 143 | ASSETS (1) 144 | AUTONOMY (1) 145 | BARRIER* (1) 146 | BID (1) 147 | BIDDERS (1) 148 | BIDDING (1) 149 | BURDEN* (1) 150 | CHARIT* (1) 151 | CHOICE* (1) 152 | COMPET* (1) 153 | CONFIDENCE (1) 154 | CONFISCATORY (1) 155 | CONSTRAIN* (1) 156 | CONTRACTING* (1) 157 | CONTRACTOR* (1) 158 | CONTROLLED (1) 159 | CONTROLLING (1) 160 | CONTROLS (1) 161 | CORPORATE (1) 162 | CORPORATION* (1) 163 | DEREGULATING (1) 164 | DISMANTL* (1) 165 | ENTREPRENEUR* (1) 166 | EXPENSIVE (1) 167 | FLEXIB* (1) 168 | FRANCHISE* (1) 169 | FUNDHOLD* (1) 170 | FUND-HOLDING (1) 171 | HOMESTEAD* (1) 172 | INITIATIVE (1) 173 | INTRUSIVE (1) 174 | INVESTOR* (1) 175 | LIBERALI* (1) 176 | MARKET* (1) 177 | MONETARY (1) 178 | MONEY (1) 179 | OWN* (1) 180 | PRIVATE (1) 181 | PRIVATELY (1) 182 | PRIVATISATIONS (1) 183 | PRIVATISED (1) 184 | PRIVATISING (1) 185 | PRODUCE* (1) 186 | PROFITABLE (1) 187 | REGULAT* (1) 188 | RETAIL* (1) 189 | RISK (1) 190 | RISKS (1) 191 | SAVINGS (1) 192 | SELL* (1) 193 | SHARES (1) 194 | SIMPLIF* (1) 195 | SPEND* (1) 196 | SPONSORSHIP (1) 197 | TAXABLE (1) 198 | TAXES (1) 199 | TAX-FREE (1) 200 | THRIFT* (1) 201 | TRADING (1) 202 | VALUE (1) 203 | VOLUNT* (1) 204 | VOUCHER* (1) 205 | ENVIRONMENT 206 | CON ENVIRONMENT 207 | PRODUC* (1) 208 | PRO ENVIRONMENT 209 | CAR (1) 210 | CATALYTIC (1) 211 | CHEMICAL* (1) 212 | CHIMNEY* (1) 213 | CLEAN* (1) 214 | CONGESTION (1) 215 | CYCLIST* (1) 216 | DEPLET* (1) 217 | ECOLOG* (1) 218 | EMISSION* (1) 219 | ENERGY-SAVING (1) 220 | ENVIRONMENT* (1) 221 | FUR (1) 222 | GREEN (1) 223 | HABITAT* (1) 224 | HEDGEROW* (1) 225 | HUSBANDED (1) 226 | LITTER* (1) 227 | OPENCAST (1) 228 | OPEN-CAST* (1) 229 | OZONE (1) 230 | PLANET (1) 231 | POPULATION (1) 232 | RECYCL* (1) 233 | RE-CYCL* (1) 234 | RE-USE (1) 235 | TOXIC (1) 236 | WARMING (1) 237 | GROUPS 238 | ETHNIC 239 | ASIAN* (1) 240 | BUDDHIST* (1) 241 | ETHNIC* (1) 242 | RACE (1) 243 | RACI* (1) 244 | WOMEN 245 | GIRLS (1) 246 | WOMAN (1) 247 | WOMEN (1) 248 | INSTITUTIONS 249 | CONSERVATIVE 250 | AUTHORITY (1) 251 | CONTINU* (1) 252 | DISRUPT* (1) 253 | INSPECT* (1) 254 | JURISDICTION* (1) 255 | LEGITIMATE (1) 256 | MANAG* (1) 257 | MORATORIUM (1) 258 | RUL* (1) 259 | STRIKE* (1) 260 | WHITEHALL (1) 261 | NEUTRAL 262 | ADMINISTR* (1) 263 | ADVIS* (1) 264 | AGENC* (1) 265 | AMALGAMAT* (1) 266 | APPOINT* (1) 267 | ASSEMBLY (1) 268 | CHAIR* (1) 269 | COMMISSION* (1) 270 | COMMITTEE* (1) 271 | CONSTITUEN* (1) 272 | COUNCIL* (1) 273 | DEPARTMENT* (1) 274 | DIRECTORATE* 
(1) 275 | EXECUTIVE* (1) 276 | HEADQUARTERS (1) 277 | LEGISLAT* (1) 278 | MECHANISM* (1) 279 | MINISTER* (1) 280 | OFFICE (1) 281 | OFFICES (1) 282 | OFFICIAL (1) 283 | OPERAT* (1) 284 | OPPOSITION (1) 285 | ORGANISATION* (1) 286 | PARLIAMENT* (1) 287 | PRESIDEN* (1) 288 | PROCEDUR* (1) 289 | PROCESS* (1) 290 | QUEEN (1) 291 | REGIST* (1) 292 | SCHEME* (1) 293 | SECRETARIAT* (1) 294 | SOVEREIGN* (1) 295 | SUBCOMMITTEE* (1) 296 | TRIBUNAL* (1) 297 | VOTE* (1) 298 | VOTING (1) 299 | WESTMINSTER (1) 300 | RADICAL 301 | ABOLITION (1) 302 | ACCOUNTABLE (1) 303 | ANSWERABLE (1) 304 | CONSULT* (1) 305 | CORRUPT* (1) 306 | DEMOCRATIC* (1) 307 | ELECT* (1) 308 | IMPLEMENT* (1) 309 | MODERN* (1) 310 | MONITOR* (1) 311 | REBUILD* (1) 312 | REEXAMINE* (1) 313 | REFORM* (1) 314 | RE-ORGANI* (1) 315 | REPEAL* (1) 316 | REPLACE* (1) 317 | REPRESENTAT* (1) 318 | SCANDAL* (1) 319 | SCRAP (1) 320 | SCRAP* (1) 321 | SCRUTIN* (1) 322 | TRANSFORM* (1) 323 | VOICE* (1) 324 | LAW_AND_ORDER 325 | LAW-CONSERVATIVE 326 | ASSAULTS (1) 327 | BAIL (1) 328 | BURGLAR* (1) 329 | CONSTAB* (1) 330 | CONVICT* (1) 331 | COURT (1) 332 | COURTS (1) 333 | CUSTOD* (1) 334 | DEALING (1) 335 | DELINQUEN* (1) 336 | DETER (1) 337 | DETER* (1) 338 | DISORDER (1) 339 | DRUG* (1) 340 | FINE (1) 341 | FINES (1) 342 | FIRMNESS (1) 343 | FORCE* (1) 344 | FRAUD* (1) 345 | GUARD* (1) 346 | HOOLIGAN* (1) 347 | ILLEGAL* (1) 348 | INTIMIDAT* (1) 349 | JOY-RIDE* (1) 350 | LAWLESS* (1) 351 | MAGISTRAT* (1) 352 | OFFENCE* (1) 353 | OFFICER* (1) 354 | PENAL* (1) 355 | POLICE (1) 356 | POLICEMEN (1) 357 | POLICING (1) 358 | PRISON* (1) 359 | PROBATION (1) 360 | PROSECUTION (1) 361 | PUNISH* (1) 362 | RE-OFFEND (1) 363 | RUC (1) 364 | SEIZ* (1) 365 | SENTENCE* (1) 366 | SHOP-LIFTING (1) 367 | SQUATTING (1) 368 | TERROR* (1) 369 | THEFT* (1) 370 | THUG* (1) 371 | TOUGH* (1) 372 | TRAFFICKER* (1) 373 | UNIFORMED (1) 374 | UNLAWFUL (1) 375 | VANDAL* (1) 376 | VICTIM* (1) 377 | VIGILAN* (1) 378 | LAW-LIBERAL 379 | HARASSMENT (1) 380 | NON-CUSTODIAL (1) 381 | RURAL 382 | AGRICULTUR* (1) 383 | BADGERS (1) 384 | BIRD* (1) 385 | COUNTRYSIDE (1) 386 | FARM* (1) 387 | FEED (1) 388 | FISH* (1) 389 | FOREST* (1) 390 | HENS (1) 391 | HORSE* (1) 392 | LANDSCAPE* (1) 393 | LANE* (1) 394 | LIVESTOCK (1) 395 | MEADOWS (1) 396 | VILLAGE* (1) 397 | WILDLIFE (1) 398 | URBAN 399 | TOWN* (1) 400 | VALUES 401 | CONSERVATIVE 402 | DEFEND (1) 403 | DEFENDED (1) 404 | DEFENDING (1) 405 | DISCIPLINE (1) 406 | GLORIES (1) 407 | GLORIOUS (1) 408 | GRAMMAR (1) 409 | HERITAGE (1) 410 | HISTOR* (1) 411 | HONOUR* (1) 412 | IMMIGRA* (1) 413 | INHERIT* (1) 414 | INTEGRITY (1) 415 | JUBILEE* (1) 416 | LEADER* (1) 417 | MAINTAIN (1) 418 | MAJESTY (1) 419 | MARRIAGE (1) 420 | OBSCEN* (1) 421 | PAST (1) 422 | PORNOGRAPH* (1) 423 | PRESERV* (1) 424 | PRIDE (1) 425 | PRINCIPL* (1) 426 | PROBITY (1) 427 | PROFESSIONALISM (1) 428 | PROUD (1) 429 | PUNCTUAL* (1) 430 | RECAPTURE* (1) 431 | RELIAB* (1) 432 | THREAT* (1) 433 | TRADITION* (1) 434 | LIBERAL 435 | CRUEL* (1) 436 | DISCRIMINAT* (1) 437 | HUMAN* (1) 438 | INJUSTICE* (1) 439 | INNOCENT (1) 440 | INTER_RACIAL (1) 441 | MINORIT* (1) 442 | REPRESSI* (1) 443 | RIGHTS (1) 444 | SEX* (1) 445 | -------------------------------------------------------------------------------- /sources/Laver-Garry/create-data_dictionary_LaverGarry.R: -------------------------------------------------------------------------------- 1 | # Laver and Garry Dictionary of Policy Positions 2 | 3 | library("quanteda") 4 | 5 | data_dictionary_LaverGarry <- 
dictionary(file = "Laver-Garry/Laver_and_Garry_2000.cat") 6 | 7 | usethis::use_data(data_dictionary_LaverGarry, overwrite = TRUE) 8 | -------------------------------------------------------------------------------- /sources/Loughran-McDonald/create-data_dictionary_LoughranMcDonald.R: -------------------------------------------------------------------------------- 1 | # Loughran and McDonald Sentiment Word Lists 2 | 3 | library("quanteda") 4 | 5 | data_dictionary_LoughranMcDonald <- dictionary(file = "Loughran-McDonald/Loughran_and_McDonald_2014.cat") 6 | 7 | polarity(data_dictionary_LoughranMcDonald) <- 8 | list(pos = c("POSITIVE"), neg = c("NEGATIVE")) 9 | 10 | meta(data_dictionary_LoughranMcDonald) <- 11 | list( 12 | title = "Loughran and McDonald Sentiment Word Lists", 13 | description = "A quanteda dictionary object containing the 2014 version of the Loughran and McDonald Sentiment Word Lists. The categories are 'negative' (2355 features), 'positive' (354), 'uncertainty' (297), 'litigious' (903), 'constraining' (184), 'superfluous' (56), 'interesting' (68), 'modal words strong' (68) and 'modal words weak' (0).", 14 | url = "http://sraf.nd.edu/textual-analysis/resources/", 15 | reference = "Loughran, T. & McDonald, B. (2011). When is a Liability not a Liability? Textual Analysis, Dictionaries, and 10-Ks. Journal of Finance, 66(1), 35-65.", 16 | license = "The data compilations provided on this website are for use by individual researchers. For commercial licenses please contact mcdonald.1@nd.edu." 17 | ) 18 | 19 | usethis::use_data(data_dictionary_LoughranMcDonald, overwrite = TRUE) 20 | -------------------------------------------------------------------------------- /sources/MFD/create-data_dictionary_MFD.R: -------------------------------------------------------------------------------- 1 | # Moral Foundations Dictionary 2 | 3 | library("quanteda") 4 | 5 | #data_dictionary_MFD <- dictionary(file = "sources/MFD/moral_foundations_dictionary.dic") 6 | data_dictionary_MFD <- dictionary(file = "MFD/mfd2.0.dic") 7 | 8 | usethis::use_data(data_dictionary_MFD, overwrite = TRUE) 9 | -------------------------------------------------------------------------------- /sources/MFD/mfd2.0.dic: -------------------------------------------------------------------------------- 1 | % 1 care.virtue 2 care.vice 3 fairness.virtue 4 fairness.vice 5 loyalty.virtue 6 loyalty.vice 7 authority.virtue 8 authority.vice 9 sanctity.virtue 10 sanctity.vice % compassion 1 empathy 1 kindness 1 caring 1 generosity 1 benevolence 1 altruism 1 compassionate 1 nurture 1 gentleness 1 nurturance 1 sympathy 1 nurturing 1 motherly 1 love 1 beneficence 1 empathize 1 helpfulness 1 loving 1 pity 1 mercy 1 nurturer 1 compassionately 1 nurturers 1 caringly 1 empathising 1 merciful 1 empathizing 1 nurtures 1 warmhearted 1 empathizers 1 protectiveness 1 nurtured 1 benevolent 1 mothering 1 cared 1 healing 1 empathises 1 humane 1 comfort 1 pitied 1 loved 1 altruist 1 cares 1 pitying 1 comforted 1 hug 1 comforting 1 consoling 1 empathizes 1 sympathize 1 care 1 caregiver 1 empathised 1 hugs 1 heal 1 generous 1 condolences 1 mothered 1 charitable 1 generously 1 pities 1 condolence 1 help 1 consolingly 1 solace 1 mother 1 healer 1 hospitality 1 charity 1 empathized 1 healers 1 pityingly 1 mothers 1 child 1 lovingly 1 parenting 1 rescuing 1 rescuer 1 loves 1 consoled 1 clothe 1 sympathizing 1 helping 1 shared 1 childhood 1 mommy 1 vulnerability 1 helpers 1 lover 1 hospitable 1 sharer 1 feeding 1 nursed 1 helper 1 safeness 1 nurses 1 protector 
1 motherhood 1 alleviation 1 nursemaid 1 safeguard 1 protect 1 healthiness 1 protecters 1 patient 1 nurse 1 vulnerable 1 benefit 1 feed 1 childcare 1 rescuers 1 hugged 1 helpful 1 rescues 1 nursing 1 protecting 1 heals 1 childbearing 1 hugger 1 relief 1 healed 1 rescued 1 patients 1 share 1 rescue 1 healthy 1 hospitalise 1 hospitalising 1 hugging 1 nursery 1 healthier 1 sharing 1 helps 1 sympathizers 1 hospitalises 1 alleviating 1 wounded 1 wounds 1 hospitalize 1 alleviate 1 protective 1 protection 1 health 1 relieve 1 sympathizer 1 safety 1 beneficiary 1 helped 1 hospital 1 childbirth 1 benefits 1 healthcare 1 relievers 1 feeds 1 hospitalization 1 benefitting 1 relieving 1 safe 1 feeder 1 benefitted 1 hospitalized 1 unharmful 1 protects 1 unharmed 1 protecter 1 safely 1 safekeeping 1 hospitalizing 1 wounding 1 reliever 1 shares 1 relieves 1 alleviates 1 relieved 1 hospitalizes 1 console 1 protectorate 1 alleviated 1 protected 1 wound 1 consoles 1 harm 2 suffer 2 hurt 2 harmed 2 hurting 2 hurts 2 cruel 2 endanger 2 harming 2 harms 2 suffering 2 threaten 2 inflict 2 suffered 2 harmful 2 inflicted 2 mistreat 2 endangers 2 damaging 2 injurious 2 victimize 2 inflicts 2 hurtful 2 suffers 2 inflicting 2 injures 2 vulnerable 2 unkind 2 damage 2 kill 2 die 2 victimizes 2 torment 2 destroy 2 brutalise 2 brutalises 2 distresses 2 endangering 2 mistreats 2 afflict 2 distressing 2 destroys 2 victimises 2 maltreat 2 pain 2 harsh 2 mistreated 2 ravage 2 threatened 2 harass 2 unkindness 2 afflicted 2 threatens 2 threatening 2 distress 2 brutalize 2 tormenting 2 brutalized 2 victimizing 2 damager 2 damaged 2 bully 2 agony 2 abused 2 coldhearted 2 inhuman 2 injured 2 torments 2 brutalizes 2 uncompassionate 2 cruelty 2 tormented 2 mistreating 2 endangered 2 uncaring 2 anguishes 2 destroying 2 killed 2 mistreatment 2 bullied 2 harsher 2 cruelness 2 tortured 2 pained 2 tortures 2 torturing 2 maltreated 2 anguish 2 persecutes 2 maltreatment 2 brutalizing 2 attacked 2 victim 2 crying 2 damages 2 discomforting 2 abusing 2 threat 2 persecute 2 brutalization 2 violent 2 annihilated 2 torturous 2 harasses 2 injurer 2 destroyed 2 molests 2 molest 2 injuring 2 afflicts 2 killing 2 ache 2 wounded 2 persecuted 2 ravages 2 harassed 2 exploited 2 injury 2 brutalisation 2 discomfort 2 unmerciful 2 annihilate 2 exploiters 2 injurers 2 destruction 2 manhandle 2 kills 2 casualties 2 maltreating 2 victims 2 harassing 2 needier 2 smother 2 harassment 2 smothers 2 unhelpful 2 agonize 2 inhumanity 2 duress 2 victimization 2 exploiting 2 cried 2 wounds 2 wounding 2 murderous 2 ravaged 2 uncaringly 2 pains 2 painfulness 2 manhandles 2 bullies 2 assaulted 2 uncharitable 2 distressed 2 persecution 2 murdered 2 ravaging 2 discomforted 2 exploitation 2 torture 2 murderers 2 aches 2 afflictions 2 ungenerous 2 victimizer 2 agonizing 2 paining 2 persecuting 2 exploit 2 harassers 2 malevolent 2 stabs 2 sorrowful 2 assaults 2 needy 2 affliction 2 cries 2 fighting 2 fight 2 attack 2 annihilates 2 sorrow 2 agonized 2 assaulting 2 inhospitable 2 threats 2 ached 2 rapists 2 abuser 2 raped 2 assassinates 2 stabbed 2 inhospitality 2 annihilation 2 punch 2 harshness 2 abusers 2 killer 2 sufferers 2 victimizers 2 smite 2 killers 2 discomforts 2 fatalities 2 molested 2 brutality 2 murdering 2 torturer 2 torturers 2 fights 2 harmfulness 2 bullying 2 casualty 2 sufferer 2 exploiter 2 fatality 2 punches 2 abuses 2 attacks 2 vulnerability 2 carnage 2 tribulation 2 annihilator 2 smothering 2 bullyboy 2 murderer 2 wound 2 stabber 2 tormenters 2 
malevolence 2 raping 2 smothered 2 assault 2 genocidal 2 anguishing 2 aching 2 anguished 2 stabbing 2 rapist 2 harasser 2 hungers 2 hunger 2 molesting 2 rape 2 molesters 2 punched 2 violence 2 distressingly 2 molester 2 stabbers 2 neediness 2 assassinate 2 agonizingly 2 tribulations 2 unhelpfulness 2 assaulter 2 puncher 2 punching 2 rapes 2 genocides 2 attackers 2 tormentor 2 assassinations 2 destroyers 2 punchers 2 sorrows 2 tormenter 2 threateningly 2 murder 2 destroyer 2 assassinating 2 crier 2 assassinated 2 molestation 2 attacker 2 murders 2 genocide 2 fighter 2 assassins 2 assaulters 2 hungering 2 achingly 2 hungered 2 murderess 2 assassin 2 exploits 2 fighters 2 assassination 2 equality 3 fairness 3 justice 3 rights 3 equitable 3 civil rights 3 fairplay 3 impartiality 3 equal 3 fairminded 3 proportionality 3 equalities 3 fair 3 integrity 3 impartial 3 reciprocity 3 honesty 3 egalitarian 3 civil right 3 law 3 justness 3 unbias 3 egalitarians 3 parity 3 objectiveness 3 reparations 3 unprejudiced 3 justices 3 laws 3 tribunals 3 retribution 3 reparation 3 lawfully 3 lawful 3 honest 3 compensation 3 lawyers 3 sportsmanship 3 tribunal 3 do unto others 3 golden rule 3 lawyer 3 proportional 3 equity 3 lawyering 3 trust 3 reciprocal 3 being objective 3 justification 3 trustworthiness 3 unbiased 3 vengeance 3 revenge 3 retributions 3 equals 3 equalize 3 refereeing 3 restitution 3 compensating 3 been objective 3 pay back 3 justified 3 justifies 3 retaliation 3 lawyered 3 compensated 3 referees 3 karma 3 will share 3 avenger 3 trusting 3 avengers 3 square deal 3 trusts 3 compensate 3 trustworthy 3 levels the playing field 3 tit for tat 3 retaliate 3 level the playing field 3 eye for an eye 3 square deals 3 repayment 3 payback 3 equities 3 justify 3 dues 3 square dealing 3 referee 3 repaid 3 square dealer 3 equalizer 3 due processes 3 level playing fields 3 repay 3 compensates 3 justifying 3 due processing 3 due process 3 repayments 3 repaying 3 level playing field 3 retaliating 3 square dealers 3 retaliated 3 refereed 3 revenger 3 avenging 3 repays 3 trusted 3 avenge 3 retaliates 3 equalizers 3 avenged 3 avenges 3 cheat 4 unfair 4 cheating 4 unfairness 4 injustice 4 fraud 4 dishonest 4 unjust 4 cheated 4 fraudulent 4 cheats 4 frauds 4 dishonesty 4 cheaters 4 deception 4 injustices 4 swindle 4 inequity 4 hypocrisy 4 discrimination 4 unequal 4 cheater 4 inequities 4 defraud 4 racism 4 scam 4 liar 4 defrauds 4 betrayal 4 deceipt 4 defrauded 4 inequality 4 liars 4 defrauders 4 hypocrite 4 biased 4 ripoffs 4 scams 4 fleecing 4 defrauder 4 discriminates 4 mislead 4 inequalities 4 prejudice 4 fleeced 4 defrauding 4 ripoff 4 scamming 4 imposters 4 exploitation 4 crooked 4 oppress 4 racist 4 oppression 4 imposter 4 swindled 4 hypocrites 4 plagiarism 4 lied 4 untrustworthiness 4 hoodwink 4 scammed 4 blackmail 4 bilks 4 swindling 4 betrayed 4 bias 4 connive 4 crooks 4 deceive 4 freeloaders 4 favoritism 4 disparity 4 swindles 4 deceived 4 exploiters 4 misleading 4 discriminated 4 bilked 4 deceiving 4 untrustworthy 4 prejudiced 4 false advertise 4 scammers 4 swindler 4 theft 4 duplicitous 4 hoodwinked 4 bigoted 4 sexism 4 disproportionate 4 swindlers 4 discriminate 4 conniving 4 sexist 4 betraying 4 hoodwinking 4 partiality 4 misleads 4 disproportion 4 economic disparity 4 exploiter 4 bilk 4 biases 4 bigots 4 distrust 4 dupe 4 crook 4 racists 4 con artist 4 bilking 4 blackmailing 4 deceives 4 betrayers 4 deceiver 4 blackmailed 4 duping 4 shyster 4 connivers 4 imbalanced 4 con artists 4 sexists 4 thieving 4 
betray 4 imbalance 4 disproportions 4 disproportionately 4 freeloader 4 misleaders 4 connived 4 shysters 4 scammer 4 connives 4 conniver 4 disadvantaged 4 plagiaristic 4 moocher 4 dupes 4 discriminating 4 tricked 4 segregation 4 false advertised 4 thief 4 betrayer 4 bigot 4 exploiting 4 lying 4 thieves 4 stealing 4 suckered 4 deceivers 4 bamboozled 4 false advertisement 4 freeload 4 bamboozle 4 did rob 4 freeloading 4 steal 4 pickpocketing 4 blackmailer 4 prejudicing 4 chauvinists 4 exploit 4 misleader 4 hoodwinks 4 false advertiser 4 imbalances 4 pickpocketed 4 exploited 4 pickpockets 4 bamboozles 4 tricking 4 taking advantage 4 pickpocket 4 false advertisers 4 biasing 4 false impression 4 bamboozling 4 false witness 4 robs 4 moochers 4 betrays 4 robbing 4 false advertises 4 false impressions 4 blackmails 4 double cross 4 blackmailers 4 will rob 4 stolen 4 distrustful 4 false advertising 4 double crossers 4 mooches 4 disproportioned 4 mooching 4 segregated 4 double crosser 4 robbed 4 misleadingly 4 segregating 4 stole 4 double crosses 4 being partial 4 exploits 4 distrusts 4 mooch 4 segregate 4 robbers 4 distrusted 4 double crossing 4 distrusting 4 double crossed 4 be partial 4 go back on 4 stacking the deck 4 robber 4 segregates 4 ripping off 4 trickster 4 rips off 4 behind their backs 4 mooched 4 stacked the deck 4 was partial 4 am partial 4 stacked deck 4 stacks the deck 4 behind their back 4 been partial 4 free rider 4 ripped off 4 free riders 4 deceivingly 4 steals 4 unequaled 4 team player 5 player 5 patriot 5 loyal 5 loyalty 5 patriots 5 follower 5 fidelity 5 allegiance 5 ally 5 comrade 5 loyalties 5 death do us part 5 faction 5 comrades 5 allegiances 5 sacrifice 5 allies 5 organization 5 followers 5 us against them 5 sacrifices 5 all for one 5 comradery 5 one for all 5 fellow 5 family 5 allegiant 5 corps 5 unity 5 union jack 5 uniter 5 old glory 5 companions 5 country 5 companion 5 homeland 5 sacrificing 5 indivisible 5 sacrificed 5 solidarity 5 troops 5 nation 5 cult 5 kinship 5 companionship 5 clique 5 allied 5 community 5 group 5 factions 5 familiarity 5 solidarities 5 enlist 5 companionships 5 wife 5 united 5 belongs 5 congregation 5 brothers in arms 5 clan 5 trooper 5 sect 5 enlisted 5 enlistment 5 tribalism 5 cohorts 5 war 5 joining 5 troop 5 sacrificial 5 coalition 5 insider 5 pledge 5 cohort 5 enlisting 5 unite 5 communion 5 familiarities 5 belong 5 ingroup 5 belonged 5 company 5 collective 5 fellows 5 cliques 5 uniting 5 clans 5 hazing 5 congregates 5 herd 5 sects 5 uniters 5 undivided 5 unites 5 pledgers 5 coalitions 5 enlists 5 grouping 5 insiders 5 families 5 troupe 5 fellowship 5 kin 5 pledger 5 horde 5 nations 5 tribe 5 hordes 5 pledges 5 herder 5 commune 5 cults 5 congregations 5 organizations 5 herds 5 pledging 5 communities 5 familiar 5 hazings 5 belonging 5 pledged 5 bowed 5 collectively 5 together 5 groups 5 homelands 5 collectives 5 troopers 5 tribes 5 companies 5 countries 5 troupes 5 fellowships 5 tribal 5 communes 5 herders 5 grouped 5 herding 5 congregate 5 herded 5 congregating 5 traitor 6 disloyal 6 treason 6 traitors 6 betray 6 betraying 6 betrayer 6 betrayers 6 unpatriotic 6 betrayed 6 treachery 6 enemies 6 backstabber 6 backstabbed 6 heretic 6 enemy 6 betrays 6 deserter 6 infidels 6 infidel 6 backstab 6 deserting 6 apostate 6 heresy 6 backstabbers 6 heretics 6 unfaithful 6 rebellion 6 desertion 6 deserters 6 apostates 6 unfaithfulness 6 backstabbing 6 rebel 6 cheat on 6 treacherous 6 backstabs 6 heresies 6 outsider 6 outgroup 6 cheated on 6 against us 
6 cheating on 6 rebels 6 infidelity 6 outgroups 6 rebellions 6 outsiders 6 cheats on 6 respect 7 obey 7 authority 7 obeyed 7 deference 7 reverence 7 respecting 7 obeying 7 tradition 7 adhere 7 obeys 7 revere 7 govern 7 comply 7 respectful 7 honor 7 adhered 7 allegiance 7 dictates 7 nobility 7 forbid 7 dominion 7 governed 7 obedient 7 reveres 7 adhering 7 governs 7 governing 7 oppress 7 respected 7 respectfully 7 honorable 7 dictate 7 commandments 7 commandment 7 venerate 7 politeness 7 respects 7 obedience 7 divine right 7 forbids 7 permission 7 veneration 7 hierarchy 7 forbade 7 honoring 7 proper 7 venerated 7 stature 7 acquiesce 7 adherence 7 deferential 7 leadership 7 punish 7 forbidding 7 revered 7 filial piety 7 patriarchs 7 decree 7 coerce 7 dominions 7 dictating 7 venerating 7 wear the crown 7 venerates 7 institution 7 monarchical 7 servant 7 decrees 7 permit 7 do as one says 7 supervise 7 duty 7 compliance 7 lionize 7 supervision 7 take orders 7 take up arms 7 duties 7 dictated 7 elders 7 emperors 7 commands 7 acquiesced 7 emperor 7 adheres 7 servants 7 regulations 7 covenant 7 hierarchical 7 subordinate 7 policing 7 decreeing 7 acquiesces 7 authorizing 7 nobles 7 permits 7 matriarchal 7 authorizes 7 control 7 command 7 subordinating 7 hierarchies 7 reverential 7 deferentially 7 punishes 7 patriarch 7 empires 7 honored 7 allegiant 7 protect 7 traditional 7 subordination 7 punished 7 noble 7 order 7 worship 7 social order 7 monarchs 7 ruling 7 lead by example 7 authorities 7 guiding 7 presidents 7 slavishly 7 patriarchy 7 subordinates 7 protection 7 supervisers 7 bow before 7 fathers 7 bow down 7 institutions 7 coersion 7 governors 7 commanded 7 police 7 authorize 7 bullys 7 bully 7 protecting 7 acquiescing 7 empire 7 mentor 7 chiefs 7 monarchies 7 honors 7 preside over 7 acquiescent 7 allegiances 7 bowing 7 oligarchy 7 willing 7 polite 7 supervising 7 pecking order 7 compliantly 7 bishops 7 monarch 7 slaves 7 traitors 7 punishments 7 authorized 7 protector 7 compliant 7 dutiful 7 father 7 punishment 7 coerces 7 toe the line 7 monarchy 7 obediently 7 elder 7 oligarchies 7 dictators 7 leaders 7 bishop 7 lorded over 7 worships 7 coercing 7 protectors 7 dictator 7 protected 7 punishing 7 traitor 7 commanding 7 coerced 7 commanders 7 pope 7 punitive 7 underlings 7 master 7 subordinated 7 president 7 in charge 7 matriarchy 7 lionizing 7 slave 7 chief 7 covenants 7 commander 7 matriarch 7 authorizer 7 guide 7 ordered 7 supervised 7 captains 7 punisher 7 supervises 7 bossing 7 commandant 7 governor 7 protects 7 admiral 7 top gun 7 bowed 7 dominate 7 arrest 7 mentored 7 ordering 7 submit 7 institutional 7 prime minister 7 lionizes 7 ranking 7 boss 7 captain 7 by the book 7 mentors 7 bullies 7 dominant 7 arrested 7 bossed 7 leader 7 rank 7 arresting 7 chieftain 7 prime ministers 7 regulation 7 superviser 7 dean 7 arrests 7 punishers 7 bullied 7 matriarchs 7 controlling 7 managerial 7 bosses 7 ranks 7 controls 7 dictation 7 guides 7 oligarchs 7 principals 7 top dog 7 admirals 7 caste 7 captaining 7 queen 7 mentoring 7 elderly 7 castes 7 governess 7 captained 7 principal 7 bullying 7 submission 7 dominated 7 corporate ladders 7 queens 7 underling 7 corporate ladder 7 fathered 7 dominates 7 dominating 7 presidential 7 oligarch 7 controlled 7 submits 7 submitting 7 head honcho 7 commandingly 7 vice president 7 slaving 7 fathering 7 slaved 7 managers 7 forbiddingly 7 controllers 7 submitted 7 ringleaders 7 ringleader 7 controller 7 ranked 7 manager 7 prime ministerial 7 submissions 7 ceo 7 
punishingly 7 submitter 7 submitters 7 disrespect 8 disobey 8 disobedience 8 anarchy 8 chaos 8 subversion 8 subvert 8 lawlessness 8 subverting 8 disrespecting 8 sedition 8 treason 8 overthrow 8 insurrection 8 rebellion 8 transgress 8 treachery 8 dissent 8 dishonor 8 dissention 8 disrespects 8 bedlam 8 rebelling 8 misrule 8 transgression 8 insurrectional 8 pandemonium 8 mutiny 8 mutinies 8 misruling 8 disobedient 8 subverted 8 transgresses 8 transgressed 8 disarray 8 misruled 8 rioting 8 lawless 8 transgressing 8 illegality 8 overthrowing 8 dishonorable 8 dishonoring 8 rebelled 8 rebellions 8 riot 8 dishonouring 8 disrespected 8 permissiveness 8 refuser 8 unruly 8 subverts 8 unlawfulness 8 overthrown 8 anarchistic 8 dishonours 8 riots 8 refuse 8 chaotic 8 nonconformity 8 dissenters 8 uprising 8 insurrections 8 rioters 8 disordering 8 insubordinate 8 mutinied 8 insurrectionist 8 unlawful 8 nonconformists 8 heresy 8 uprisings 8 dishonors 8 tumult 8 overthrew 8 overthrows 8 rabble rousers 8 renegade 8 impolite 8 renegades 8 rabble rousing 8 dishonored 8 illegal 8 rioter 8 mutinous 8 disarrayed 8 apostates 8 dissidents 8 anarchists 8 raise hell 8 disorder 8 refusers 8 permissive 8 apostate 8 anarchist 8 treacherous 8 dissident 8 raises hell 8 disordered 8 heretic 8 overpower 8 rabble rouser 8 rebel 8 raising hell 8 heretics 8 unathorized 8 refusing 8 rebels 8 refuses 8 rioted 8 orders 8 dissenter 8 chaotically 8 nonconformist 8 heresies 8 illegals 8 unlawfully 8 heretical 8 dissents 8 traditions 8 dissenting 8 overpowers 8 trouble maker 8 refused 8 rock the boat 8 overpowering 8 tumultuous 8 overpowered 8 dissented 8 nonconforming 8 sanctity 9 sacred 9 sacredness 9 purity 9 wholesome 9 pureness 9 wholesomeness 9 holiness 9 dignity 9 godly 9 piety 9 sanctify 9 chastity 9 undefiled 9 holy 9 sacrosanct 9 pious 9 righteousness 9 dignities 9 sanctified 9 godliness 9 spirituality 9 chaste 9 sanctifies 9 righteous 9 divine 9 religious 9 biblical 9 spiritual 9 deity 9 sanctifying 9 noble 9 modesty 9 decency 9 scriptures 9 nobility 9 religion 9 hallow 9 soul 9 hallowed 9 deism 9 pristine 9 exalted 9 hallowing 9 eternal 9 holy cross 9 deities 9 faith 9 unadulterated 9 scripture 9 wholesomely 9 divinities 9 worship 9 virgin 9 god 9 catholicism 9 saintly 9 saintliness 9 godess 9 religiosity 9 purify 9 koranic 9 pure 9 holy crosses 9 exalt 9 virginity 9 divinity 9 consecrates 9 heaven 9 virginal 9 devout 9 dignified 9 tabernacle 9 exalts 9 buddhas 9 souls 9 temple 9 unsullied 9 heavenly 9 cleanliness 9 abstinance 9 spotlessness 9 talmudic 9 deists 9 gospels 9 prophets 9 religions 9 temples 9 buddhist 9 godesses 9 saints 9 temperance 9 celibacy 9 consecrated 9 priestly 9 bless 9 marriage 9 prophet 9 exalting 9 unchaste 9 supernatural 9 eternally 9 purification 9 apostles 9 monastic 9 purified 9 communion 9 gods 9 celibate 9 christians 9 theological 9 monasticism 9 unspoiled 9 sterility 9 christian 9 buddha 9 deist 9 prophetic 9 saint 9 righteously 9 apostle 9 prayer 9 faiths 9 shrine 9 purifying 9 worships 9 virgins 9 glorious 9 dignifies 9 atonement 9 deification 9 orthodoxy 9 hallows 9 enshrining 9 nunneries 9 church 9 religiously 9 blessings 9 consecrate 9 gospel 9 pray 9 beatifying 9 yogis 9 theology 9 purifies 9 orthodox 9 untainted 9 torah 9 faithfully 9 catholic 9 heavens 9 yogi 9 consecrating 9 blessed 9 faithful 9 koran 9 abstinence 9 jesus 9 monastery 9 purities 9 consecration 9 catholics 9 prayers 9 prayed 9 sterile 9 blesses 9 enshrined 9 torahs 9 organic 9 bible 9 glory 9 allah 9 glories 9 
priests 9 dignifying 9 enshrine 9 mosques 9 spotlessly 9 prude 9 reverend 9 soulful 9 deify 9 christ 9 cathedrals 9 churches 9 cathedral 9 dignify 9 monasteries 9 raw 9 enshrines 9 refinement 9 nuns 9 monks 9 gloriously 9 almighty 9 marring 9 repent 9 prays 9 clean 9 orthodoxies 9 exterminates 9 rabbis 9 spotless 9 bibles 9 mosque 9 immaculate 9 organics 9 purifier 9 foods 9 lord 9 praying 9 repenting 9 marry 9 elevating 9 marrying 9 immaculately 9 rabbi 9 nunnery 9 priest 9 food 9 bloodiness 9 marries 9 synagogues 9 synagogue 9 refined 9 repents 9 angel 9 blessing 9 monk 9 rabbinical 9 organically 9 pope 9 nun 9 nobles 9 prophetically 9 blood 9 repented 9 pastor 9 purifiers 9 lords 9 bloody 9 untouched 9 cleaning 9 exterminating 9 exterminated 9 imam 9 higher power 9 cleaners 9 married 9 beatification 9 beatify 9 extermination 9 exterminate 9 cleaner 9 body 9 immune 9 atoning 9 imams 9 cleaned 9 atones 9 mary 9 refines 9 cleans 9 atone 9 immunities 9 immunity 9 stainless 9 refining 9 refine 9 atoned 9 exterminator 9 exterminators 9 impurity 10 degradation 10 depravity 10 desecrate 10 desecration 10 repulsiveness 10 degrading 10 decay 10 filth 10 depravities 10 defile 10 sin 10 fornication 10 repulsive 10 depraved 10 impiety 10 degrade 10 repugnance 10 impure 10 degraded 10 desecrations 10 sinfulness 10 impurities 10 indecencies 10 defiled 10 defiles 10 uncleanliness 10 damnation 10 debauchery 10 impious 10 sinful 10 necrophiliacs 10 desecrates 10 sleaziness 10 desecrating 10 desecrated 10 grossness 10 contaminates 10 sinning 10 promiscuity 10 befouls 10 rottenness 10 hedonism 10 revolting 10 repugnant 10 godless 10 scum 10 befoul 10 satanic 10 sluttiness 10 disgusting 10 pestilence 10 debased 10 trashiness 10 sins 10 degradingly 10 corrupting 10 deprave 10 perverted 10 debase 10 fornicating 10 degraders 10 defiling 10 slime 10 horrors 10 repugnantly 10 defiler 10 deviants 10 degrades 10 corrupts 10 debasing 10 perverts 10 parasitic 10 disgusts 10 deflowering 10 hedonistic 10 deviant 10 scummy 10 horrifying 10 necrophilia 10 contamination 10 rot 10 stain 10 contaminating 10 contaminants 10 dirtying 10 debases 10 contaminate 10 abhor 10 heresy 10 sleaze 10 staining 10 defilers 10 harlot 10 plagues 10 sullies 10 fornicators 10 vermin 10 befouling 10 incest 10 trashy 10 excreting 10 deforms 10 abhored 10 decayed 10 whores 10 deformities 10 perverse 10 adultery 10 fornicate 10 excrement 10 harlots 10 decaying 10 fornicator 10 unclean 10 nauseating 10 sully 10 heresies 10 satan 10 damns 10 satanically 10 sinned 10 sinners 10 adulterous 10 repulses 10 corruption 10 tainting 10 deformity 10 necrophiliac 10 decays 10 corrupted 10 deforming 10 contaminant 10 disgust 10 tarnishes 10 hell 10 filthy 10 taint 10 horrific 10 fecal 10 dirtied 10 flesh 10 stained 10 deform 10 putrid 10 scatalogical 10 dirties 10 whoring 10 cocksucker 10 plague 10 adulterers 10 excretes 10 infesting 10 slimy 10 excrete 10 scuzz 10 horror 10 tarnish 10 sexuality 10 parasite 10 obscenity 10 deformed 10 adulterer 10 befouled 10 muck 10 corpses 10 soiled 10 infest 10 incestuously 10 incestuous 10 fucker 10 devil 10 parasites 10 stains 10 skanks 10 corpse 10 whore 10 lepers 10 curses 10 corrupt 10 pathogens 10 diseased 10 deflower 10 hedonists 10 sinner 10 debaucherous 10 fester 10 hedonist 10 sleazy 10 fucks 10 promiscuous 10 cursed 10 curse 10 apostates 10 cocksuckers 10 heretic 10 lewdness 10 slutty 10 infests 10 festers 10 pervert 10 fuck 10 skanky 10 dirty 10 mucky 10 puke 10 alcoholism 10 feces 10 sullied 10 disgustingly 
10 sexual 10 cunt 10 taints 10 profane 10 heretics 10 fucking 10 tarnishing 10 fornicated 10 mar 10 shitting 10 slut 10 obscene 10 barf 10 rotten 10 disgusted 10 cunts 10 waste 10 parasitically 10 sinfully 10 wastes 10 vomit 10 pathogen 10 rats 10 pathogenic 10 indecent 10 infect 10 leper 10 indecently 10 shit 10 abhors 10 skank 10 infestation 10 deflowered 10 leprosy 10 diseases 10 heretical 10 dirt 10 cursing 10 tarnishment 10 disease 10 prostitution 10 infested 10 apostate 10 sluts 10 fuckers 10 profanity 10 addiction 10 contaminated 10 scuzzy 10 infectiousness 10 indecency 10 vomitted 10 germ 10 prostituting 10 excreted 10 rubbish 10 fucked 10 sodomy 10 untouchables 10 epidemics 10 swear 10 shits 10 whorehouses 10 pigsty 10 germs 10 prostituted 10 mud 10 dung 10 epidemic 10 rat 10 douchebag 10 perversely 10 pukes 10 puking 10 prostitutes 10 barfs 10 slutting 10 trashing 10 whored 10 douchebags 10 infection 10 shite 10 spoil 10 gross 10 repulsed 10 pus 10 festering 10 cockroaches 10 tainted 10 contagion 10 barfed 10 infects 10 damned 10 addictions 10 shitty 10 skanking 10 trash 10 whorehouse 10 phlegm 10 moldy 10 plaguing 10 shat 10 drugged 10 garbage 10 infecting 10 pandemics 10 viruses 10 nauseated 10 cockroach 10 puked 10 drugging 10 manure 10 mucking 10 lewd 10 alcoholics 10 gangrenous 10 barfing 10 gangrene 10 shitter 10 shittier 10 tarnished 10 cock 10 vomits 10 hookers 10 damn 10 addict 10 alcoholic 10 nausea 10 swearing 10 vomitting 10 skanked 10 infections 10 foul 10 prostitute 10 risque 10 lice 10 gonorrhea 10 wasting 10 profanities 10 divorces 10 crappy 10 spreading 10 wasters 10 addicting 10 trashed 10 addicts 10 scabies 10 swore 10 nauseous 10 phlegmatically 10 spoiling 10 nauseatingly 10 drugs 10 virus 10 waster 10 untouchable 10 addicted 10 damning 10 pandemic 10 hooker 10 bm 10 infected 10 festered 10 marred 10 phlegmatic 10 divorce 10 viral 10 contagiously 10 plagued 10 repulsing 10 swears 10 drug 10 spoiled 10 cum 10 divorcing 10 wasted 10 divorced 10 contagious 10 -------------------------------------------------------------------------------- /sources/MFD/moral_foundations_dictionary.dic: -------------------------------------------------------------------------------- 1 | % 2 | 01 HarmVirtue 3 | 02 HarmVice 4 | 03 FairnessVirtue 5 | 04 FairnessVice 6 | 05 IngroupVirtue 7 | 06 IngroupVice 8 | 07 AuthorityVirtue 9 | 08 AuthorityVice 10 | 09 PurityVirtue 11 | 10 PurityVice 12 | 11 MoralityGeneral 13 | % 14 | safe* 01 15 | peace* 01 16 | compassion* 01 17 | empath* 01 18 | sympath* 01 19 | care 01 20 | caring 01 21 | protect* 01 22 | shield 01 23 | shelter 01 24 | amity 01 25 | secur* 01 26 | benefit* 01 27 | defen* 01 28 | guard* 01 29 | preserve 01 07 09 30 | 31 | harm* 02 32 | suffer* 02 33 | war 02 34 | wars 02 35 | warl* 02 36 | warring 02 37 | fight* 02 38 | violen* 02 39 | hurt* 02 40 | kill 02 41 | kills 02 42 | killer* 02 43 | killed 02 44 | killing 02 45 | endanger* 02 46 | cruel* 02 47 | brutal* 02 48 | abuse* 02 49 | damag* 02 50 | ruin* 02 10 51 | ravage 02 52 | detriment* 02 53 | crush* 02 54 | attack* 02 55 | annihilate* 02 56 | destroy 02 57 | stomp 02 58 | abandon* 02 06 59 | spurn 02 60 | impair 02 61 | exploit 02 10 62 | exploits 02 10 63 | exploited 02 10 64 | exploiting 02 10 65 | wound* 02 66 | 67 | fair 03 68 | fairly 03 69 | fairness 03 70 | fair-* 03 71 | fairmind* 03 72 | fairplay 03 73 | equal* 03 74 | justice 03 75 | justness 03 76 | justifi* 03 77 | reciproc* 03 78 | impartial* 03 79 | egalitar* 03 80 | rights 03 81 | equity 03 82 | evenness 
03 83 | equivalent 03 84 | unbias* 03 85 | tolerant 03 86 | equable 03 87 | balance* 03 88 | homologous 03 89 | unprejudice* 03 90 | reasonable 03 91 | constant 03 92 | honest* 03 11 93 | 94 | unfair* 04 95 | unequal* 04 96 | bias* 04 97 | unjust* 04 98 | injust* 04 99 | bigot* 04 100 | discriminat* 04 101 | disproportion* 04 102 | inequitable 04 103 | prejud* 04 104 | dishonest 04 105 | unscrupulous 04 106 | dissociate 04 107 | preference 04 108 | favoritism 04 109 | segregat* 04 05 110 | exclusion 04 111 | exclud* 04 112 | together 05 113 | nation* 05 114 | homeland* 05 115 | family 05 116 | families 05 117 | familial 05 118 | group 05 119 | loyal* 05 07 120 | patriot* 05 121 | communal 05 122 | commune* 05 123 | communit* 05 124 | communis* 05 125 | comrad* 05 126 | cadre 05 127 | collectiv* 05 128 | joint 05 129 | unison 05 130 | unite* 05 131 | fellow* 05 132 | guild 05 133 | solidarity 05 134 | devot* 05 135 | member 05 136 | cliqu* 05 137 | cohort 05 138 | ally 05 139 | insider 05 140 | foreign* 06 141 | enem* 06 142 | betray* 06 08 143 | treason* 06 08 144 | traitor* 06 08 145 | treacher* 06 08 146 | disloyal* 06 08 147 | individual* 06 148 | apostasy 06 08 10 149 | apostate 06 08 10 150 | deserted 06 08 151 | deserter* 06 08 152 | deserting 06 08 153 | deceiv* 06 154 | jilt* 06 155 | imposter 06 156 | miscreant 06 157 | spy 06 158 | sequester 06 159 | renegade 06 160 | terroris* 06 161 | immigra* 06 162 | obey* 07 163 | obedien* 07 164 | duty 07 165 | law 07 166 | lawful* 07 11 167 | legal* 07 11 168 | duti* 07 169 | honor* 07 170 | respect 07 171 | respectful* 07 172 | respected 07 173 | respects 07 174 | order* 07 175 | father* 07 176 | mother 07 177 | motherl* 07 178 | mothering 07 179 | mothers 07 180 | tradition* 07 181 | hierarch* 07 182 | authorit* 07 183 | permit 07 184 | permission 07 185 | status* 07 186 | rank* 07 187 | leader* 07 188 | class 07 189 | bourgeoisie 07 190 | caste* 07 191 | position 07 192 | complian* 07 193 | command 07 194 | supremacy 07 195 | control 07 196 | submi* 07 197 | allegian* 07 198 | serve 07 199 | abide 07 200 | defere* 07 201 | defer 07 202 | revere* 07 203 | venerat* 07 204 | comply 07 205 | defian* 08 206 | rebel* 08 207 | dissent* 08 208 | subver* 08 209 | disrespect* 08 210 | disobe* 08 211 | sediti* 08 212 | agitat* 08 213 | insubordinat* 08 214 | illegal* 08 215 | lawless* 08 216 | insurgent 08 217 | mutinous 08 218 | defy* 08 219 | dissident 08 220 | unfaithful 08 221 | alienate 08 222 | defector 08 223 | heretic* 08 10 224 | nonconformist 08 225 | oppose 08 226 | protest 08 227 | refuse 08 228 | denounce 08 229 | remonstrate 08 230 | riot* 08 231 | obstruct 08 232 | piety 09 11 233 | pious 09 11 234 | purity 09 235 | pure* 09 236 | clean* 09 237 | steril* 09 238 | sacred* 09 239 | chast* 09 240 | holy 09 241 | holiness 09 242 | saint* 09 243 | wholesome* 09 11 244 | celiba* 09 245 | abstention 09 246 | virgin 09 247 | virgins 09 248 | virginity 09 249 | virginal 09 250 | austerity 09 251 | integrity 09 11 252 | modesty 09 253 | abstinen* 09 254 | abstemiousness 09 255 | upright 09 11 256 | limpid 09 257 | unadulterated 09 258 | maiden 09 259 | virtuous 09 260 | refined 09 261 | decen* 09 11 262 | immaculate 09 263 | innocent 09 264 | pristine 09 265 | church* 09 266 | disgust* 10 267 | deprav* 10 268 | disease* 10 269 | unclean* 10 270 | contagio* 10 271 | indecen* 10 11 272 | sin 10 273 | sinful* 10 274 | sinner* 10 275 | sins 10 276 | sinned 10 277 | sinning 10 278 | slut* 10 279 | whore 10 280 | dirt* 10 281 | impiety 10 282 | 
impious 10 283 | profan* 10 284 | gross 10 285 | repuls* 10 286 | sick* 10 287 | promiscu* 10 288 | lewd* 10 289 | adulter* 10 290 | debauche* 10 291 | defile* 10 292 | tramp 10 293 | prostitut* 10 294 | unchaste 10 295 | intemperate 10 296 | wanton 10 297 | profligate 10 298 | filth* 10 299 | trashy 10 300 | obscen* 10 301 | lax 10 302 | taint* 10 303 | stain* 10 304 | tarnish* 10 305 | debase* 10 306 | desecrat* 10 307 | wicked* 10 11 308 | blemish 10 309 | exploitat* 10 310 | pervert 10 311 | wretched* 10 11 312 | righteous* 11 313 | moral* 11 314 | ethic* 11 315 | value* 11 316 | upstanding 11 317 | good 11 318 | goodness 11 319 | principle* 11 320 | blameless 11 321 | exemplary 11 322 | lesson 11 323 | canon 11 324 | doctrine 11 325 | noble 11 326 | worth* 11 327 | ideal* 11 328 | praiseworthy 11 329 | commendable 11 330 | character 11 331 | proper 11 332 | laudable 11 333 | correct 11 334 | wrong* 11 335 | evil 11 336 | immoral* 11 337 | bad 11 338 | offend* 11 339 | offensive* 11 340 | transgress* 11 341 | -------------------------------------------------------------------------------- /sources/NRC/create_data_dictionary-NRC.R: -------------------------------------------------------------------------------- 1 | library("quanteda") 2 | 3 | class(data_dictionary_NRC) <- "dictionary2" 4 | data_dictionary_NRC <- as.dictionary(data_dictionary_NRC) 5 | 6 | meta(data_dictionary_NRC) <- 7 | list( 8 | title = "NRC Word-Emotion Association Lexicon", 9 | description = "A quanteda dictionary object containing Mohammad and Turney's (2010, 2013) English version of the NRC Word-Emotion Association Lexicon (aka NRC Emotion Lexicon aka EmoLex): association of words with eight emotions (anger, fear, anticipation, trust, surprise, sadness, joy, and disgust) and two sentiments (negative and positive) manually annotated on Amazon's Mechanical Turk. Available in 40 different languages.", 10 | url = "http://saifmohammad.com/WebPages/AccessResource.htm", 11 | 12 | license = "Open, or CAD $975 for commercial use.", 13 | reference = "Mohammad, S. & Turney, P. (2013). Crowdsourcing a Word-Emotion Association Lexicon. _Computational Intelligence_, 29(3), 436--465. https://arxiv.org/abs/1308.6297 14 | 15 | Mohammad, S. & Turney, P. (2010). Emotions Evoked by Common Words and Phrases: Using Mechanical Turk to Create an Emotion Lexicon. In _Proceedings of the NAACL-HLT 2010 Workshop on Computational Approaches to Analysis and Generation of Emotion in Text_, June 2010, LA, California.
https://dl.acm.org/doi/10.5555/1860631.1860635" 16 | ) 17 | 18 | polarity(data_dictionary_NRC) <- list(pos = c("positive"), neg = c("negative")) 19 | 20 | usethis::use_data(data_dictionary_NRC, overwrite = TRUE) 21 | -------------------------------------------------------------------------------- /sources/RID/create-data_dictionary_RID.R: -------------------------------------------------------------------------------- 1 | # Regressive Imagery Dictionary 2 | 3 | library("quanteda") 4 | 5 | data_dictionary_RID <- 6 | dictionary(file = "RID/RID.CAT") 7 | devtools::use_data(data_dictionary_RID, overwrite = TRUE) 8 | -------------------------------------------------------------------------------- /sources/Rauh/Rauh_SentDictionaryGerman.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/sources/Rauh/Rauh_SentDictionaryGerman.Rdata -------------------------------------------------------------------------------- /sources/Rauh/Rauh_SentDictionaryGerman_Negation.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/sources/Rauh/Rauh_SentDictionaryGerman_Negation.Rdata -------------------------------------------------------------------------------- /sources/Rauh/create-data_dictionary_Rauh.R: -------------------------------------------------------------------------------- 1 | #' Rauh's German Political Sentiment Dictionary 2 | 3 | library("quanteda") 4 | library("dplyr") 5 | 6 | # load dictionary dataframes (downloaded here: https://doi.org/10.7910/DVN/BKBX) 7 | load("Rauh/Rauh_SentDictionaryGerman_Negation.Rdata") 8 | load("Rauh/Rauh_SentDictionaryGerman.Rdata") 9 | 10 | # new column where NOT and word are divided with a space 11 | neg.sent.dictionary <- neg.sent.dictionary %>% 12 | mutate(word = gsub("NOT_", "NOT ", feature)) %>% 13 | mutate(sentiment = ifelse(sentiment == 1, "neg_negative", "neg_positive")) 14 | 15 | sent.dictionary <- sent.dictionary %>% 16 | mutate(word = feature) %>% 17 | mutate(sentiment = ifelse(sentiment == -1, "negative", "positive")) 18 | 19 | # bind both dataframes 20 | sent_dictionary_rauh <- bind_rows(sent.dictionary, neg.sent.dictionary) 21 | 22 | # save as quanteda dictionary (word and sentiment column) 23 | data_dictionary_Rauh <- quanteda::as.dictionary(sent_dictionary_rauh) 24 | 25 | data_dictionary_Rauh <- as.dictionary(data_dictionary_Rauh) 26 | meta(data_dictionary_Rauh) <- 27 | list( 28 | title = "Rauh's German Political Sentiment Dictionary", 29 | description = "A quanteda dictionary object containing the dictionaries provided in Rauh (forthcoming). Rauh assesses its performance against human intuition of sentiment in German political language (parliamentary speeches, party manifestos, and media coverage). The resource builds on, harmonizes and extends the SentiWS (Remus et al. 2010) and GermanPolarityClues (Waltinger 2010) dictionaries. In order to use the negation correction provided by the dictionary, currently a combination of tokens_replace and tokens_compound is required to harmonize the five covered bi-gram patterns prior to scoring. The example below shows how to conduct this transformation. 
Note that the process changes the terms 'nicht|nichts|kein|keine|keinen' to a joint term altering some of the features of the original corpus.", 30 | url = "https://doi.org/10.7910/DVN/BKBXWD", 31 | reference = "Rauh, C. (2018). Validating a Sentiment Dictionary for German Political Language: A Workbench Note. Journal of Information Technology & Politics, 15(4), 319-343. 32 | 33 | Remus, R., Quasthoff U., & Heyer, G. (2010). \"SentiWS - a Publicly Available German-language Resource for Sentiment Analysis.\" In Proceedings of the 7th International Language Resources and Evaluation (LREC'10), 1168-1171. 34 | 35 | Waltinger, U. (2010). \"GermanPolarityClues: A Lexical Resource for German Sentiment Analysis.\" In International Conference on Language Resources and Evaluation, 17-23 May 2010 LREC'10.", 36 | license = "Unknown" 37 | ) 38 | 39 | 40 | polarity(data_dictionary_Rauh) <- 41 | list(pos = c("positive", "neg_negative"), neg = c("negative", "neg_positive")) 42 | 43 | usethis::use_data(data_dictionary_Rauh, overwrite = TRUE) 44 | -------------------------------------------------------------------------------- /sources/geninquirer/create-data_dictionary_geninquirer.R: -------------------------------------------------------------------------------- 1 | library("quanteda") 2 | 3 | geninquirer <- read.csv("geninquirer/inquireraugmented.csv", 4 | stringsAsFactors = FALSE, comment.char = "") 5 | GIpos <- 6 | c(geninquirer$Entry[geninquirer$Positiv == "Positiv"], 7 | geninquirer$Entry[geninquirer$Yes == "Yes"]) %>% 8 | char_tolower %>% 9 | stringi::stri_replace_all_regex("#\\w+$", "") %>% 10 | unique 11 | GIneg <- 12 | c(geninquirer$Entry[geninquirer$Negativ == "Negativ"], 13 | geninquirer$Entry[geninquirer$No == "No"]) %>% 14 | char_tolower %>% 15 | stringi::stri_replace_all_regex("#\\w+$", "") %>% 16 | unique 17 | data_dictionary_geninqposneg <- 18 | dictionary(list(positive = GIpos, negative = GIneg)) 19 | 20 | meta(data_dictionary_geninqposneg) <- 21 | list( 22 | title = "Augmented General Inquirer Positiv and Negativ dictionary", 23 | url = "http://www.wjh.harvard.edu/~inquirer/spreadsheet_guide.htm", 24 | description = "A lexicon containing the Positiv and Negativ dictionary entries from the augmented 25 | General Inquirer. These are new valence categories described at 26 | `http://www.wjh.harvard.edu/~inquirer/homecat.htm` but also include the 27 | terms from the 'yes' and 28 | 'no' dictionary entries.", 29 | url = "http://www.wjh.harvard.edu/~inquirer/spreadsheet_guide.htm", 30 | license = "Open, but email the creators for commercial use. Many more categories are available.", 31 | reference = "Stone, P.J., Dunphy, C.D., & Smith, M.S. (1966). _The General Inquirer: A Computer Approach to Content Analysis._ Cambridge, MA: MIT Press." 
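# A minimal, hypothetical sketch of the negation pre-processing described in the
# data_dictionary_Rauh metadata above: the five negators
# 'nicht', 'nichts', 'kein', 'keine', 'keinen' are harmonised into "NOT <word>"
# bi-grams so that they match the dictionary's negation entries created by the
# gsub() step in create-data_dictionary_Rauh.R. It assumes quanteda and
# quanteda.sentiment are attached and that txt is a character vector of German text:
#
#   toks <- tokens(txt)
#   toks <- tokens_replace(toks,
#                          pattern = c("nicht", "nichts", "kein", "keine", "keinen"),
#                          replacement = rep("NOT", 5))
#   toks <- tokens_compound(toks, pattern = phrase("NOT *"), concatenator = " ")
#   textstat_polarity(dfm(toks), dictionary = data_dictionary_Rauh)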
32 | ) 33 | 34 | polarity(data_dictionary_geninqposneg) <- 35 | list(pos = "positive", neg = "negative") 36 | 37 | 38 | usethis::use_data(data_dictionary_geninqposneg, overwrite = TRUE) 39 | -------------------------------------------------------------------------------- /sources/make_sentiment_dictionaries.R: -------------------------------------------------------------------------------- 1 | ## (re)make all sentiment dictionaries 2 | 3 | library("quanteda") 4 | 5 | source("AFINN/create-data_dictionary_AFINN.R") 6 | source("ANEW/create-data_dictionary_ANEW.R") 7 | source("geninquirer/create-data_dictionary_geninquirer.R") 8 | source("Hu-Liu/create_data_dictionary-HuLiu.R") 9 | source("Loughran-McDonald/create-data_dictionary_LoughranMcDonald.R") 10 | source("NRC/create_data_dictionary-NRC.R") 11 | source("Rauh/create-data_dictionary_Rauh.R") 12 | source("sentiws/create-data_dictionary_sentiws.R") 13 | 14 | ## not sentiment dictionaries 15 | # source("Laver-Garry/create-data_dictionary_LaverGarry.R") 16 | # source("MFD/create-data_dictionary_MFD.R") 17 | # source("RID/create-data_dictionary_RID.R") 18 | 19 | # LSD 20 | data("data_dictionary_LSD2015", package = "quanteda") 21 | polarity(data_dictionary_LSD2015) <- 22 | list(pos = c("positive", "neg_negative"), neg = c("negative", "neg_positive")) 23 | names(meta(data_dictionary_LSD2015))[which(names(meta(data_dictionary_LSD2015)) == "source")] <- "reference" 24 | usethis::use_data(data_dictionary_LSD2015, overwrite = TRUE) 25 | -------------------------------------------------------------------------------- /sources/sentiws/create-data_dictionary_sentiws.R: -------------------------------------------------------------------------------- 1 | # SentiWS Dictionary 2 | 3 | library("quanteda") 4 | library("dplyr") 5 | library("tidyr") 6 | library("stringr") 7 | 8 | read_senti_scores <- function(filename) { 9 | 10 | results <- read.delim(filename, header = FALSE, encoding="UTF-8") %>% 11 | cbind(str_split_fixed(.$V3, "[,-]",50),stringsAsFactors = FALSE) %>% 12 | mutate( 13 | V1 = str_sub(str_match(V1,".*\\|"),1,-2), 14 | nr = row_number() 15 | ) %>% 16 | select(-V3) %>% 17 | mutate(nr = as.character(nr)) %>% 18 | gather(wordstem,word,V1,1:48, -nr,-V2) %>% 19 | select(word,V2) %>% rename(score=V2) %>% 20 | filter(word != "") %>% 21 | arrange(word) 22 | 23 | } 24 | 25 | positive <- read_senti_scores("sentiws/SentiWS_v1.8c_Positive.txt") %>% 26 | mutate(sentiment = "positive") %>% 27 | unique() 28 | negative <- read_senti_scores("sentiws/SentiWS_v1.8c_Negative.txt") %>% 29 | mutate(sentiment = "negative") %>% 30 | unique() 31 | sentis <- bind_rows(positive, negative) 32 | 33 | data_dictionary_sentiws <- as.dictionary(sentis) 34 | 35 | polarity(data_dictionary_sentiws) <- 36 | list(pos = c("positive"), neg = c("negative")) 37 | valence(data_dictionary_sentiws) <- 38 | list(positive = positive[!duplicated(positive$word), "score"], 39 | negative = negative[!duplicated(negative$word), "score"]) 40 | 41 | meta(data_dictionary_sentiws) <- 42 | list( 43 | title = "SentimentWortschatz (SentiWS)", 44 | description = "A quanteda dictionary object containing SentimentWortschatz (SentiWS), a publicly available German-language resource for sentiment analysis. The current version of SentiWS contains 1,650 positive and 1,818 negative words, which sum up to 15,649 positive and 15,632 negative word forms including their inflections. It not only contains adjectives and adverbs explicitly expressing a sentiment, but also nouns and verbs implicitly containing one. 
The original dictionary weights within the interval of -1 to 1. Note that the version implemented in quanteda.dictionaries uses a binary classification into positive (weight > 0) and negative (weight < 0) features.", 45 | url = "http://wortschatz.uni-leipzig.de/en/download/", 46 | reference = "Remus, R., Quasthoff U., and Heyer, G. (2010). [SentiWS: a Publicly Available German-language Resource for Sentiment Analysis](http://www.lrec-conf.org/proceedings/lrec2010/pdf/490_Paper.pdf). In _Proceedings of the 7th International Language Ressources and Evaluation (LREC'10)_, 1168--1171.", 47 | license = "CC-BY-NC-SA 3.0" 48 | ) 49 | 50 | usethis::use_data(data_dictionary_sentiws, overwrite = TRUE) 51 | 52 | -------------------------------------------------------------------------------- /sources/test-misc.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/sources/test-misc.R -------------------------------------------------------------------------------- /tests/spelling.R: -------------------------------------------------------------------------------- 1 | if (requireNamespace("spelling", quietly = TRUE)) 2 | spelling::spell_check_test(vignettes = TRUE, error = FALSE, 3 | skip_on_cran = TRUE) 4 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library("testthat") 2 | library("quanteda") 3 | library("quanteda.sentiment") 4 | 5 | test_check("quanteda.sentiment") 6 | -------------------------------------------------------------------------------- /tests/testthat/test-data.R: -------------------------------------------------------------------------------- 1 | library("quanteda") 2 | data("data_dictionary_LSD2015", package = "quanteda.sentiment") 3 | 4 | test_that("dictionaries have polarities and valences set", { 5 | skip("skip until digits issue can be solved") 6 | expect_output( 7 | print(data_dictionary_AFINN, 0, 0), 8 | "Dictionary object with 1 key entry.\nValences set for keys: AFINN.", 9 | fixed = TRUE 10 | ) 11 | 12 | expect_output( 13 | print(data_dictionary_ANEW, 0, 0), 14 | "Dictionary object with 3 key entries.\nValences set for keys: pleasure, arousal, dominance ", 15 | fixed = TRUE 16 | ) 17 | 18 | expect_output( 19 | print(data_dictionary_geninqposneg, 0, 0), 20 | 'Dictionary object with 2 key entries.\nPolarities: pos = "positive"; neg = "negative" ', 21 | fixed = TRUE 22 | ) 23 | 24 | expect_output( 25 | print(data_dictionary_HuLiu, 0, 0), 26 | 'Dictionary object with 2 key entries.\nPolarities: pos = "positive"; neg = "negative" ', 27 | fixed = TRUE 28 | ) 29 | 30 | expect_output( 31 | print(data_dictionary_LoughranMcDonald, 0, 0), 32 | 'Dictionary object with 9 key entries.\nPolarities: pos = "POSITIVE"; neg = "NEGATIVE" ', 33 | fixed = TRUE 34 | ) 35 | 36 | expect_output( 37 | print(data_dictionary_LSD2015, 0, 0), 38 | 'Dictionary object with 4 key entries.\nPolarities: pos = "positive", "neg_negative"; neg = "negative", "neg_positive" ', 39 | fixed = TRUE 40 | ) 41 | 42 | expect_output( 43 | print(data_dictionary_NRC, 0, 0), 44 | 'Dictionary object with 10 key entries.\nPolarities: pos = "positive"; neg = "negative" ', 45 | fixed = TRUE 46 | ) 47 | 48 | expect_output( 49 | print(data_dictionary_Rauh, 0, 0), 50 | 'Dictionary object with 4 key entries.\nPolarities: pos = "positive", "neg_negative"; neg = 
"negative", "neg_positive" ', 51 | fixed = TRUE 52 | ) 53 | 54 | expect_output( 55 | print(data_dictionary_sentiws, 0, 0), 56 | 'Dictionary object with 2 key entries.\nPolarities: pos = "positive"; neg = "negative" \nValences set for keys: positive, negative ', 57 | fixed = TRUE 58 | ) 59 | }) 60 | 61 | test_that("dictionaries have metadata set", { 62 | meta_ok <- function(d) { 63 | fields <- c("title", "description", "url", "reference", "license") 64 | tmp <- fields %in% names(meta(d)) 65 | if (all(tmp)) { 66 | TRUE 67 | } else { 68 | warning("MISSING: ", paste(fields[!tmp], collapse = " "), call. = FALSE) 69 | FALSE 70 | } 71 | } 72 | expect_true(meta_ok(data_dictionary_AFINN)) 73 | expect_true(meta_ok(data_dictionary_ANEW)) 74 | expect_true(meta_ok(data_dictionary_geninqposneg)) 75 | expect_true(meta_ok(data_dictionary_HuLiu)) 76 | expect_true(meta_ok(data_dictionary_LoughranMcDonald)) 77 | expect_true(meta_ok(data_dictionary_LSD2015)) 78 | expect_true(meta_ok(data_dictionary_NRC)) 79 | expect_true(meta_ok(data_dictionary_Rauh)) 80 | expect_true(meta_ok(data_dictionary_sentiws)) 81 | }) 82 | 83 | -------------------------------------------------------------------------------- /tests/testthat/test-misc.R: -------------------------------------------------------------------------------- 1 | library("quanteda") 2 | 3 | test_that("printing augmented dictionary works", { 4 | skip("skip until digits issue can be solved") 5 | expect_output( 6 | print(data_dictionary_AFINN, 0, 0), 7 | "Dictionary object with 1 key entry.\nValences set for keys: AFINN ", 8 | fixed = TRUE 9 | ) 10 | 11 | dict <- quanteda::dictionary(list(one = list(oneA = c("a", "b"), 12 | oneB = "d"), 13 | two = c("x", "y"))) 14 | polarity(dict) <- list(pos = "one", neg = "two") 15 | expect_output( 16 | print(dict, 0, 0), 17 | 'Dictionary object with 2 primary key entries and 2 nested levels.\nPolarities: pos = "one"; neg = "two" ', 18 | fixed = TRUE 19 | ) 20 | }) 21 | 22 | test_that("friendly error messages work", { 23 | expect_error( 24 | textstat_polarity(0), 25 | "textstat_polarity() only works on character, corpus, dfm, tokens objects.", 26 | fixed = TRUE 27 | ) 28 | expect_error( 29 | textstat_valence(0), 30 | "textstat_valence() only works on character, corpus, dfm, tokens objects.", 31 | fixed = TRUE 32 | ) 33 | }) 34 | 35 | test_that("subsetting preserves valence and polarity", { 36 | # expect_output( 37 | # print(data_dictionary_ANEW[1], 0, 0), 38 | # "Dictionary object with 1 key entry.\nValences set for keys: pleasure, arousal, dominance ", 39 | # fixed = TRUE 40 | # ) 41 | # expect_output( 42 | # print(data_dictionary_ANEW["pleasure"], 0, 0), 43 | # "Dictionary object with 1 key entry.\nValences set for keys: pleasure, arousal, dominance ", 44 | # fixed = TRUE 45 | # ) 46 | 47 | dict <- quanteda::dictionary(list(one = c("a", "b"), 48 | two = c("c", "d"), 49 | three = c("e", "f"))) 50 | polarity(dict) <- list(pos = c("one", "two"), neg = "three") 51 | 52 | # expect_output( 53 | # print(dict[c(1, 3)], 0, 0), 54 | # 'Dictionary object with 2 key entries.\nPolarities: pos = "one", "two"; neg = "three"', 55 | # fixed = TRUE 56 | # ) 57 | }) 58 | -------------------------------------------------------------------------------- /tests/testthat/test-test.R: -------------------------------------------------------------------------------- 1 | test_that("investigate digits problem", { 2 | skip("skip until digits issue can be solved") 3 | 4 | data("data_dictionary_LSD2015", package = "quanteda") 5 | 
expect_output(print(data_dictionary_LSD2015, max_nkey = 0, max_nval = 0), 6 | "Dictionary object with 4 key entries.", 7 | fixed = TRUE) 8 | 9 | expect_output(print(data_dictionary_geninqposneg, 0, 0), 10 | "Dictionary object with 2 key entries.", 11 | fixed = TRUE) 12 | }) 13 | -------------------------------------------------------------------------------- /tests/testthat/test-textstat_polarity.R: -------------------------------------------------------------------------------- 1 | library("quanteda") 2 | test_that("textstat_polarity works on all object types", { 3 | txt <- c(d1 = "good good bad bad good word1 word1 word1 word2 word2", 4 | d2 = "good", 5 | d3 = "notsentiment", 6 | d4 = "Great!", 7 | d5 = "good good") 8 | 9 | smooth <- 0.5 10 | logit <- c(log(3 + smooth) - log(2 + smooth), 11 | log(1 + smooth) - log(0 + smooth), 12 | log(0 + smooth) - log(0 + smooth), 13 | log(1 + smooth) - log(0 + smooth), 14 | log(2 + smooth) - log(0 + smooth)) 15 | 16 | data(data_dictionary_LSD2015, package = "quanteda.sentiment") 17 | 18 | expect_equivalent( 19 | textstat_polarity(txt, dictionary = data_dictionary_LSD2015), 20 | data.frame(doc_id = names(txt), sentiment = logit, stringsAsFactors = FALSE) 21 | ) 22 | expect_identical( 23 | textstat_polarity(txt, dictionary = data_dictionary_LSD2015), 24 | textstat_polarity(corpus(txt), dictionary = data_dictionary_LSD2015) 25 | ) 26 | expect_identical( 27 | textstat_polarity(txt, dictionary = data_dictionary_LSD2015), 28 | textstat_polarity(tokens(txt), dictionary = data_dictionary_LSD2015) 29 | ) 30 | expect_identical( 31 | textstat_polarity(txt, dictionary = data_dictionary_LSD2015), 32 | textstat_polarity(dfm(tokens(txt)), dictionary = data_dictionary_LSD2015) 33 | ) 34 | }) 35 | 36 | test_that("different sentiment functions work as expected", { 37 | txt <- c(d1 = "good good bad bad good word1 word1 word1 word2 word2", 38 | d2 = "good", 39 | d3 = "notsentiment", 40 | d4 = "Great!", 41 | d5 = "good good") 42 | 43 | # logit scale 44 | smooth <- 0.5 45 | logit <- c(log(3 + smooth) - log(2 + smooth), 46 | log(1 + smooth) - log(0 + smooth), 47 | log(0 + smooth) - log(0 + smooth), 48 | log(1 + smooth) - log(0 + smooth), 49 | log(2 + smooth) - log(0 + smooth)) 50 | data(data_dictionary_LSD2015, package = "quanteda.sentiment") 51 | expect_equal( 52 | logit, 53 | textstat_polarity(txt, dictionary = data_dictionary_LSD2015)$sentiment 54 | ) 55 | 56 | # relative proportional difference 57 | rpd <- c(3 - 2, 58 | 1 - 0, 59 | 0 - 0, 60 | 1 - 0, 61 | 2 - 0) / c(5, 1, 0, 1, 2) 62 | expect_equal( 63 | rpd, 64 | textstat_polarity(txt, dictionary = data_dictionary_LSD2015, 65 | fun = sent_relpropdiff)$sentiment 66 | ) 67 | 68 | # absolute proportional difference 69 | apd <- c(3 - 2, 70 | 1 - 0, 71 | 0 - 0, 72 | 1 - 0, 73 | 2 - 0) / unname(ntoken(txt)) 74 | expect_equal( 75 | apd, 76 | textstat_polarity(txt, dictionary = data_dictionary_LSD2015, 77 | fun = sent_abspropdiff)$sentiment 78 | ) 79 | }) 80 | 81 | test_that("textstat_polarity error conditions work", { 82 | dict <- dictionary(list( 83 | happy = c("happy", "jubilant", "exuberant"), 84 | sad = c("sad", "morose", "down"), 85 | okay = "just okay" 86 | )) 87 | expect_error( 88 | textstat_polarity("Happy, sad, neutral.", dictionary = dict), 89 | "polarity is not set for this dictionary; see ?polarity", 90 | fixed = TRUE 91 | ) 92 | 93 | }) 94 | 95 | test_that("polarity functions work", { 96 | dict <- dictionary(list( 97 | happy = c("happy", "jubilant", "exuberant"), 98 | sad = c("sad", "morose", "down"), 99 | okay = 
"just okay" 100 | )) 101 | 102 | expect_equal(polarity(dict), NULL) 103 | 104 | polarity(dict) <- list(pos = "happy", neg = "sad") 105 | expect_identical( 106 | polarity(dict), 107 | list(pos = "happy", neg = "sad") 108 | ) 109 | 110 | polarity(dict) <- list(pos = "happy", neg = "sad", neut = "okay") 111 | expect_identical( 112 | polarity(dict), 113 | list(pos = "happy", neg = "sad", neut = "okay") 114 | ) 115 | 116 | polarity(dict) <- list(pos = c("happy", "okay"), neg = "sad") 117 | expect_identical( 118 | polarity(dict), 119 | list(pos = c("happy", "okay"), neg = "sad") 120 | ) 121 | 122 | expect_error( 123 | polarity(dict) <- list(blank = "happy", neg = "sad"), 124 | "value must be a list of 'pos', 'neg', and (optionally) 'neut'", 125 | fixed = TRUE 126 | ) 127 | expect_error( 128 | polarity(dict) <- list(pos = "happy", neg = "sad", neutr = "okay"), 129 | "value must be a list of 'pos', 'neg', and (optionally) 'neut'", 130 | fixed = TRUE 131 | ) 132 | 133 | # this should generate an error 134 | expect_error( 135 | polarity(dict) <- list(pos = "notfound", neg = "sad"), 136 | "'notfound' key not found in this dictionary" 137 | ) 138 | 139 | # should test that both pos and neg are assigned ? 140 | 141 | }) 142 | 143 | test_that("get_polarity_dictionary() works", { 144 | dict <- dictionary(list( 145 | happy = c("happy", "jubilant", "exuberant"), 146 | sad = c("sad", "morose", "down"), 147 | okay = "just okay" 148 | )) 149 | expect_equal(polarity(dict), NULL) 150 | 151 | polarity(dict) <- list(pos = "happy", neg = "sad", neut = "okay") 152 | 153 | expect_identical( 154 | quanteda.sentiment:::get_polarity_dictionary(dict) |> 155 | quanteda::as.list(), 156 | list(pos = c("happy", "jubilant", "exuberant"), 157 | neg = c("sad", "morose", "down"), 158 | neut = "just okay") 159 | ) 160 | 161 | expect_identical( 162 | quanteda.sentiment:::get_polarity_dictionary(dict) |> polarity(), 163 | list(pos = "pos", neg = "neg", neut = "neut") 164 | ) 165 | 166 | polarity(dict) <- list(pos = "happy", neg = "sad", neut = "okay") 167 | dict["okay"] <- NULL 168 | expect_error( 169 | quanteda.sentiment:::get_polarity_dictionary(dict), 170 | "'okay' key not found in this dictionary" 171 | ) 172 | }) 173 | 174 | test_that("nested scope works for textstatpolarity on tokens", { 175 | dict <- dictionary(list(positive = "good", negative = "not good")) 176 | polarity(dict) <- list(pos = "positive", neg = "negative") 177 | valence(dict) <- c(positive = 1, negative = -1) 178 | toks <- tokens("The test is not good") 179 | 180 | expect_equivalent( 181 | textstat_polarity(toks, dictionary = dict, fun = sent_abspropdiff), 182 | data.frame(doc_id = "text1", sentiment = -0.25, row.names = NULL) 183 | ) 184 | }) 185 | -------------------------------------------------------------------------------- /tests/testthat/test-textstat_valence.R: -------------------------------------------------------------------------------- 1 | library("quanteda") 2 | 3 | test_that("textstat_valence works for uniform valences within key", { 4 | dict <- dictionary(list(positive = c("good", "great"), 5 | negative = c("bad"), 6 | neg_positive = "not good", 7 | neg_negative = "not bad")) 8 | txt <- c(d1 = "good good bad bad good word1 word1 word1 word2 word2", 9 | d2 = "good", 10 | d3 = "notsentiment", 11 | d4 = "Great! 
Not bad.", 12 | d5 = "good good not good bad") 13 | 14 | # for two categories 15 | valence(dict) <- list(positive = 1, negative = -1) 16 | 17 | corp <- corpus(txt) 18 | toks <- tokens(corp) 19 | dfmat <- dfm(toks) 20 | 21 | expect_identical( 22 | textstat_valence(corp, dict), 23 | textstat_valence(toks, dict) 24 | ) 25 | expect_identical( 26 | textstat_valence(corp, dict), 27 | textstat_valence(dfmat, dict) 28 | ) 29 | 30 | expect_identical( 31 | textstat_valence(corp, dict)$sentiment, 32 | c((3 * 1 + 2 * -1) / (3 + 2), 33 | (1 * 1 + 0 * -1) / (1 + 0), 34 | (0 * 1 + 0 * -1) / (1), 35 | (1 * 1 + 1 * -1) / (1 + 1), 36 | (3 * 1 + 1 * -1) / (3 + 1)) 37 | ) 38 | 39 | # for multiple categories within one polarity 40 | valence(dict) <- list(positive = 1, negative = -1, 41 | neg_negative = 1, neg_positive = -1) 42 | expect_identical( 43 | textstat_valence(corp, dict), 44 | textstat_valence(toks, dict) 45 | ) 46 | expect_equal( 47 | all.equal(textstat_valence(corp, dict)$sentiment, 48 | textstat_valence(dfmat, dict)$sentiment), 49 | "Mean relative difference: 1.5" 50 | ) 51 | expect_identical( 52 | textstat_valence(corp, dict)$sentiment, 53 | c((3 * 1 + 2 * -1) / (5), 54 | (1 * 1 + 0 * -1) / (1), 55 | (0 * 1 + 0 * -1) / (1), 56 | (2 * 1 + 0 * -1) / (2), 57 | (2 * 1 + 2 * -1) / (4)) 58 | ) 59 | }) 60 | 61 | test_that("textstat_valence with individual value scores works", { 62 | dict <- dictionary(list( 63 | happy = c("happy", "jubilant", "exuberant"), 64 | sad = c("sad", "morose", "down"), 65 | okay = c("just okay", "okay") 66 | )) 67 | valence(dict) <- list( 68 | happy = c("happy" = 1, "jubilant" = 2, "exuberant" = 2), 69 | sad = c("sad" = -1, "morose" = -2, "down" = -1), 70 | okay = c("just okay" = 0.5, "okay" = 5) 71 | ) 72 | txt <- c(d1 = "sad word happy word exuberant", 73 | d2 = "down sad just okay", 74 | d3 = "sad happy word word") 75 | 76 | corp <- corpus(txt) 77 | toks <- tokens(corp) %>% 78 | tokens_compound(dict, concatenator = " ") 79 | dfmat <- dfm(toks) 80 | 81 | expect_identical( 82 | textstat_valence(corp, dict), 83 | textstat_valence(toks, dict) 84 | ) 85 | expect_identical( 86 | textstat_valence(corp, dict), 87 | textstat_valence(dfmat, dict) 88 | ) 89 | 90 | sent <- c((-1 + 1 + 2) / 3, # 5 91 | (-1 - 1 + 0.5) / 3, 92 | (-1 + 1) / 2) # 4 93 | expect_identical( 94 | textstat_valence(txt, dict), 95 | data.frame(doc_id = docnames(dfmat), 96 | sentiment = sent) 97 | ) 98 | }) 99 | 100 | test_that("textstat_valence error conditions work", { 101 | dict <- dictionary(list( 102 | happy = c("happy", "jubilant", "exuberant"), 103 | sad = c("sad", "morose", "down"), 104 | okay = "just okay" 105 | )) 106 | expect_error( 107 | textstat_valence("Happy, sad, neutral.", dictionary = dict), 108 | "no valenced keys found" 109 | ) 110 | }) 111 | 112 | test_that("valence assignment functions work", { 113 | dict <- dictionary(list( 114 | happy = c("happy", "jubilant", "exuberant"), 115 | sad = c("sad", "morose", "down"), 116 | okay = "just okay" 117 | )) 118 | 119 | expect_equal(valence(dict), NULL) 120 | 121 | expect_error( 122 | valence(dict) <- list(happy = "a", sad = -1), 123 | "valence values must be numeric" 124 | ) 125 | 126 | valence(dict) <- list(happy = 1, sad = -1, okay = 0) 127 | expect_identical( 128 | valence(dict), 129 | list(happy = c(happy = 1, jubilant = 1, exuberant = 1), 130 | sad = c(sad = -1, morose = -1, down = -1), 131 | okay = c(`just okay` = 0)) 132 | ) 133 | }) 134 | 135 | test_that("valence error checks work", { 136 | dict <- dictionary(list(top = c("top1", "top2"), 137 | 
nested = list(nest1 = c("a", "one"), 138 | nest2 = c("b", "two")))) 139 | expect_error( 140 | valence(dict) <- list(top = c(1, 2), nested = -5), 141 | "valenced dictionaries cannot be nested" 142 | ) 143 | }) 144 | 145 | test_that("dictionary print method shows valence and polarity", { 146 | dict <- dictionary(list( 147 | happy = c("happy", "jubilant", "exuberant"), 148 | sad = c("sad", "morose", "down") 149 | )) 150 | valence(dict) <- c(happy = 1, sad = -1) 151 | expect_output(print(dict), 152 | "Dictionary object with 2 key entries. 153 | Valences set for keys: happy, sad 154 | - [happy]: 155 | - happy, jubilant, exuberant 156 | - [sad]: 157 | - sad, morose, down", fixed = TRUE) 158 | 159 | dict <- dictionary(list( 160 | happiness = c("happy", "jubilant", "exuberant", "content"), 161 | anger = c("mad", "peeved", "irate", "furious", "livid") 162 | )) 163 | valence(dict) <- list(happiness = c(3, 4, 5, 2), 164 | anger = c(3.1, 2.4, 2.9, 4.1, 5.0)) 165 | expect_output(print(dict), 166 | "Dictionary object with 2 key entries. 167 | Valences set for keys: happiness, anger 168 | - [happiness]: 169 | - happy, jubilant, exuberant, content 170 | - [anger]: 171 | - mad, peeved, irate, furious, livid", fixed = TRUE) 172 | }) 173 | 174 | test_that("overlapping values work as expected", { 175 | dict <- dictionary(list( 176 | happy = c("okay", "exuberant"), 177 | sad = c("okay", "depressed") 178 | )) 179 | valence(dict) <- list(happy = c(okay = 1, exuberant = 3), 180 | sad = c(depressed = -4, okay = -2)) 181 | expect_identical( 182 | textstat_valence("Depressed not okay", dict)$sentiment, 183 | (-4 + 1 - 2) / 3 184 | ) 185 | expect_identical( 186 | textstat_valence("Depressed not okay", dict)$sentiment, 187 | textstat_valence(dfm(tokens("Depressed not okay")), dict)$sentiment 188 | ) 189 | }) 190 | 191 | test_that("normalization methods work for textstat_valence", { 192 | dict <- dict <- dictionary(list(positive = c("good", "great"), 193 | negative = c("bad", "awful"))) 194 | valence(dict) <- list(positive = 1, negative = -1) 195 | polarity(dict) <- list(pos = "positive", neg = "negative") 196 | 197 | txt <- c(d1 = "Good good bad other.", 198 | d2 = "Word word other bad!", 199 | d3 = "Great awful other £1.") 200 | toks <- tokens(txt) 201 | 202 | # relative proportional difference 203 | pol_rpd <- data.frame(doc_id = docnames(toks), 204 | sentiment = c( (2 - 1) / 3, 205 | (0 - 1) / 1, 206 | (1 - 1) / 2 )) 207 | expect_equivalent( 208 | textstat_polarity(toks, dict, sent_relpropdiff), 209 | pol_rpd 210 | ) 211 | expect_equivalent( 212 | textstat_valence(toks, dict, normalization = "dictionary"), 213 | textstat_polarity(toks, dict, sent_relpropdiff) 214 | ) 215 | 216 | # absolute proportional difference 217 | pol_apd <- data.frame(doc_id = docnames(toks), 218 | sentiment = c( (2 - 1) / 5, 219 | (0 - 1) / 5, 220 | (1 - 1) / 6 )) 221 | expect_equivalent( 222 | textstat_polarity(toks, dict, sent_abspropdiff), 223 | pol_apd 224 | ) 225 | expect_equivalent( 226 | textstat_valence(toks, dict, normalization = "all"), 227 | textstat_polarity(toks, dict, sent_abspropdiff) 228 | ) 229 | 230 | # no normalization 231 | expect_equivalent( 232 | textstat_valence(toks, dict, normalization = "none"), 233 | data.frame(doc_id = docnames(toks), sentiment = c(1, -1, 0)) 234 | ) 235 | 236 | # logit scale 237 | pol_log <- data.frame(doc_id = docnames(toks), 238 | sentiment = c( log(2 + .5) - log(1 + .5), 239 | log(0 + .5) - log(1 + .5), 240 | log(1 + .5) - log(1 + .5) )) 241 | expect_equivalent( 242 | textstat_polarity(toks, 
dict, sent_logit), 243 | pol_log 244 | ) 245 | # dfmat <- dfm(toks) %>% 246 | # dfm_lookup(dict) 247 | # dfm_weight(scheme = "logsmooth", base = exp(1)) 248 | # expect_equivalent( 249 | # textstat_valence(dfmat, dict), 250 | # textstat_polarity(toks, dict, sent_logit) 251 | # ) 252 | }) 253 | 254 | test_that("worker functions work", { 255 | dict <- dictionary(list(positive = "good", negative = "bad")) 256 | expect_error( 257 | quanteda.sentiment:::flip_valence(dict), 258 | "valence not set" 259 | ) 260 | }) 261 | 262 | test_that("valence error conditions work", { 263 | dict <- dictionary(list( 264 | happy = c("okay", "exuberant"), 265 | sad = c("okay", "depressed") 266 | )) 267 | 268 | expect_error( 269 | valence(dict) <- list(happy = c(okay = 1, exuberant = 3), 270 | c(depressed = -4, okay = -2)), 271 | "valence must be a fully named list" 272 | ) 273 | 274 | expect_error( 275 | valence(dict) <- list(happy = c(okay = 1, exuberant = 3), 276 | other = c(depressed = -4, okay = -2)), 277 | "'other' is not a dictionary key" 278 | ) 279 | 280 | expect_error( 281 | valence(dict) <- list(happy = c(1, 3, 2)), 282 | "valence value length not equal to number of values for key 'happy'" 283 | ) 284 | }) 285 | 286 | test_that("nested scope works for textstat_valence on tokens", { 287 | dict <- dictionary(list(positive = "good", negative = "not good")) 288 | polarity(dict) <- list(pos = "positive", neg = "negative") 289 | valence(dict) <- c(positive = 1, negative = -1) 290 | toks <- tokens("The test is not good") 291 | 292 | expect_equivalent( 293 | textstat_valence(toks, dictionary = dict), 294 | data.frame(doc_id = "text1", sentiment = -1, row.names = NULL) 295 | ) 296 | }) 297 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/sentiment_analysis.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Dictionary-based sentiment analysis using quanteda" 3 | author: "Kenneth Benoit" 4 | output: 5 | rmarkdown::html_vignette: 6 | toc: true 7 | vignette: > 8 | %\VignetteIndexEntry{Dictionary-based sentiment analysis using quanteda} 9 | %\VignetteEngine{knitr::rmarkdown} 10 | %\VignetteEncoding{UTF-8} 11 | --- 12 | 13 | ```{r, echo = FALSE} 14 | knitr::opts_chunk$set( 15 | collapse = TRUE, 16 | comment = "##" 17 | ) 18 | ``` 19 | 20 | ## Overview 21 | 22 | Sentiment analysis using dictionaries can be applied to any text, tokens, or dfm using `textstat_polarity()` or `textstat_valence()`. This function takes the **quanteda** object as an input, along with a dictionary whose valence or polarity has been set. The two ways of setting dictionary values allow a user to weight each _key_ with a polarity weight, or each _value_ within keys with a _valence_ weight. 23 | 24 | Dictionaries consist of keys and values, where the "key" is the canonical category such as "positive" or "negative", and the "values" consist of the patterns assigned to each key that will be counted as occurrences of those keys when its dictionary is applied using `tokens_lookup()` or `dfm_lookup()`. 
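To make the key/value distinction concrete, here is a minimal sketch using an invented two-key dictionary and a toy sentence; the object name `dict_example` and the word lists are purely illustrative, and the calls are namespace-qualified only because the packages are not attached until the next code chunk.

```{r}
# a hypothetical dictionary: the keys are the canonical categories, and the
# values are the word patterns counted as matches for each key
dict_example <- quanteda::dictionary(list(positive = c("good", "great", "fantastic"),
                                          negative = c("bad", "awful", "terrible")))
quanteda::tokens_lookup(quanteda::tokens("A good day, not an awful one."),
                        dictionary = dict_example)
```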
25 | 26 | In the Lexicoder Sentiment Dictionary 2015 (`data_dictionary_LSD2015`) that is distributed with the **quanteda** package, for instance, the dictionary has four keys, with between 1,721 and 2,860 values each: 27 | ```{r} 28 | library("quanteda", warn.conflicts = FALSE, verbose = FALSE) 29 | library("quanteda.sentiment", warn.conflicts = FALSE, verbose = FALSE) 30 | 31 | print(data_dictionary_LSD2015, max_nval = 5) 32 | lengths(data_dictionary_LSD2015) 33 | ``` 34 | As can be seen, these use "glob" pattern matches and may be multi-word values, such as "a lie" or "no damag*". 35 | 36 | 37 | ## Polarity and valence 38 | 39 | Dictionary-based sentiment analysis in **quanteda** can take place in two different forms, depending on whether dictionary keys are part of a _polarity_-based sentiment scheme -- such as positive versus negative dictionary categories (keys) -- or whether a continuous sentiment score is associated with individual word patterns, what we call a _valence_-based sentiment scheme. 40 | 41 | Dictionaries can have both polarity and valence weights, but these are not used in the same sentiment scoring scheme. "Polarity" is a category of one of two "poles" (such as negative and positive) applied to dictionary keys, whereas "valence" is a weight applied individually to each value within a key. 42 | 43 | ### Polarity weights 44 | 45 | Polarity weighting assigns the following categories to dictionary keys, to represent the "poles": 46 | * `pos` -- a "positive" end of the scale, although this notion does not need literally to mean positive sentiment. It could indicate any polar position, such as terms indicating confidence. 47 | * `neg` -- a "negative" end of the scale, although once again, this does not need literally to mean negative sentiment. In the example of "positive" indicating confidence, for instance, the "negative" pole could indicate tentative or uncertain language. 48 | * optionally, a `neut` category can also be identified, if this is desired. 49 | 50 | Dictionary keys are linked to each pole using the `polarity() <-` assignment function. The keys linked to each pole will be indicated in the summary information when the dictionary is printed, or returned as a list when calling the function `polarity()`. 51 | 52 | ```{r} 53 | polarity(data_dictionary_LSD2015) 54 | polarity(data_dictionary_LSD2015) <- list(pos = "positive", neg = "negative") 55 | ``` 56 | 57 | Poles can be linked to multiple dictionary keys. For instance, in the Lexicoder 2015 dictionary, there are also two "negation" keys, 58 | `neg_positive` and `neg_negative`, meant to negate the positive terms and to negate the negative terms. To add these to our polarities, we would simply assign them as a list. 59 | 60 | ```{r} 61 | polarity(data_dictionary_LSD2015) 62 | polarity(data_dictionary_LSD2015) <- 63 | list(pos = c("positive", "neg_negative"), neg = c("negative", "neg_positive")) 64 | print(data_dictionary_LSD2015, 0, 0) 65 | ``` 66 | 67 | 68 | ### Valence weights 69 | 70 | Valence weighting is value-based, allowing individual numeric weights to be assigned to word patterns ("values"), rather than being a single pole attached to all of the values in a dictionary key. This allows different weights to be assigned within dictionary keys, for instance with different strengths of positivity or negativity. 
71 | 72 | If we wanted to nuance this dictionary, for instance, we could assign valences to each key: 73 | ```{r} 74 | dict <- dictionary(list(quality = c("bad", "awful", "horrific", 75 | "good", "great", "amazing"))) 76 | dict 77 | ``` 78 | This dictionary has no valences until they are set. To assign valences, we use the `valence()` replacement function, assigning it a list whose elements correspond to the dictionary structure. The names of the list elements should match the dictionary keys whose valences are being set, and the element for each key should be a vector of valences. When this numeric vector is named, order does not matter; otherwise, the order used will be that of the dictionary's values. 79 | ```{r} 80 | valence(dict) <- list(quality = c(amazing = 2.2, awful = -1.5, bad = -1, 81 | horrific = -2, good = 1, great = 1.7)) 82 | ``` 83 | Now, we can see that the valences are set: 84 | ```{r} 85 | dict 86 | valence(dict) 87 | ``` 88 | Because valences are set within each key, different keys can have different valences, even when the word values are the same. So we could add a second key like this: 89 | ```{r} 90 | dict["location"] <- dict["quality"] 91 | valence(dict)["location"] <- list(location = c(amazing = 2.2, awful = -1.5, bad = -1, 92 | horrific = -2, good = 1, great = 1.7)) 93 | print(dict, 0, 0) 94 | ``` 95 | 96 | 97 | This allows sentiment to be counted for dictionaries like the [Affective Norms for English Words (ANEW)](https://csea.phhp.ufl.edu/media.html#bottommedia) dictionary, which has numerical weights from 1.0 to 9.0 for word values in each of three categories: pleasure, arousal, and dominance. As a **quanteda** dictionary, this would consist of three dictionary keys (one for each of pleasure, arousal, and dominance) and each word pattern would form a value in each key. Each word value, furthermore, would have a valence. This allows a single dictionary to contain multiple categories of valence, which can be combined or examined separately using `textstat_valence()`. We return to the example of the ANEW dictionary below. 98 | 99 | Valence can also be assigned to provide the same weight to every value within a key, making it equivalent to polarity. For instance: 100 | ```{r} 101 | dict <- dictionary(list(neg = c("bad", "awful", "horrific"), 102 | pos = c("good", "great", "amazing"))) 103 | valence(dict) <- list(neg = -1, pos = 1) 104 | print(dict) 105 | valence(dict) 106 | ``` 107 | 108 | ### Effects of polarity and valence weights on other functions 109 | 110 | These weights are not currently used by any function other than `textstat_polarity()` and `textstat_valence()`. When using dictionaries with a polarity or valence in any other function, these have no effect. Dictionaries with polarity or valence set operate in every other respect just like regular **quanteda** dictionaries with no polarity or valence. 111 | 112 | 113 | ## Computing sentiment with polarities 114 | 115 | ### Simple example with the LSD 2015 dictionary 116 | 117 | Let's take a simple example of a text with some positive and negative words found in the LSD2015 dictionary. The polarities of this dictionary are assigned by default, so we will erase our local copy and use the one found in the **quanteda.sentiment** package. 
118 | 119 | ```{r} 120 | txt <- c(doc1 = "This is a fantastic, wonderful example.", 121 | doc2 = "The settlement was not amiable.", 122 | doc3 = "The good, the bad, and the ugly.") 123 | toks <- tokens(txt) 124 | 125 | data("data_dictionary_LSD2015", package = "quanteda.sentiment") 126 | polarity(data_dictionary_LSD2015) 127 | ``` 128 | 129 | First, let's see what will be matched. 130 | ```{r} 131 | tokens_lookup(toks, data_dictionary_LSD2015, nested_scope = "dictionary", 132 | exclusive = FALSE) 133 | ``` 134 | Notice the `nested_scope = "dictionary"` argument. This tells the lookup function to consider the scope at which to stop "nesting" the value matches across the dictionary, rather than the default which is within keys. Otherwise, the tokens "not", "amiable" in `doc2` would be matched twice: once for the positive key, matched from the value `"amiab*"`; and once for the `neg_positive` key, matched from the value `"not amiab*"`. With the entire dictionary as the `nested_scope`, however, the (`neg_positive`) `"not amiab*"` is matched first, and then the shorter value from the other (`positive`) key `"amiab*"` is not also matched. 135 | 136 | To compute a polarity-based sentiment score, we need a formula specifying how the categories will be combined. This is supplied through the `fun` argument, which names a function for scoring sentiment through a combination of `pos`, `neg`, and optionally `neut` and `N`, where `N` is short for the total number of tokens or features. 137 | 138 | The **quanteda.sentiment** package includes three functions for converting polarities into a continuous index of sentiment, from Lowe et al. (2011). These are: 139 | 140 | * `sent_logit`, a logit scale computed as $\mathrm{log}(pos + 0.5) - \mathrm{log}(neg + 0.5)$, also the default method; 141 | 142 | * `sent_abspropdiff`, the "absolute proportional difference" scale comparing the difference between positive and negative mentions as a proportion of all counts: computed as $\frac{pos - neg}{N}$; and 143 | 144 | * `sent_relpropdiff`, the "relative proportional difference" scale comparing the difference between positive and negative mentions as a proportion of only the total positive and negative mentions, computed as $\frac{pos - neg}{pos + neg}$. 145 | 146 | Additional custom functions, including those making use of the $neut$ category or using custom weights, can be supplied through the `fun` argument in `textstat_polarity()`, with additional arguments to `fun` supplied through `...` (for instance, the `smooth` argument in `sent_logit`). A small hand computation of all three scales appears below, just before the corpus example. 147 | 148 | So to compute sentiment for the example, we simply need to call `textstat_polarity()`: 149 | ```{r} 150 | textstat_polarity(toks, data_dictionary_LSD2015) 151 | ``` 152 | Or for an alternative scale: 153 | ```{r} 154 | textstat_polarity(toks, data_dictionary_LSD2015, fun = sent_relpropdiff) 155 | ``` 156 | ## Example on real texts 157 | 158 | Let's apply the LSD 2015 to political speeches, namely the inaugural addresses of the US presidents since 1970. We'll use the negation categories too. Notice that we don't even need to tokenize the text here, since the `textstat_polarity()` function can take a corpus as input (and will take care of the appropriate tokenization on its own). 
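Before running this on the corpus, here is a small hand computation of the three scales above, for a hypothetical document containing three positive matches, two negative matches, and ten tokens in total (illustrative counts only, not taken from the corpus):

```{r}
# worked check of the three polarity scales for hypothetical counts
pos <- 3; neg <- 2; N <- 10; smooth <- 0.5
log(pos + smooth) - log(neg + smooth)  # logit scale, as in sent_logit
(pos - neg) / N                        # absolute proportional difference, as in sent_abspropdiff
(pos - neg) / (pos + neg)              # relative proportional difference, as in sent_relpropdiff
```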
159 | 160 | ```{r} 161 | polarity(data_dictionary_LSD2015) <- 162 | list(pos = c("positive", "neg_negative"), neg = c("negative", "neg_positive")) 163 | 164 | sent_pres <- data_corpus_inaugural %>% 165 | corpus_subset(Year > 1970) %>% 166 | textstat_polarity(data_dictionary_LSD2015) 167 | sent_pres 168 | ``` 169 | We can plot this: 170 | ```{r} 171 | library("ggplot2") 172 | ggplot(sent_pres) + 173 | geom_point(aes(x = sentiment, y = reorder(doc_id, sentiment))) + 174 | ylab("") 175 | ``` 176 | 177 | ## Computing sentiment with valences 178 | 179 | Valences provide a more flexible method for computing sentiment, based on numeric values, or valences, attached to specific word patterns. 180 | 181 | ### Simple example with user-supplied valences 182 | 183 | For a dictionary whose polarity or valence has been set, computing sentiment is simple: `textstat_polarity()` or `textstat_valence()` is applied to the object along with the dictionary. Here, we demonstrate this for the LSD2015. 184 | 185 | ```{r} 186 | txt <- c(doc1 = "This is a fantastic, wonderful example.", 187 | doc2 = "The settlement was not amiable.", 188 | doc3 = "The good, the bad, and the ugly.") 189 | toks <- tokens(txt) 190 | 191 | valence(data_dictionary_LSD2015) <- list(positive = 1, negative = -1) 192 | ``` 193 | 194 | 195 | To compute sentiment, `textstat_valence()` will count the two positive and zero negative matches from the first example, and average these across all matches, for a score of 1.0. In the second document, the positive match will generate a score of 1.0, and in the third document, the score will be `sum(1, -1, -1) / 3 = -0.33`. 196 | ```{r} 197 | textstat_valence(toks, data_dictionary_LSD2015) 198 | ``` 199 | 200 | Note that if we include the other dictionary keys, however, then "not amiable" will be matched in the `neg_positive` count, rather than the word "amiable" being counted as positive. Because many dictionary values may be multi-word patterns, we always recommend using `textstat_valence()` on tokens, rather than on `dfm` objects, whose features are individual tokens and therefore cannot match multi-word values. 201 | ```{r} 202 | valence(data_dictionary_LSD2015) <- list(positive = 1, negative = -1, 203 | neg_negative = 1, neg_positive = -1) 204 | textstat_valence(toks, data_dictionary_LSD2015) 205 | ``` 206 | 207 | Here, document 2 is now computed as -1 because its dictionary match is actually to the "neg_positive" category that has a valence of -1. The sentiment function ignored the keys whose valence was not set before, but applies them with `nested_scope = "dictionary"` when it is set, to ensure that only the longer phrase is matched. 208 | ```{r} 209 | tokens_lookup(toks, data_dictionary_LSD2015, exclusive = FALSE, 210 | nested_scope = "dictionary") 211 | ``` 212 | 213 | ### Using the AFINN dictionary 214 | 215 | We can build this dictionary from scratch using the source data: 216 | ```{r} 217 | afinn <- read.delim(system.file("extdata/afinn/AFINN-111.txt", 218 | package = "quanteda.sentiment"), 219 | header = FALSE, col.names = c("word", "valence")) 220 | head(afinn) 221 | ``` 222 | 223 | To make this into a **quanteda** dictionary: 224 | ```{r} 225 | data_dictionary_afinn <- dictionary(list(afinn = afinn$word)) 226 | valence(data_dictionary_afinn) <- list(afinn = afinn$valence) 227 | data_dictionary_afinn 228 | ``` 229 | This dictionary has a single key we have called "afinn", with the valences set from the original `afinn` data.frame/tibble. 
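Because the valences here were supplied as an unnamed numeric vector, they are matched to the dictionary's values by position, so it can be worth confirming that the counts line up. A quick check, using only the objects created above:

```{r}
# number of values in the single "afinn" key vs. number of valence scores;
# these should be equal, since unnamed valences are assigned by position
lengths(data_dictionary_afinn)
lengths(valence(data_dictionary_afinn))
```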
230 | 231 | We can now use this to apply `textstat_valence()`: 232 | ```{r} 233 | textstat_valence(toks, data_dictionary_afinn) 234 | ``` 235 | How was this computed? We can use the dictionary to examine the words, and also to get their sentiment. 236 | ```{r} 237 | tokssel <- tokens_select(toks, data_dictionary_afinn) 238 | tokssel 239 | 240 | valence(data_dictionary_afinn)$afinn[as.character(tokssel)] 241 | ``` 242 | So here, doc1 had a score of `(4 + 4) / 2 = 4`, doc2 had no score because none of its tokens matched values in the AFINN dictionary, and doc3 had a score of `(3 + -3 + -3) / 3 = -1`. 243 | 244 | ### Using the ANEW dictionary with multiple keys 245 | 246 | The ANEW, or Affective Norms for English Words (Bradley and Lang 2017), provides a lexicon of 2,471 distinct fixed word matches that are associated with three valenced categories: pleasure, arousal, and dominance. Reading in the original format, we have to convert this into a **quanteda** dictionary format, and add the valence values. Because this format requires a list of separate keys, we need to create a dictionary key for each of the three categories, and assign the lexicon to each key. With the ANEW, it just so happens that the lexicon entries -- or "values" in **quanteda** parlance -- are the same for each key, but this is not a necessary feature of valenced dictionaries. 247 | 248 | ```{r} 249 | anew <- read.delim(url("https://bit.ly/2zZ44w0")) 250 | anew <- anew[!duplicated(anew$Word), ] # because some words repeat 251 | data_dictionary_anew <- dictionary(list(pleasure = anew$Word, 252 | arousal = anew$Word, 253 | dominance = anew$Word)) 254 | valence(data_dictionary_anew) <- list(pleasure = anew$ValMn, 255 | arousal = anew$AroMn, 256 | dominance = anew$DomMn) 257 | ``` 258 | 259 | Now we can see that we have the dictionary in **quanteda** format with the valences attached. We also see that the values are the same in each key. 260 | ```{r} 261 | print(data_dictionary_anew, max_nval = 5) 262 | ``` 263 | 264 | The best way to compute sentiment is to choose a key and use it separately, because each key here contains the same values. 265 | ```{r} 266 | textstat_valence(toks, data_dictionary_anew["pleasure"]) 267 | textstat_valence(toks, data_dictionary_anew["arousal"]) 268 | ``` 269 | 270 | If we don't subset the dictionary keys, it will combine them, which is probably not what we want: 271 | ```{r} 272 | textstat_valence(toks, data_dictionary_anew) 273 | 274 | tokssel <- tokens_select(toks, data_dictionary_anew) 275 | vals <- lapply(valence(data_dictionary_anew), 276 | function(x) x[as.character(tokssel)]) 277 | vals 278 | ``` 279 | Without selection, the average is across all three keys: 280 | ```{r} 281 | mean(unlist(vals)) 282 | ``` 283 | 284 | ## Equivalences between polarity and valence approaches 285 | 286 | Valences can be set to produce results equivalent to the polarity-based approach, if this is desired. Considering our brief example above, and making sure we have both polarity and valence set for the LSD2015, we can show this for the two non-logit scale polarity functions. 
287 | ```{r} 288 | corpus(txt) 289 | valence(data_dictionary_LSD2015) <- list(positive = 1, negative = -1, 290 | neg_negative = 1, neg_positive = -1) 291 | print(data_dictionary_LSD2015, 0, 0) 292 | ``` 293 | Computing this by absolute proportional difference: 294 | ```{r} 295 | textstat_polarity(txt, data_dictionary_LSD2015, fun = sent_abspropdiff) 296 | ``` 297 | is the same as computing it this way using valences: 298 | ```{r} 299 | textstat_valence(txt, data_dictionary_LSD2015, norm = "all") 300 | ``` 301 | For the relative proportional difference: 302 | ```{r} 303 | textstat_polarity(txt, data_dictionary_LSD2015, fun = sent_relpropdiff) 304 | textstat_valence(txt, dictionary = data_dictionary_LSD2015, norm = "dict") 305 | ``` 306 | 307 | 308 | ## References 309 | 310 | Bradley, M.M. & Lang, P.J. (2017). [Affective Norms for English Words (ANEW): Instruction manual and affective ratings](https://pdodds.w3.uvm.edu/teaching/courses/2009-08UVM-300/docs/others/everything/bradley1999a.pdf). _Technical Report C-3_. Gainesville, FL: UF Center for the Study of Emotion and Attention. 311 | 312 | Liu, B. (2015). _Sentiment analysis: Mining opinions, sentiments, and emotions_. Cambridge University Press. 313 | 314 | Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011). Scaling Policy Preferences from Coded Political Texts. _Legislative Studies Quarterly_, 36(1), 123–155. \doi{10.1111/j.1939-9162.2010.00006.x}. 315 | --------------------------------------------------------------------------------