├── .Rbuildignore
├── .github
│   ├── .gitignore
│   └── workflows
│       ├── R-CMD-check.yaml
│       └── test-coverage.yaml
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
│   ├── aaa.R
│   ├── data-documentation.R
│   ├── print.R
│   ├── quanteda.sentiment-package.R
│   ├── re-exports.R
│   ├── textstat_polarity.R
│   └── textstat_valence.R
├── README.Rmd
├── README.md
├── codecov.yml
├── data
│   ├── data_dictionary_AFINN.rda
│   ├── data_dictionary_ANEW.rda
│   ├── data_dictionary_HuLiu.rda
│   ├── data_dictionary_LSD2015.rda
│   ├── data_dictionary_LoughranMcDonald.rda
│   ├── data_dictionary_NRC.rda
│   ├── data_dictionary_Rauh.rda
│   ├── data_dictionary_geninqposneg.rda
│   └── data_dictionary_sentiws.rda
├── inst
│   ├── WORDLIST
│   └── extdata
│       └── afinn
│           ├── AFINN-111.txt
│           ├── AFINN-96.txt
│           └── AFINN-README.txt
├── man
│   ├── data_dictionary_AFINN.Rd
│   ├── data_dictionary_ANEW.Rd
│   ├── data_dictionary_HuLiu.Rd
│   ├── data_dictionary_LSD2015.Rd
│   ├── data_dictionary_LoughranMcDonald.Rd
│   ├── data_dictionary_NRC.Rd
│   ├── data_dictionary_Rauh.Rd
│   ├── data_dictionary_geninqposneg.Rd
│   ├── data_dictionary_sentiws.Rd
│   ├── get_polarity_dictionary.Rd
│   ├── images
│   │   ├── unnamed-chunk-5-1.png
│   │   └── unnamed-chunk-6-1.png
│   ├── polarity.Rd
│   ├── quanteda.sentiment-package.Rd
│   ├── sentiment-functions.Rd
│   ├── textstat_polarity.Rd
│   ├── textstat_valence.Rd
│   └── valence.Rd
├── sources
│   ├── AFINN
│   │   ├── AFINN-111.txt
│   │   ├── AFINN-96.txt
│   │   ├── AFINN-README.txt
│   │   └── create-data_dictionary_AFINN.R
│   ├── ANEW
│   │   └── create-data_dictionary_ANEW.R
│   ├── Hu-Liu
│   │   ├── create_data_dictionary-HuLiu.R
│   │   ├── negative-words-UTF8.txt
│   │   └── positive-words.txt
│   ├── Laver-Garry
│   │   ├── Laver_and_Garry_2000.cat
│   │   └── create-data_dictionary_LaverGarry.R
│   ├── Loughran-McDonald
│   │   ├── Loughran_and_McDonald_2014.cat
│   │   └── create-data_dictionary_LoughranMcDonald.R
│   ├── MFD
│   │   ├── create-data_dictionary_MFD.R
│   │   ├── mfd2.0.dic
│   │   └── moral_foundations_dictionary.dic
│   ├── NRC
│   │   └── create_data_dictionary-NRC.R
│   ├── RID
│   │   ├── RID.CAT
│   │   └── create-data_dictionary_RID.R
│   ├── Rauh
│   │   ├── Rauh_SentDictionaryGerman.Rdata
│   │   ├── Rauh_SentDictionaryGerman_Negation.Rdata
│   │   └── create-data_dictionary_Rauh.R
│   ├── geninquirer
│   │   ├── create-data_dictionary_geninquirer.R
│   │   └── inquireraugmented.csv
│   ├── make_sentiment_dictionaries.R
│   ├── sentiws
│   │   ├── create-data_dictionary_sentiws.R
│   │   ├── sentiws_v1.8c_negative.txt
│   │   └── sentiws_v1.8c_positive.txt
│   ├── test-misc.R
│   └── uk_us_english
│       └── data_dict_usbr.csv
├── tests
│   ├── spelling.R
│   ├── testthat.R
│   └── testthat
│       ├── test-data.R
│       ├── test-misc.R
│       ├── test-test.R
│       ├── test-textstat_polarity.R
│       └── test-textstat_valence.R
└── vignettes
    ├── .gitignore
    └── sentiment_analysis.Rmd
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^\.travis\.yml$
4 | ^appveyor\.yml$
5 | ^codecov\.yml$
6 | ^README\.Rmd$
7 | ^\.github$
8 | ^doc$
9 | ^Meta$
10 | sources
11 |
--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 |
--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | push:
5 | branches: [main, master]
6 | pull_request:
7 | branches: [main, master]
8 |
9 | name: R-CMD-check
10 |
11 | jobs:
12 | R-CMD-check:
13 | runs-on: ${{ matrix.config.os }}
14 |
15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }})
16 |
17 | strategy:
18 | fail-fast: false
19 | matrix:
20 | config:
21 | - {os: macos-latest, r: 'release'}
22 | - {os: windows-latest, r: 'release'}
23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
24 | - {os: ubuntu-latest, r: 'release'}
25 | - {os: ubuntu-latest, r: 'oldrel-1'}
26 |
27 | env:
28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
29 | R_KEEP_PKG_SOURCE: yes
30 |
31 | steps:
32 | - uses: actions/checkout@v4
33 |
34 | - uses: r-lib/actions/setup-pandoc@v2
35 |
36 | - uses: r-lib/actions/setup-r@v2
37 | with:
38 | r-version: ${{ matrix.config.r }}
39 | http-user-agent: ${{ matrix.config.http-user-agent }}
40 | use-public-rspm: true
41 |
42 | - uses: r-lib/actions/setup-r-dependencies@v2
43 | with:
44 | extra-packages: any::rcmdcheck
45 | needs: check
46 |
47 | - uses: r-lib/actions/check-r-package@v2
48 | with:
49 | upload-snapshots: true
50 |
--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | push:
5 | branches: [main, master]
6 | pull_request:
7 | branches: [main, master]
8 |
9 | name: test-coverage
10 |
11 | jobs:
12 | test-coverage:
13 | runs-on: ubuntu-latest
14 | env:
15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
16 |
17 | steps:
18 | - uses: actions/checkout@v4
19 |
20 | - uses: r-lib/actions/setup-r@v2
21 | with:
22 | use-public-rspm: true
23 |
24 | - uses: r-lib/actions/setup-r-dependencies@v2
25 | with:
26 | extra-packages: any::covr
27 | needs: coverage
28 |
29 | - name: Test coverage
30 | run: |
31 | covr::codecov(
32 | quiet = FALSE,
33 | clean = FALSE,
34 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
35 | )
36 | shell: Rscript {0}
37 |
38 | - name: Show testthat output
39 | if: always()
40 | run: |
41 | ## --------------------------------------------------------------------
42 | find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
43 | shell: bash
44 |
45 | - name: Upload test results
46 | if: failure()
47 | uses: actions/upload-artifact@v4
48 | with:
49 | name: coverage-test-failures
50 | path: ${{ runner.temp }}/package
51 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # History files
2 | .Rhistory
3 | .Rapp.history
4 | # Session Data files
5 | .RData
6 | # Example code in package build process
7 | *-Ex.R
8 | # Output files from R CMD build
9 | /*.tar.gz
10 | # Output files from R CMD check
11 | /*.Rcheck/
12 | # RStudio files
13 | .Rproj.user/
14 | # produced vignettes
15 | vignettes/*.html
16 | vignettes/*.pdf
17 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
18 | .httr-oauth
19 | # knitr and R markdown default cache directories
20 | /*_cache/
21 | /cache/
22 | # Temporary files created by R markdown
23 | *.utf8.md
24 | *.knit.md
25 | .Rproj.user
26 | *.Rproj
27 | inst/doc
28 | doc
29 | Meta
30 | .DS_Store
31 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: quanteda.sentiment
2 | Title: Sentiment Analysis using 'quanteda'
3 | Version: 0.31
4 | Authors@R: c( person("Kenneth", "Benoit", email = "kbenoit@lse.ac.uk", role =
5 | c("aut", "cre", "cph")) )
6 | Description: Adds functions and dictionaries for computing sentiment using the 'quanteda' package.
7 | Depends:
8 | R (>= 4.1.0),
9 | quanteda (>= 3.2.1),
10 | methods
11 | Imports:
12 | Matrix,
13 | stringi
14 | License: GPL-3
15 | Encoding: UTF-8
16 | LazyData: true
17 | RoxygenNote: 7.3.1
18 | Suggests:
19 | covr,
20 | ggplot2,
21 | knitr,
22 | rmarkdown,
23 | spelling,
24 | testthat
25 | Roxygen: list(markdown = TRUE)
26 | Language: en-GB
27 | VignetteBuilder: knitr
28 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | S3method("polarity<-",dictionary2)
4 | S3method("valence<-",dictionary2)
5 | S3method(polarity,dictionary2)
6 | S3method(textstat_polarity,character)
7 | S3method(textstat_polarity,corpus)
8 | S3method(textstat_polarity,default)
9 | S3method(textstat_polarity,dfm)
10 | S3method(textstat_polarity,tokens)
11 | S3method(textstat_valence,character)
12 | S3method(textstat_valence,corpus)
13 | S3method(textstat_valence,default)
14 | S3method(textstat_valence,dfm)
15 | S3method(textstat_valence,tokens)
16 | S3method(valence,dictionary2)
17 | export("polarity<-")
18 | export("valence<-")
19 | export(polarity)
20 | export(sent_abspropdiff)
21 | export(sent_logit)
22 | export(sent_relpropdiff)
23 | export(textstat_polarity)
24 | export(textstat_valence)
25 | export(valence)
26 | import(methods)
27 | importFrom(Matrix,rowSums)
28 | importFrom(quanteda,as.dfm)
29 | importFrom(quanteda,convert)
30 | importFrom(quanteda,corpus)
31 | importFrom(quanteda,dfm)
32 | importFrom(quanteda,dfm_lookup)
33 | importFrom(quanteda,dictionary)
34 | importFrom(quanteda,tokens)
35 | importFrom(quanteda,tokens_lookup)
36 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # quanteda.sentiment 0.2x
2 |
3 | * Usability improvements
4 | * nested_scope = "dictionary" is now standard for applications on tokens, for textstat_polarity() (#12)
5 | * Minor changes in preparation for forthcoming v3 release.
6 |
7 | # quanteda.sentiment 0.1
8 |
9 | * Added a `NEWS.md` file to track changes to the package.
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/R/aaa.R:
--------------------------------------------------------------------------------
1 | build_dictionary2 <- quanteda:::build_dictionary2
2 | validate_dictionary <- quanteda:::validate_dictionary
3 | build_dictionary2 <- quanteda:::build_dictionary2
4 |
--------------------------------------------------------------------------------
/R/data-documentation.R:
--------------------------------------------------------------------------------
1 | #' Lexicoder Sentiment Dictionary (2015)
2 | #'
3 | #' The 2015 Lexicoder Sentiment Dictionary in \pkg{quanteda} [dictionary]
4 | #' format.
5 | #'
6 | #' @details
7 | #' The dictionary consists of 2,858 "negative" sentiment words and 1,709
8 | #' "positive" sentiment words. A further set of 2,860 and 1,721 negations of
9 | #' negative and positive words, respectively, is also included. While many users
10 | #' will find the non-negation sentiment forms of the LSD adequate for sentiment
11 | #' analysis, Young and Soroka (2012) did find a small, but non-negligible
12 | #' increase in performance when accounting for negations. Users wishing to test
13 | #' this or include the negations are encouraged to subtract negated positive
14 | #' words from the count of positive words, and subtract the negated negative
15 | #' words from the negative count.
16 | #'
17 | #' Young and Soroka (2012) also suggest the use of a pre-processing script to
18 | #' remove specific cases of some words (i.e., "good bye", or "nobody better",
19 | #' which should not be counted as positive). Pre-processing scripts are
20 | #' available at <http://lexicoder.com>.
21 | #' @section License and Conditions:
22 | #' The LSD is available for non-commercial academic purposes only. By using
23 | #' `data_dictionary_LSD2015`, you accept these terms.
24 | #'
25 | #' Please cite the references below when using the dictionary.
26 | #' @format
27 | #' A [dictionary] of four keys containing glob-style [pattern
28 | #' matches][valuetype].
29 | #' \describe{
30 | #' \item{`negative`}{2,858 word patterns indicating negative sentiment}
31 | #' \item{`positive`}{1,709 word patterns indicating positive sentiment}
32 | #' \item{`neg_positive`}{1,721 word patterns indicating a positive word preceded
33 | #' by a negation (used to convey negative sentiment)}
34 | #' \item{`neg_negative`}{2,860 word patterns indicating a negative word preceded
35 | #' by a negation (used to convey positive sentiment)}
36 | #' }
37 | #' @references
38 | #' The objectives, development and reliability of the dictionary are discussed
39 | #' in detail in Young and Soroka (2012). Please cite this article when using
40 | #' the Lexicoder Sentiment Dictionary and related resources.
41 | #'
42 | #' Young, L. & Soroka, S. (2012). *Lexicoder Sentiment
43 | #' Dictionary*. Available at <http://lexicoder.com>.
44 | #'
45 | #' Young, L. & Soroka, S. (2012). Affective News: The Automated Coding of
46 | #' Sentiment in Political Texts. *Political Communication*, 29(2), 205--231.
47 | #' \doi{10.1080/10584609.2012.671234}
48 | #' @keywords data
49 | #' @examples
50 | #' # checking polarity
51 | #' polarity(data_dictionary_LSD2015)
52 | #'
53 | #' # simple example
54 | #' library("quanteda")
55 | #' txt <- "This aggressive policy will not win friends."
56 | #'
57 | #' tokens_lookup(tokens(txt), dictionary = data_dictionary_LSD2015,
58 | #' exclusive = FALSE)
59 | #' ## tokens from 1 document.
60 | #' ## text1 :
61 | #' ## [1] "This" "NEGATIVE" "policy" "will" "NEG_POSITIVE" "POSITIVE" "POSITIVE" "."
62 | #'
63 | #' # notice that double-counting of negated and non-negated terms is avoided
64 | #' # when using nested_scope = "dictionary"
65 | #' tokens_lookup(tokens(txt), dictionary = data_dictionary_LSD2015,
66 | #' exclusive = FALSE, nested_scope = "dictionary")
67 | #' ## tokens from 1 document.
68 | #' ## text1 :
69 | #' ## [1] "This" "NEGATIVE" "policy" "will" "NEG_POSITIVE" "POSITIVE."
70 | #'
71 | #' # on larger examples - notice that few negations are used
72 | #' tail(data_corpus_inaugural) |>
73 | #' tokens() |>
74 | #' tokens_lookup(dictionary = data_dictionary_LSD2015) |>
75 | #' dfm()
76 | "data_dictionary_LSD2015"
77 |
78 | #' NRC Word-Emotion Association Lexicon
79 | #'
80 | #' @description
81 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing Mohammad and
82 | #' Turney's (2010, 2013) English version of the NRC Word-Emotion Association
83 | #' Lexicon (aka NRC Emotion Lexicon aka EmoLex): association of words with eight
84 | #' emotions (anger, fear, anticipation, trust, surprise, sadness, joy, and disgust)
85 | #' and two sentiments (negative and positive) manually annotated on Amazon's
86 | #' Mechanical Turk.
87 | #'
88 | #' @description
89 | #' The Sentiment and Emotion Lexicons is a collection of lexicons that was
90 | #' entirely created by the experts of the National Research Council of Canada.
91 | #' Developed with a wide range of applications, this lexicon collection can be
92 | #' used in a multitude of contexts such as sentiment analysis, product
93 | #' marketing, consumer behaviour and even political campaign analysis.
94 | #'
95 | #' @description
96 | #' The technology uses a list of words that help identify emotions, sentiment,
97 | #' as well as analyzing hashtags, emoticons and word-colour associations. The
98 | #' lexicons contain entries for English words, and can be used to analyze
99 | #' English texts.
100 | #' @references
101 | #' Mohammad, S. & Turney, P. (2013). [Crowdsourcing a Word-Emotion Association
102 | #' Lexicon](https://arxiv.org/abs/1308.6297). *Computational Intelligence*,
103 | #' 29(3), 436--465.
104 | #'
105 | #' Mohammad, S. & Turney, P. (2010). [Emotions Evoked by Common Words and
106 | #' Phrases: Using Mechanical Turk to Create an Emotion
107 | #' Lexicon](https://dl.acm.org/doi/10.5555/1860631.1860635). In *Proceedings of
108 | #' the NAACL-HLT 2010 Workshop on Computational Approaches to Analysis and
109 | #' Generation of Emotion in Text*, June 2010, LA, California.
110 | #' @source <http://saifmohammad.com/WebPages/lexicons.html>
111 | #'
112 | #' See also <http://saifmohammad.com/WebPages/NRC-Emotion-Lexicon.htm>
113 | #' @section License and Terms of Use:
114 | #' Free for research purposes.
115 | #'
116 | #' For questions about the commercial license, email Pierre Charron (Client
117 | #' Relationship Leader at NRC): `Pierre.Charron@nrc-cnrc.gc.ca`.
118 | #'
119 | #' Terms of Use:
120 | #' * Cite the papers associated with the lexicons in your research papers and
121 | #' articles that make use of them. (The papers associated with each lexicon
122 | #' are listed below, and also in the READMEs for individual lexicons.)
123 | #' * In news articles and online posts on work using these lexicons, cite the
124 | #' appropriate lexicons. For example: "This application/product/tool makes
125 | #' use of the `resource name`, created by `author(s)` at the National
126 | #' Research Council Canada." (The creators of each lexicon are listed below.
127 | #' Also, if you send us an email, we will be thrilled to know about how you
128 | #' have used the lexicon.) If possible hyperlink to this page:
129 | #' <http://saifmohammad.com/WebPages/lexicons.html>.
130 | #' * If you use a lexicon in a product or application, then acknowledge this in
131 | #' the 'About' page and other relevant documentation of the application by
132 | #' stating the name of the resource, the authors, and NRC. For example: "This
133 | #' application/product/tool makes use of the `resource name`, created by
134 | #' `author(s)` at the National Research Council Canada." (The creators of
135 | #' each lexicon are listed below. Also, if you send us an email, we will be
136 | #' thrilled to know about how you have used the lexicon.) If possible
137 | #' hyperlink to this page: <http://saifmohammad.com/WebPages/lexicons.html>.
138 | #' * Do not redistribute the data. Direct interested parties to this page:
139 | #' <http://saifmohammad.com/WebPages/lexicons.html>.
140 | #' * National Research Council Canada (NRC) disclaims any responsibility for
141 | #' the use of the lexicons listed here and does not provide technical
142 | #' support. However, the contact listed above will be happy to respond to
143 | #' queries and clarifications.
144 | #' @note Technical and research-related questions can be addressed to Saif M.
145 | #' Mohammad (Senior Research Scientist at NRC):
146 | #' `Saif.Mohammad@nrc-cnrc.gc.ca`.
147 | #' @keywords data
148 | "data_dictionary_NRC"
149 |
150 | #' Positive and negative words from Hu and Liu (2004)
151 | #'
152 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing 2,006
153 | #' positive and 4,783 negative words from Hu and Liu (2004, 2005).
154 | #' @format
155 | #' A [dictionary] of fixed word patterns with two keys:
156 | #' * `positive`: 2,006 words with positive polarity
157 | #' * `negative`: 4,783 words with negative polarity
158 | #' @references
159 | #' Hu, M. & Liu, B. (2004). [Mining and Summarizing Customer
160 | #' Reviews](https://www.cs.uic.edu/~liub/publications/kdd04-revSummary.pdf). In
161 | #' Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery
162 | #' and Data Mining (KDD-2004), Aug 22--25, 2004, Seattle, Washington, USA.
163 | #'
164 | #' Liu, B., Hu, M., & Cheng, J. (2005). [Opinion Observer: Analyzing and
165 | #' Comparing Opinions on the
166 | #' Web](https://www.cs.uic.edu/~liub/publications/www05-p536.pdf). In
167 | #' Proceedings of the 14th International World Wide Web conference (WWW-2005),
168 | #' May 10--14, 2005, Chiba, Japan.
169 | #' @section License:
170 | #' Unknown.
171 | #' @source <https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#lexicon>
172 | #' @keywords data
173 | "data_dictionary_HuLiu"
174 |
175 | #' Augmented General Inquirer *Positiv* and *Negativ* dictionary
176 | #'
177 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing the
178 | #' *Positiv* and *Negativ* dictionary entries from the augmented
179 | #' General Inquirer. These are new valence categories described at
180 | #' `http://www.wjh.harvard.edu/~inquirer/homecat.htm` but also including the
181 | #' terms from the "yes" and "no" dictionary entries.
182 | #' @format
183 | #' A [dictionary] of fixed word patterns with two keys:
184 | #' * `positive`: 1,653 words with positive polarity
185 | #' * `negative`: 2,010 words with negative polarity
186 | #' @references Stone, P.J., Dunphy, C.D., & Smith, M.S. (1966).
187 | #' *The General Inquirer: A Computer Approach to Content Analysis.*
188 | #' Cambridge, MA: MIT Press.
189 | #' @source `http://www.wjh.harvard.edu/~inquirer/spreadsheet_guide.htm` --
190 | #' although this site ceased operating some time in 2021
191 | #' @keywords data
192 | "data_dictionary_geninqposneg"
193 |
194 | #' SentimentWortschatz (SentiWS)
195 | #'
196 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing
197 | #' SentimentWortschatz (SentiWS), a publicly available German-language resource
198 | #' for sentiment analysis. The current version of SentiWS contains 1,650
199 | #' positive and 1,818 negative words, which sum up to 15,649 positive and 15,632
200 | #' negative word forms including their inflections. It not only contains
201 | #' adjectives and adverbs explicitly expressing a sentiment, but also nouns and
202 | #' verbs implicitly containing one. The original dictionary weights within the
203 | #' interval of -1 to 1. Note that the version implemented in
204 | #' \pkg{quanteda.sentiment} uses a binary classification into positive
205 | #' (weight > 0) and negative (weight < 0) features.
206 | #' @source <https://wortschatz.uni-leipzig.de/en/download/>
207 | #' @references
208 | #' Remus, R., Quasthoff U., and Heyer, G. (2010). [SentiWS: a Publicly
209 | #' Available German-language Resource for Sentiment
210 | #' Analysis](http://www.lrec-conf.org/proceedings/lrec2010/pdf/490_Paper.pdf).
211 | #' In _Proceedings of the 7th International Language Resources and Evaluation
212 | #' (LREC'10)_, 1168--1171.
213 | #'
214 | #' @keywords data
215 | "data_dictionary_sentiws"
216 |
217 | #' Nielsen's (2011) 'new ANEW' valenced word list
218 | #'
219 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing Finn Årup
220 | #' Nielsen's (2011) 'new ANEW' valenced word list, a publicly available list of
221 | #' English words rated for valence with values between -5 (negative) and +5
222 | #' (positive). AFINN-111, the latest version, contains 2,477 words and phrases.
223 | #' @source <http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=6010>
224 | #' @references
225 | #' Nielsen, F. Å. (2011). [A new ANEW: Evaluation of a Word List for Sentiment
226 | #' Analysis in Microblogs.](https://arxiv.org/abs/1103.2903) In *Proceedings
227 | #' of the ESWC2011 Workshop on 'Making Sense of Microposts': Big Things Come
228 | #' in Small Packages*, 93--98.
229 | #' @format
230 | #' A [dictionary] with one key, `AFINN`, with valences from -5 (negative) to +5
231 | #' (positive).
232 | #' @section License:
233 | #' [Open Database License (ODbL) v1.0](https://opendatacommons.org/licenses/odbl/1-0/)
234 | #' @keywords data
235 | "data_dictionary_AFINN"
236 |
237 | #' Affective Norms for English Words (ANEW)
238 | #'
239 | #' A quanteda dictionary object containing the ANEW, or Affective Norms for
240 | #' English Words (Bradley and Lang 2017) valenced lexicon. The ANEW provides a
241 | #' lexicon of 2,471 distinct fixed word matches that are associated with three
242 | #' valenced categories: pleasure, arousal, and dominance.
243 | #' @format
244 | #' A [dictionary] with three valenced keys: `pleasure`, `arousal`, and
245 | #' `dominance`, each with valences from 1 to 9 and containing the same 2,471
246 | #' fixed word values.
247 | #' @section License:
248 | #' ANEW Statement of Use
249 | #'
250 | #' In accepting the ANEW materials, I agree not to make the ANEW available to
251 | #' the media (television, magazines, etc.) or to place them on any internet or
252 | #' computer-accessible websites. I also agree not to publish the ANEW in any
253 | #' print format – including JOURNALS, newspapers, etc. I also agree that I will
254 | #' not provide the ANEW materials to profit making companies or organizations
255 | #' and I agree not to distribute my username and password to unauthorized
256 | #' parties.
257 | #' @keywords data
258 | "data_dictionary_ANEW"
259 |
260 | #' Rauh's German Political Sentiment Dictionary
261 | #'
262 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing the
263 | #' dictionaries provided in Rauh (forthcoming). Rauh assesses its performance
264 | #' against human intuition of sentiment in German political language
265 | #' (parliamentary speeches, party manifestos, and media coverage). The resource
266 | #' builds on, harmonizes and extends the SentiWS (Remus et al. 2010) and
267 | #' GermanPolarityClues (Waltinger 2010) dictionaries. In order to use the
268 | #' negation correction provided by the dictionary, currently a combination of
269 | #' [tokens_replace][quanteda::tokens_replace] and [tokens_compound][quanteda::tokens_compound] is
270 | #' required to harmonize the five covered bi-gram patterns prior to scoring. The
271 | #' example below shows how to conduct this transformation. Note that the process
272 | #' changes the terms "nicht|nichts|kein|keine|keinen" to a joint term altering
273 | #' some of the features of the original corpus.
274 | #' @format The dictionary has four keys.
275 | #' \describe{
276 | #' \item{`negative`}{19,750 terms indicating negative sentiment}
277 | #' \item{`positive`}{17,330 terms indicating positive sentiment}
278 | #' \item{`neg_positive`}{17,330 terms indicating a positive word preceded
279 | #' by a negation (used to convey negative sentiment)}
280 | #' \item{`neg_negative`}{19,750 terms indicating a negative word preceded
281 | #' by a negation (used to convey positive sentiment)}
282 | #' }
283 | #' @source \doi{10.7910/DVN/BKBXWD}
284 | #' @references
285 | #' Rauh, C. (2018). Validating a Sentiment Dictionary for German Political
286 | #' Language: A Workbench Note.
287 | #' *Journal of Information Technology & Politics*, 15(4), 319--343.
288 | #' \doi{10.1080/19331681.2018.1485608}
289 | #'
290 | #' Remus, R., Quasthoff U., & Heyer, G. (2010). "[SentiWS - a Publicly
291 | #' Available German-language Resource for Sentiment
292 | #' Analysis.](http://www.lrec-conf.org/proceedings/lrec2010/pdf/490_Paper.pdf)"
293 | #' In *Proceedings of the 7th International Language Resources and Evaluation
294 | #' (LREC'10)*, 1168--1171.
295 | #'
296 | #' Waltinger, U. (2010). "[GermanPolarityClues: A Lexical Resource for German
297 | #' Sentiment Analysis](http://www.ulliwaltinger.de/pdf/91_Paper.pdf)." In
298 | #' *International Conference on Language Resources and Evaluation*, 17--23 May
299 | #' 2010 LREC'10.
300 | #' @examples
301 | #' \donttest{
302 | #' # tokenize example text
303 | #' toks <- tokens("nicht schlecht dieses wunderschöne Wörterbuch")
304 | #' # replace negation markers with "not"
305 | #' toks1 <- tokens_replace(toks, pattern = c("nicht", "nichts", "kein",
306 | #' "keine", "keinen"),
307 | #' replacement = rep("not", 5))
308 | #' # compound bi-gram negation patterns
309 | #' toks2 <- tokens_compound(toks1, data_dictionary_Rauh, concatenator = " ")
310 | #'
311 | #' # apply dictionary
312 | #' tokens_lookup(toks2, dictionary = data_dictionary_Rauh) |>
313 | #' dfm()
314 | #' }
315 | #' @keywords data
316 | "data_dictionary_Rauh"
317 |
318 | #' Loughran and McDonald Sentiment Word Lists
319 | #'
320 | #' A \pkg{quanteda} [dictionary][quanteda::dictionary] object containing
321 | #' the 2014 version of the Loughran and McDonald Sentiment Word Lists. The
322 | #' categories are "negative" (2355 features), "positive" (354), "uncertainty" (297), "litigious" (903),
323 | #' "constraining" (184), "superfluous" (56), "interesting" (68), "modal words strong" (68)
324 | #' and "modal words weak" (0).
325 | #' @source <https://sraf.nd.edu/loughranmcdonald-master-dictionary/>
326 | #' @references
327 | #' Loughran, T. & McDonald, B. (2011). When is a Liability not a Liability?
328 | #' Textual Analysis, Dictionaries, and 10-Ks.
329 | #' *Journal of Finance*, 66(1), 35--65. \doi{10.1111/j.1540-6261.2010.01625.x}
330 | #' @keywords data
331 | "data_dictionary_LoughranMcDonald"
332 |
333 |
--------------------------------------------------------------------------------
/R/print.R:
--------------------------------------------------------------------------------
1 | print_dictionary <- quanteda:::print_dictionary
2 |
3 | # new method for printing sentiment dictionaries
4 | #' @import methods
5 | setMethod("print", signature(x = "dictionary2"),
6 | function(x,
7 | max_nkey = quanteda::quanteda_options("print_dictionary_max_nkey"),
8 | max_nval = quanteda::quanteda_options("print_dictionary_max_nval"),
9 | show_summary = quanteda::quanteda_options("print_dictionary_summary"),
10 | ...) {
11 | x <- quanteda::as.dictionary(x)
12 | if (show_summary) {
13 | depth <- dictionary_depth(x)
14 | lev <- if (depth > 1L) " primary" else ""
15 | nkey <- length(names(x))
16 | cat("Dictionary object with ", as.character(nkey), lev, " key entr",
17 | if (nkey == 1L) "y" else "ies", sep = "")
18 | if (lev != "") cat(" and ", as.character(depth), " nested levels", sep = "")
19 | cat(".\n")
20 | if (!is.null(polarity(x))) {
21 | cat("Polarities: ")
22 | poles <- lapply(polarity(x), function(y) paste0("\"", y, "\""))
23 | cat(mapply(paste, names(poles), "=",
24 | unname(sapply(poles, paste, collapse = ", "))) |>
25 | paste(collapse = "; "),
26 | "\n")
27 | }
28 | if (!is.null(valence(x))) {
29 | cat("Valences set for keys: ")
30 | cat(paste(names(valence(x)), collapse = ", "), "\n")
31 | }
32 | }
33 | invisible(print_dictionary(x, 1, max_nkey, max_nval, ...))
34 | })
35 |
--------------------------------------------------------------------------------
/R/quanteda.sentiment-package.R:
--------------------------------------------------------------------------------
1 | #' @keywords internal
2 | "_PACKAGE"
3 |
4 | # The following block is used by usethis to automatically manage
5 | # roxygen namespace tags. Modify with care!
6 | ## usethis namespace: start
7 | ## usethis namespace: end
8 | NULL
9 |
--------------------------------------------------------------------------------
/R/re-exports.R:
--------------------------------------------------------------------------------
1 | friendly_class_undefined_message <- quanteda:::friendly_class_undefined_message
2 | dictionary_depth <- quanteda:::dictionary_depth
3 | print_dictionary <- quanteda:::print_dictionary
4 |
--------------------------------------------------------------------------------
/R/textstat_polarity.R:
--------------------------------------------------------------------------------
# textstat_polarity ----------------

#' Compute sentiment from key polarities
#'
#' Compute sentiment scores using a polarity approach, based on assigned
#' categories (types or features) of positive, negative, and neutral sentiment.
#' Several formulas for combining the polar categories are available, or the
#' user can supply a custom function.
#' @param x a character, [corpus], [tokens], or [dfm] object containing
#'   text, tokens, or features whose sentiment will be scored
#' @param dictionary a [dictionary] that has [polarity] set, indicating which
#'   keys are associated with positive, negative, and (optionally) neutral
#'   sentiment
#' @param fun function; the formula for computing sentiment, which must refer to
#'   `pos`, `neg`, and (optionally) `neut`. The default is the "logit" scale
#'   (Lowe et al 2011) which is the log of (positive / negative) counts. See
#'   [sentiment-functions] for details and for additional available functions,
#'   as well as details on how to supply custom functions.
#' @param ... additional arguments passed to `fun`
#' @return a [data.frame] of sentiment scores
#' @export
#' @references Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011).
#'   Scaling Policy Preferences from Coded Political Texts. _Legislative Studies
#'   Quarterly_, 36(1), 123–155. \doi{10.1111/j.1939-9162.2010.00006.x}
#' @examples
#' library("quanteda")
#' corp <- tail(data_corpus_inaugural, n = 5)
#' toks <- tokens(corp)
#' dfmat <- dfm(toks)
#' polar1 <- list(pos = "positive", neg = "negative")
#' polar2 <- list(pos = c("positive", "neg_negative"),
#'                neg = c("negative", "neg_positive"))
#'
#' polarity(data_dictionary_LSD2015) <- polar1
#' textstat_polarity(corp, dictionary = data_dictionary_LSD2015)
#' textstat_polarity(toks, dictionary = data_dictionary_LSD2015)
#' textstat_polarity(dfmat, dictionary = data_dictionary_LSD2015)
#'
#' polarity(data_dictionary_LSD2015) <- polar2
#' textstat_polarity(corp, dictionary = data_dictionary_LSD2015)
#' textstat_polarity(toks, dictionary = data_dictionary_LSD2015)
#' textstat_polarity(dfmat, dictionary = data_dictionary_LSD2015)
#'
#' # with a user-supplied function
#' sent_fn <- function(x) (x[, "pos"] - x[, "neg"]) / rowSums(x) * 100
#' textstat_polarity(toks, data_dictionary_LSD2015, fun = sent_fn)
textstat_polarity <- function(x, dictionary, fun = sent_logit, ...) {
  UseMethod("textstat_polarity")
}
51 |
#' @export
textstat_polarity.default <- function(x, dictionary, fun = sent_logit, ...) {
  # unsupported input class: raise the standard "undefined method" error
  msg <- friendly_class_undefined_message(class(x), "textstat_polarity")
  stop(msg)
}
56 |
#' @importFrom quanteda corpus
#' @export
textstat_polarity.character <- function(x, ...) {
  # promote raw character input to a corpus, then re-dispatch
  corp <- corpus(x)
  textstat_polarity(corp, ...)
}
62 |
#' @importFrom quanteda tokens
#' @export
textstat_polarity.corpus <- function(x, ...) {
  # tokenize the corpus, then re-dispatch on the tokens method
  toks <- tokens(x)
  textstat_polarity(toks, ...)
}
68 |
#' @importFrom quanteda dictionary tokens_lookup dfm
#' @export
textstat_polarity.tokens <- function(x, dictionary, ...) {
  # standardize the dictionary into pos/neg(/neut) keys
  dict <- get_polarity_dictionary(dictionary)

  # build a one-level dictionary whose keys are the polarity categories,
  # carrying the polarity setting over to it for the dfm method
  poldict <- dictionary(polarity(dict))
  polarity(poldict) <- polarity(dict)

  toks <- tokens_lookup(tokens(x), dictionary = dict, nomatch = "other",
                        nested_scope = "dictionary")
  textstat_polarity(dfm(toks), dictionary = poldict, ...)
}
81 |
#' @importFrom quanteda convert dfm_lookup as.dfm
#' @export
textstat_polarity.dfm <- function(x, dictionary, fun = sent_logit, ...) {
  dict <- get_polarity_dictionary(dictionary)

  # count polar features, apply the sentiment formula, then reshape the
  # resulting one-column matrix into a textstat data.frame
  counts <- dfm_lookup(x, dict, nomatch = "other")
  scores <- fun(counts, ...)
  result <- convert(as.dfm(scores), to = "data.frame")
  names(result)[2] <- "sentiment"

  # record the scoring function on the result for printing/inspection
  class(result) <- c("sentiment", "textstat", "data.frame")
  attr(result, "fun") <- fun
  attr(result, "fun_name") <- as.character(substitute(fun))

  result
}
97 |
98 |
99 | # polarity setting and checking functions --------------
100 |
#' Set or get the sentiment polarity of a dictionary
#'
#' Set or retrieve the polarity of a [dictionary] object for the purposes of
#' sentiment analysis. Polarity consists of a set of dictionary keys that are
#' associated with positive, negative, and (optionally) neutral categories for
#' use in [textstat_polarity()].
#'
#' A dictionary may have only one set of polarities at a time, but these may be
#' reassigned as needed.
#' @param x a [dictionary] object
#' @return `polarity()` returns the polarity as a list.
#' @keywords dictionary textstat utility
#' @export
#'
#' @examples
#' library("quanteda")
#' simpledict <- dictionary(list(
#'   happy = c("happy", "jubilant", "exuberant"),
#'   sad = c("sad", "morose", "down")
#' ))
#' polarity(simpledict)
#' polarity(simpledict) <- list(pos = "happy", neg = "sad")
#' polarity(simpledict)
#'
#' # can list multiple keys
#' polarity(data_dictionary_LSD2015) <- list(
#'   pos = c("positive", "neg_negative"),
#'   neg = c("negative", "neg_positive")
#' )
#' polarity(data_dictionary_LSD2015)
polarity <- function(x) {
  UseMethod("polarity")
}
134 |
#' @export
polarity.dictionary2 <- function(x) {
  # polarity is stored in the dictionary's object metadata
  meta <- x@meta$object
  meta$polarity
}
139 |
#' @rdname polarity
#' @param value list consisting of named character vectors `pos`, `neg`, and
#'   (optionally) `neut` corresponding to positive, negative, and neutral
#'   sentiment categories respectively. Each element may contain multiple key
#'   names. The `neut` category is optional, but `pos` and `neg` must be
#'   supplied.
#' @return `polarity<-` sets the dictionary's polarity.
#' @export
"polarity<-" <- function(x, value) {
  UseMethod("polarity<-")
}
151 |
#' @export
"polarity<-.dictionary2" <- function(x, value) {
  if (!is.null(value)) {
    # value must be a list containing 'pos' and 'neg' (and optionally 'neut'),
    # with no other element names; the previous check allowed 'pos' or 'neg'
    # to be omitted, contrary to the documented contract for `value`
    if (!is.list(value) ||
        !all(c("pos", "neg") %in% names(value)) ||
        !all(names(value) %in% c("pos", "neg", "neut"))) {
      stop("value must be a list of 'pos', 'neg', and (optionally) 'neut'",
           call. = FALSE)
    }
    check_that_poles_exist(x, value)
  } else {
    # removing polarity: revert to the plain dictionary class only when no
    # valence is set either
    if (is.null(valence(x))) class(x) <- "dictionary2"
  }

  x@meta$object$polarity <- value
  x
}
169 |
#' Get a standard polarity dictionary for sentiment analysis
#'
#' Checks and standardizes a [dictionary] object with its [polarity] set, so
#' that the polarity categories are standardized into the keys `pos`, `neg`,
#' and (optionally) `neut`. Also checks that the dictionary contains all of the
#' keys named in the polarity object. (It is necessary to check here since the
#' dictionary could have been subset after creation.)
#' @param dictionary a \pkg{quanteda} [dictionary]
#' @return a single-level [dictionary] with keys `pos`, `neg`, and (optionally)
#'   `neut`.
#' @keywords internal
get_polarity_dictionary <- function(dictionary) {
  poles <- polarity(dictionary)

  # polarity must have been set via polarity()<-
  if (is.null(poles)) {
    stop("polarity is not set for this dictionary; see ?polarity",
         call. = FALSE)
  }
  check_that_poles_exist(dictionary, poles)

  # flatten the (possibly multiple) keys for each pole into single
  # pos/neg/neut keys
  dictlist <- list(
    pos = unlist(dictionary[poles$pos], use.names = FALSE),
    neg = unlist(dictionary[poles$neg], use.names = FALSE),
    neut = unlist(dictionary[poles$neut], use.names = FALSE)
  )
  # drop an absent neut category; vapply is type-stable, unlike sapply
  dict <- dictionary(dictlist[!vapply(dictlist, is.null, logical(1))])

  # point the polarity at the standardized keys
  newpoles <- list(pos = "pos", neg = "neg")
  if (!is.null(dictlist$neut)) newpoles <- c(newpoles, list(neut = "neut"))
  polarity(dict) <- newpoles

  dict
}
206 |
207 |
# Check that every key named in a polarity list is present in the dictionary;
# errors (without a call) listing all missing keys, returns NULL invisibly
# otherwise.  The previous version passed a vector straight to stop(), which
# concatenated multiple missing keys into one garbled name.
check_that_poles_exist <- function(dictionary, poles) {
  poles <- unlist(poles, use.names = FALSE)
  polematch <- poles %in% names(dictionary)
  if (!all(polematch)) {
    stop("'", paste(poles[!polematch], collapse = "', '"),
         "' key not found in this dictionary",
         call. = FALSE)
  }
}
216 |
217 | # sentiment formula functions --------------
218 |
219 | #' Sentiment functions
220 | #'
221 | #' Functions for computing sentiment, for [textstat_polarity()]. Each function
222 | #' takes an input [dfm] with fixed feature names (see Details), and returns a
223 | #' sparse Matrix with a single column representing the results of the sentiment
224 | #' calculation.
225 | #'
226 | #' @details
227 | #' User supplied functions must take `x` and optional additional arguments, such
228 | #' as `smooth` for a smoothing constant for the logit scaling function. The input `x` is a [dfm] with feature
229 | #' names for the sentiment categories `pos`, `neg`, `neut`, and `other`. (The
230 | #' `other` category is only required when a scaling function needs the count of
231 | #' non-sentiment associated features.)
232 | #'
233 | #' Additional arguments may be passed via `...`, such as `smooth` for the logit
234 | #' scale.
235 | #'
236 | #' @param x a [dfm] that has the following required feature names: `pos`,
237 | #' `neg`, `neut`, and `other`
238 | #' @return a sparse \pkg{Matrix} object of documents by sentiment score, where
239 | #' the sentiment score is the only column. (Its name is unimportant as this
240 | #' will not be used by [textstat_polarity()].)
241 | #' @keywords textstat internal
242 | #' @references Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011).
243 | #' Scaling Policy Preferences from Coded Political Texts. _Legislative Studies
244 | #' Quarterly_, 36(1), 123–155.
245 | #' \doi{10.1111/j.1939-9162.2010.00006.x}
246 | #' @name sentiment-functions
247 | #' @examples
248 | #' library("quanteda")
249 | #' dfmat <- c("pos pos pos neg pos pos", "neg neg pos pos pos") |>
250 | #' tokens() |>
251 | #' dfm()
252 | #' sent_logit(dfmat)
253 | #' sent_abspropdiff(dfmat)
254 | #'
255 | #' # user-supplied function
256 | #' my_sent_fn <- function(x) (x[, "pos"] - x[, "neg"]) / rowSums(x) * 100
257 | #' my_sent_fn(dfmat)
258 | #'
259 | #' # user supplied function with fixed weights and using neutral category
260 | #' dfmat2 <- c("pos pos neut neg neut pos", "neg neg neut neut pos") |>
261 | #' tokens() |>
262 | #' dfm()
263 | #' my_sent_fn2 <- function(x) (x[, "pos"]*3 + x[, "neut"]*2 + x[, "neg"]*1)/3
264 | #' my_sent_fn2(dfmat2)
265 | NULL
266 |
#' @description `sent_logit` is \eqn{log(\frac{pos}{neg})}.
#' @rdname sentiment-functions
#' @param smooth a smoothing constant added to `pos` and `neg` before
#'   logarithmic transformation
#' @export
sent_logit <- function(x, smooth = 0.5) {
  # smoothed log-odds of positive versus negative counts
  logpos <- log(x[, "pos"] + smooth)
  logneg <- log(x[, "neg"] + smooth)
  logpos - logneg
}
275 |
#' @description `sent_abspropdiff` is \eqn{\frac{pos - neg}{N}}, where \eqn{N}
#'   is the total number of all features in a document.
#' @rdname sentiment-functions
#' @importFrom Matrix rowSums
#' @export
sent_abspropdiff <- function(x) {
  # net sentiment count scaled by the document's total feature count
  net <- x[, "pos"] - x[, "neg"]
  net / Matrix::rowSums(x)
}
284 |
#' @description `sent_relpropdiff` is \eqn{\frac{pos - neg}{pos + neg}}.
#' @rdname sentiment-functions
#' @export
sent_relpropdiff <- function(x) {
  # net sentiment relative to the total of polar counts only
  pos <- x[, "pos"]
  neg <- x[, "neg"]
  (pos - neg) / (pos + neg)
}
291 |
--------------------------------------------------------------------------------
/R/textstat_valence.R:
--------------------------------------------------------------------------------
# textstat_valence ----------------

#' Compute sentiment from word valences
#'
#' Compute sentiment scores from tokens or document-feature matrices, based on
#' the valences of dictionary keys and values.
#' @param x a character, [corpus], [tokens], or [dfm] object containing
#'   text, tokens, or features whose sentiment will be scored.
#' @param dictionary a \pkg{quanteda} [dictionary] that has [valence] set, in
#'   the form of numerical valences associated with sentiment
#' @param normalization the baseline for normalizing the sentiment counts after
#'   scoring. Sentiment scores within keys are weighted means of the tokens
#'   matched to dictionary values, weighted by their valences. The default
#'   `"dictionary"` is to average over only the valenced words. `"all"`
#'   averages across all tokens, and `"none"` does no normalization.
#' @param ... not used here
#' @return a data.frame of sentiment scores
#' @note
#' If the input item is a [dfm], then multi-word values will not be matched
#' unless the features of the [dfm] have been compounded previously. The input
#' objects should not have had dictionaries applied previously.
#' @export
#' @references
#' For a discussion of how to aggregate sentiment scores to the document
#' level, see:
#'
#' Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011).
#' Scaling Policy Preferences from Coded Political Texts. _Legislative Studies
#' Quarterly_, 36(1), 123–155.
#' \doi{10.1111/j.1939-9162.2010.00006.x}
#' @seealso [valence()]
#' @examples
#' library("quanteda")
#' \dontrun{
#' # the tokens to be scored; previously the examples used an undefined `toks`
#' toks <- tokens(tail(data_corpus_inaugural, n = 3))
#'
#' # AFINN
#' afinn <- read.delim(system.file("extdata/afinn/AFINN-111.txt",
#'                                 package = "quanteda.sentiment"),
#'                     header = FALSE, col.names = c("word", "valence"))
#' data_dictionary_afinn <- dictionary(list(afinn = afinn$word))
#' valence(data_dictionary_afinn) <- list(afinn = afinn$valence)
#' textstat_valence(toks, dictionary = data_dictionary_afinn)
#'
#' # ANEW
#' anew <- read.delim(url("https://bit.ly/2zZ44w0"))
#' anew <- anew[!duplicated(anew$Word), ] # because some words repeat
#' data_dictionary_anew <- dictionary(list(pleasure = anew$Word,
#'                                         arousal = anew$Word,
#'                                         dominance = anew$Word))
#' valence(data_dictionary_anew) <- list(pleasure = anew$ValMn,
#'                                       arousal = anew$AroMn,
#'                                       dominance = anew$DomMn)
#' textstat_valence(toks, data_dictionary_anew["pleasure"])
#' textstat_valence(toks, data_dictionary_anew["arousal"])}
#'
textstat_valence <- function(x, dictionary,
                             normalization = c("dictionary", "all", "none"), ...) {
  UseMethod("textstat_valence")
}
60 |
#' @export
textstat_valence.default <- function(x, dictionary,
                                     normalization = c("dictionary", "all", "none"), ...) {
  # unsupported input class: raise the standard "undefined method" error
  msg <- friendly_class_undefined_message(class(x), "textstat_valence")
  stop(msg)
}
66 |
#' @export
textstat_valence.character <- function(x, ...) {
  # promote raw character input to a corpus, then re-dispatch
  corp <- corpus(x)
  textstat_valence(corp, ...)
}
71 |
#' @export
textstat_valence.corpus <- function(x, ...) {
  # tokenize the corpus, then re-dispatch on the tokens method
  toks <- tokens(x)
  textstat_valence(toks, ...)
}
76 |
#' @export
textstat_valence.tokens <- function(x, dictionary,
                                    normalization = c("dictionary", "all", "none"), ...) {
  normalization <- match.arg(normalization)
  # standardize the valences, then build a dictionary whose keys are the
  # numeric valence values themselves
  valence(dictionary) <- set_valences(dictionary, valence(dictionary))
  numdict <- dictionary(as.list(flip_valence(dictionary)))
  toks <- tokens_lookup(quanteda::as.tokens(x), dictionary = numdict,
                        nomatch = "other", nested_scope = "dictionary")
  aggregate_valence(dfm(toks), norm = normalization)
}
89 |
#' @export
textstat_valence.dfm <- function(x, dictionary,
                                 normalization = c("dictionary", "all", "none"), ...) {
  normalization <- match.arg(normalization)
  # standardize the valences, then build a dictionary whose keys are the
  # numeric valence values themselves
  valence(dictionary) <- set_valences(dictionary, valence(dictionary))
  numdict <- dictionary(as.list(flip_valence(dictionary)))
  counts <- dfm_lookup(as.dfm(x), dictionary = numdict, nomatch = "other")
  aggregate_valence(counts, norm = normalization)
}
100 |
101 | # internal sentiment calculation functions -----------
102 |
# uses Kohei's approach to make the valence values into the keys, and
# then groups all values together under that score
flip_valence <- function(dictionary) {
  v <- valence(dictionary)
  if (is.null(v)) stop("valence not set", call. = FALSE)

  # invert each key's named numeric vector: the numeric valences become the
  # names (and later, dictionary keys) and the words become the values;
  # lapply keeps a list regardless of shape, unlike sapply
  structure(unlist(lapply(v, names), use.names = FALSE),
            names = unlist(v, use.names = FALSE))
}
112 |
# Aggregate a dfm of valence-keyed counts into one sentiment score per
# document: a weighted sum of counts with the numeric column names as weights,
# divided by the chosen normalization baseline.  Documents with a zero
# denominator get a score of 0.
aggregate_valence <- function(x, norm = c("dictionary", "all", "none")) {
  norm <- match.arg(norm)
  # the "other" feature is absent when every token matched the dictionary,
  # in which case match() returns NA and x[, -NA] would error
  other_index <- match("other", colnames(x))
  dict_part <- if (is.na(other_index)) x else x[, -other_index]
  if (norm == "dictionary") {
    denom <- rowSums(dict_part)
  } else if (norm == "all") {
    denom <- rowSums(x)
  } else {  # "none"
    denom <- 1
  }
  result <- data.frame(doc_id = quanteda::docnames(dict_part),
                       sentiment = as.vector(dict_part %*%
                                             as.numeric(colnames(dict_part))
                                             / denom))
  result$sentiment[is.na(result$sentiment)] <- 0
  result
}
130 |
131 | # valence setting and checking functions --------------
132 |
#' Set or get the valences of dictionary values or keys
#'
#' Set or retrieve the valences of a [dictionary] object for the purposes of
#' sentiment analysis. Valences consist of numerical values attached to each
#' dictionary "value". For dictionaries with a more "polarity"-based approach,
#' see [textstat_polarity()].
#'
#' Valences are used only in [textstat_valence()].
#'
#' A dictionary may have only one set of valences at a time, but may be
#' changed as needed.
#' @param x a \pkg{quanteda} [dictionary][quanteda::dictionary] object
#' @return `valence()` returns the valences as a list of named numeric vectors,
#'   where each list element corresponds to a key in the dictionary, and each
#'   numeric element matches a value within that key.
#' @keywords dictionary textstat utility
#' @seealso [textstat_valence()]
#' @export
#'
#' @examples
#' library("quanteda")
#'
#' # setting valences
#' dict <- dictionary(list(
#'   happiness = c("happy", "jubilant", "exuberant", "content"),
#'   anger = c("mad", "peeved", "irate", "furious", "livid")
#' ))
#' valence(dict)
#' # using a 5-point scale: 1:1 match
#' valence(dict) <- list(happiness = c(3, 4, 5, 2),
#'                       anger = c(3.1, 2.4, 2.9, 4.1, 5.0))
#' valence(dict)
#' # with single valences applied to all values within the keys
#' valence(dict) <- c(happiness = 1, anger = -1)
#' valence(dict)
#' # with named elements - order does not matter
#' valence(dict) <- list(
#'   happiness = c(exuberant = 5, jubilant = 4, happy = 3, content = 2)
#' )
#' valence(dict)
#'
valence <- function(x) {
  UseMethod("valence")
}
177 |
#' @export
valence.dictionary2 <- function(x) {
  # valences are stored in the dictionary's object metadata
  meta <- x@meta$object
  meta$valence
}
182 |
#' @rdname valence
#' @param value named list consisting of numerical values. The names of the
#'   elements must correspond to a dictionary key. Each element must be:
#'   * a single numeric value that will be applied to all of the dictionary
#'   values in that key; or
#'   * a vector of numeric values that matches the length and order of the
#'   dictionary values in that key; or
#'   * a named numeric vector where each element name matches dictionary values
#'   in the key.
#' @return `valence<-` sets the dictionary's valences.
#' @export
"valence<-" <- function(x, value) {
  UseMethod("valence<-")
}
197 |
#' @export
"valence<-.dictionary2" <- function(x, value) {
  if (is.null(value)) {
    # remove any stored valences
    x@meta$object$valence <- NULL
    # NOTE(review): this condition is asymmetric with `polarity<-.dictionary2`,
    # which resets the class when the *other* attribute is absent
    # (is.null(valence(x))); confirm which direction is intended
    if (!is.null(polarity(x))) class(x) <- "dictionary2"
  } else {
    value <- as.list(value)
    check_valences(x, value)
    x@meta$object$valence <- set_valences(x, value)
  }
  x
}
210 |
211 | dictionary_depth <- quanteda:::dictionary_depth
212 |
# Validate a user-supplied valence list against a dictionary: the dictionary
# must be flat, the list fully named, and each element numeric with length 1
# or matching the number of values in its key.  Errors (without a call) on the
# first violation; returns NULL invisibly otherwise.
check_valences <- function(dictionary, valences) {
  if (dictionary_depth(dictionary) > 1)
    stop("valenced dictionaries cannot be nested", call. = FALSE)
  # names() is NULL for a fully unnamed list, which the original test
  # (any(names(x) == "")) silently passed since any(logical(0)) is FALSE
  if (!is.list(valences) || is.null(names(valences)) ||
      any(names(valences) == ""))
    stop("valence must be a fully named list", call. = FALSE)
  for (key in names(valences)) {
    if (!key %in% names(dictionary))
      stop("'", key, "' is not a dictionary key", call. = FALSE)
    if (!is.numeric(valences[[key]]))
      stop("valence values must be numeric", call. = FALSE)
    if (length(valences[[key]]) != 1 &&
        length(valences[[key]]) != length(dictionary[[key]]))
      stop("valence value length not equal to number of values for key '",
           key, "'", call. = FALSE)
  }
}
229 |
# Standardize a valence list against a dictionary: keep only entries whose
# names are dictionary keys, recycle single values across a key's values, and
# name each numeric vector by the dictionary values when names are missing.
set_valences <- function(dictionary, valences) {
  keep <- names(valences) %in% names(dictionary)
  valences <- valences[keep]
  if (length(valences) == 0) {
    stop("no valenced keys found")
  }

  for (key in names(valences)) {
    vals <- valences[[key]]
    dictvals <- dictionary[[key]]
    # a single valence applies to every value in the key
    if (length(vals) == 1) {
      vals <- rep(vals, length(dictvals))
    }
    # fall back to the dictionary values as names when none were supplied
    if (length(names(vals)) != length(vals)) {
      names(vals) <- dictvals
    }
    valences[[key]] <- vals
  }
  valences
}
246 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output: github_document
3 | ---
4 |
5 | ```{r, echo = FALSE}
6 | knitr::opts_chunk$set(
7 | collapse = TRUE,
8 | comment = "##",
9 | fig.path = "man/images/"
10 | )
11 | ```
12 | ```{r echo = FALSE, results = "hide", message = FALSE}
13 | library("badger")
14 | ```
15 |
16 | # quanteda.sentiment
17 |
18 |
19 | [](https://cran.r-project.org/package=quanteda.sentiment)
20 | `r badge_devel("quanteda/quanteda.sentiment", "royalblue")`
21 | [](https://lifecycle.r-lib.org/articles/stages.html#experimental)
22 | [](https://app.codecov.io/gh/quanteda/quanteda.sentiment?branch=master)
23 | [](https://github.com/quanteda/quanteda.sentiment/actions/workflows/R-CMD-check.yaml)
24 |
25 |
26 | ## Installation
27 |
28 | You can install **quanteda.sentiment** from GitHub with:
29 |
30 | ```{r eval = FALSE}
31 | remotes::install_github("quanteda/quanteda.sentiment")
32 | ```
33 |
34 | The package is not yet on CRAN.
35 |
36 | ## About
37 |
38 | **quanteda.sentiment** extends the **quanteda** package with functions for computing sentiment on text. It has two main functions, for computing two types of sentiment. These follow the structure of a **quanteda** dictionary, which consists of _key_ entries expressing the canonical concept, and _value_ patterns (such as "good", "sad*", etc.) to be matched in a text and counted as occurrences of that key.
39 |
40 | This package approaches sentiment computation in two ways, depending on whether sentiment is considered a key attribute, in which case the keys are assigned a _polarity_ such as _positive_ or _negative_, or whether individual values are assigned a _valence_, in the form of some continuous value indicating a degree of sentiment. Each is implemented in a separate function:
41 |
42 | * **Polarity-based sentiment.**  This is implemented via `textstat_polarity()`, for computing a sentiment based on keys set as "poles" of positive versus negative sentiment.  Setting polarity is done through the `polarity()<-` function and can be set for any dictionary, for any keys.  "Sentiment" here can be broadly construed as any contrasting pair of poles, such as "Democrat" versus "Republican", for instance.  More than one key can be associated with the same pole.
43 |
44 | Polar values are converted into sentiment scores using a flexible function, such as $\mathrm{log}(pos / neg)$, or $(pos - neg)/(pos + neg)$. **quanteda.sentiment** offers three built-in functions, but the user can supply any function for combining polarities.
45 |
46 | * **Valence-based sentiment.**  This is implemented via `textstat_valence()`, for computing sentiment as the average valence of a document's words, based on a dictionary whose values have numeric valence scores.  Valence scores are set using the `valence()<-` function.  Each key in a dictionary may have values with different valences.
47 |
48 | The package comes with the following built-in dictionaries:
49 |
50 | | Name | Description | Polarity | Valence |
51 | |:---------------------------------|:--------------------------------------------------------------|:--------:|:-------:|
52 | | data_dictionary_AFINN | Nielsen's (2011) 'new ANEW' valenced word list | | ✔ |
53 | | data_dictionary_ANEW | Affective Norms for English Words (ANEW) | | ✔ |
54 | | data_dictionary_geninqposneg | Augmented General Inquirer _Positiv_ and _Negativ_ dictionary | ✔ | |
55 | | data_dictionary_HuLiu | Positive and negative words from Hu and Liu (2004) | ✔ | |
56 | | data_dictionary_LoughranMcDonald | Loughran and McDonald Sentiment Word Lists | ✔ | |
57 | | data_dictionary_LSD2015 | Lexicoder Sentiment Dictionary (2015) | ✔ | |
58 | | data_dictionary_NRC | NRC Word-Emotion Association Lexicon | ✔ | |
59 | | data_dictionary_Rauh | Rauh's German Political Sentiment Dictionary | ✔ | |
60 | | data_dictionary_sentiws | SentimentWortschatz (SentiWS) | ✔ | ✔ |
61 |
62 |
63 | ## Examples
64 |
65 | For a polarity dictionary, we can use the positive and negative key categories from the General Inquirer dictionary:
66 | ```{r}
67 | library("quanteda.sentiment")
68 |
69 | # inspect the dictionary and its polarities
70 | print(data_dictionary_geninqposneg, max_nval = 8)
71 |
72 | # compute sentiment
73 | tail(data_corpus_inaugural) |>
74 | textstat_polarity(dictionary = data_dictionary_geninqposneg)
75 | ```
76 |
77 | For a valence dictionary, we can compute this for the "pleasure" category of the Affective Norms for English Words (ANEW):
78 | ```{r}
79 | library("quanteda", warn.conflicts = FALSE, quietly = TRUE)
80 | library("quanteda.sentiment")
81 |
82 | # inspect the dictionary and its valences
83 | print(data_dictionary_ANEW, max_nval = 8)
84 | lapply(valence(data_dictionary_ANEW), head, 8)
85 |
86 | # compute the sentiment
87 | tail(data_corpus_inaugural) |>
88 | textstat_valence(dictionary = data_dictionary_ANEW["pleasure"])
89 | ```
90 |
91 | We can compare two measures computed in different ways (although they are not comparable, really, since they are different lexicons):
92 | ```{r}
93 | # ensure we have this package's version of the dictionary
94 | data("data_dictionary_LSD2015", package = "quanteda.sentiment")
95 |
96 | sent_pol <- tail(data_corpus_inaugural, 25) |>
97 | textstat_polarity(dictionary = data_dictionary_LSD2015)
98 | sent_pol <- dplyr::mutate(sent_pol, polarity = sentiment)
99 | sent_val <- tail(data_corpus_inaugural, 25) |>
100 | textstat_valence(dictionary = data_dictionary_AFINN)
101 |
102 | library("ggplot2")
103 |
104 | ggplot(data.frame(sent_pol, valence = sent_val$sentiment),
105 | aes(x = polarity, y = valence)) +
106 | geom_point()
107 | ```
108 |
109 | Good enough for government work!
110 |
111 | ## Where to learn more
112 |
113 | Each dictionary and function has extensive documentation, including references to social scientific research articles where each sentiment concept is described in detail. There is also a package vignette with more detailed examples.
114 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # quanteda.sentiment
3 |
4 |
5 |
6 | [](https://cran.r-project.org/package=quanteda.sentiment)
7 | [](https://github.com/quanteda/quanteda.sentiment)
8 | [](https://lifecycle.r-lib.org/articles/stages.html#experimental)
10 | [](https://app.codecov.io/gh/quanteda/quanteda.sentiment?branch=master)
12 | [](https://github.com/quanteda/quanteda.sentiment/actions/workflows/R-CMD-check.yaml)
13 |
14 |
15 | ## Installation
16 |
17 | You can install **quanteda.sentiment** from GitHub with:
18 |
19 | ``` r
20 | remotes::install_github("quanteda/quanteda.sentiment")
21 | ```
22 |
23 | The package is not yet on CRAN.
24 |
25 | ## About
26 |
27 | **quanteda.sentiment** extends the **quanteda** package with functions
28 | for computing sentiment on text. It has two main functions, for
29 | computing two types of sentiment. These follow the structure of a
30 | **quanteda** dictionary, which consists of *key* entries expressing the
31 | canonical concept, and *value* patterns (such as “good”, “sad\*“, etc.)
32 | to be matched in a text and counted as occurrences of that key.
33 |
34 | This package approaches sentiment computation in two ways,
35 | depending on whether sentiment is considered a
36 | key attribute, in which case the keys are assigned a *polarity* such as
37 | *positive* or *negative*, or whether individual values are assigned a
38 | *valence*, in the form of some continuous value indicating a degree of
39 | sentiment. Each is implemented in a separate function:
40 |
41 | - **Polarity-based sentiment.** This is implemented via
42 | `textstat_polarity()`, for computing a sentiment based on keys set as
43 | “poles” of positive versus negative sentiment. Setting polarity is
44 | done through the `polarity()<-` function and can be set for any
45 | dictionary, for any keys. “Sentiment” here can be broadly construed as
46 | any contrasting pair of poles, such as “Democrat” versus “Republican”,
47 | for instance. More than one key can be associated with the same pole.
48 |
49 | Polar values are converted into sentiment scores using a flexible
50 | function, such as $\mathrm{log}(pos / neg)$, or
51 | $(pos - neg)/(pos + neg)$. **quanteda.sentiment** offers three
52 | built-in functions, but the user can supply any function for combining
53 | polarities.
54 |
55 | - **Valence-based sentiment.** This is implemented via
56 | `textstat_valence()`, for computing sentiment as the average valence
57 | of a document’s words, based on a dictionary whose values have numeric
58 | valence scores. Valence scores are set using the `valence()<-`
59 | function. Each key in a dictionary may have values with different
60 | valences.
61 |
62 | The package comes with the following built-in dictionaries:
63 |
64 | | Name | Description | Polarity | Valence |
65 | |:---------------------------------|:--------------------------------------------------------------|:--------:|:-------:|
66 | | data_dictionary_AFINN | Nielsen’s (2011) ‘new ANEW’ valenced word list | | ✔ |
67 | | data_dictionary_ANEW | Affective Norms for English Words (ANEW) | | ✔ |
68 | | data_dictionary_geninqposneg | Augmented General Inquirer *Positiv* and *Negativ* dictionary | ✔ | |
69 | | data_dictionary_HuLiu | Positive and negative words from Hu and Liu (2004) | ✔ | |
70 | | data_dictionary_LoughranMcDonald | Loughran and McDonald Sentiment Word Lists | ✔ | |
71 | | data_dictionary_LSD2015 | Lexicoder Sentiment Dictionary (2015) | ✔ | |
72 | | data_dictionary_NRC | NRC Word-Emotion Association Lexicon | ✔ | |
73 | | data_dictionary_Rauh | Rauh’s German Political Sentiment Dictionary | ✔ | |
74 | | data_dictionary_sentiws | SentimentWortschatz (SentiWS) | ✔ | ✔ |
75 |
76 | ## Examples
77 |
78 | For a polarity dictionary, we can use the positive and negative key
79 | categories from the General Inquirer dictionary:
80 |
81 | ``` r
82 | library("quanteda.sentiment")
83 | ## Loading required package: quanteda
84 | ## Package version: 4.0.0
85 | ## Unicode version: 14.0
86 | ## ICU version: 71.1
87 | ## Parallel computing: 10 of 10 threads used.
88 | ## See https://quanteda.io for tutorials and examples.
89 | ##
90 | ## Attaching package: 'quanteda.sentiment'
91 | ## The following object is masked from 'package:quanteda':
92 | ##
93 | ## data_dictionary_LSD2015
94 |
95 | # inspect the dictionary and its polarities
96 | print(data_dictionary_geninqposneg, max_nval = 8)
97 | ## Dictionary object with 2 key entries.
98 | ## Polarities: pos = "positive"; neg = "negative"
99 | ## - [positive]:
100 | ## - abide, ability, able, abound, absolve, absorbent, absorption, abundance [ ... and 1,645 more ]
101 | ## - [negative]:
102 | ## - abandon, abandonment, abate, abdicate, abhor, abject, abnormal, abolish [ ... and 2,002 more ]
103 |
104 | # compute sentiment
105 | tail(data_corpus_inaugural) |>
106 | textstat_polarity(dictionary = data_dictionary_geninqposneg)
107 | ## doc_id sentiment
108 | ## 1 2001-Bush 0.9233579
109 | ## 2 2005-Bush 0.9829457
110 | ## 3 2009-Obama 0.5666378
111 | ## 4 2013-Obama 0.7597420
112 | ## 5 2017-Trump 0.7724428
113 | ## 6 2021-Biden 0.6018714
114 | ```
115 |
116 | For a valence dictionary, we can compute this for the “pleasure”
117 | category of the Affective Norms for English Words (ANEW):
118 |
119 | ``` r
120 | library("quanteda", warn.conflicts = FALSE, quietly = TRUE)
121 | library("quanteda.sentiment")
122 |
123 | # inspect the dictionary and its valences
124 | print(data_dictionary_ANEW, max_nval = 8)
125 | ## Dictionary object with 3 key entries.
126 | ## Valences set for keys: pleasure, arousal, dominance
127 | ## - [pleasure]:
128 | ## - abduction, able, abortion, absent, absurd, abundance, abuse, accept [ ... and 2,463 more ]
129 | ## - [arousal]:
130 | ## - abduction, able, abortion, absent, absurd, abundance, abuse, accept [ ... and 2,463 more ]
131 | ## - [dominance]:
132 | ## - abduction, able, abortion, absent, absurd, abundance, abuse, accept [ ... and 2,463 more ]
133 | lapply(valence(data_dictionary_ANEW), head, 8)
134 | ## $pleasure
135 | ## abduction able abortion absent absurd abundance abuse accept
136 | ## 2.76 6.74 3.50 3.69 4.26 6.59 1.80 6.80
137 | ##
138 | ## $arousal
139 | ## abduction able abortion absent absurd abundance abuse accept
140 | ## 5.53 4.30 5.39 4.73 4.36 5.51 6.83 5.53
141 | ##
142 | ## $dominance
143 | ## abduction able abortion absent absurd abundance abuse accept
144 | ## 3.49 6.83 4.59 4.35 4.73 5.80 3.69 5.41
145 |
146 | # compute the sentiment
147 | tail(data_corpus_inaugural) |>
148 | textstat_valence(dictionary = data_dictionary_ANEW["pleasure"])
149 | ## doc_id sentiment
150 | ## 1 2001-Bush 6.091330
151 | ## 2 2005-Bush 6.308839
152 | ## 3 2009-Obama 5.841437
153 | ## 4 2013-Obama 6.045129
154 | ## 5 2017-Trump 6.223944
155 | ## 6 2021-Biden 6.018528
156 | ```
157 |
158 | We can compare two measures computed in different ways (although they
159 | are not comparable, really, since they are different lexicons):
160 |
161 | ``` r
162 | # ensure we have this package's version of the dictionary
163 | data("data_dictionary_LSD2015", package = "quanteda.sentiment")
164 |
165 | sent_pol <- tail(data_corpus_inaugural, 25) |>
166 | textstat_polarity(dictionary = data_dictionary_LSD2015)
167 | sent_pol <- dplyr::mutate(sent_pol, polarity = sentiment)
168 | sent_val <- tail(data_corpus_inaugural, 25) |>
169 | textstat_valence(dictionary = data_dictionary_AFINN)
170 |
171 | library("ggplot2")
172 |
173 | ggplot(data.frame(sent_pol, valence = sent_val$sentiment),
174 | aes(x = polarity, y = valence)) +
175 | geom_point()
176 | ```
177 |
178 | 
179 |
180 | Good enough for government work!
181 |
182 | ## Where to learn more
183 |
184 | Each dictionary and function has extensive documentation, including
185 | references to social scientific research articles where each sentiment
186 | concept is described in detail. There is also a package vignette with
187 | more detailed examples.
188 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | comment: false
2 |
3 | coverage:
4 | status:
5 | project:
6 | default:
7 | target: auto
8 | threshold: 1%
9 | informational: true
10 | patch:
11 | default:
12 | target: auto
13 | threshold: 1%
14 | informational: true
15 |
--------------------------------------------------------------------------------
/data/data_dictionary_AFINN.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_AFINN.rda
--------------------------------------------------------------------------------
/data/data_dictionary_ANEW.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_ANEW.rda
--------------------------------------------------------------------------------
/data/data_dictionary_HuLiu.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_HuLiu.rda
--------------------------------------------------------------------------------
/data/data_dictionary_LSD2015.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_LSD2015.rda
--------------------------------------------------------------------------------
/data/data_dictionary_LoughranMcDonald.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_LoughranMcDonald.rda
--------------------------------------------------------------------------------
/data/data_dictionary_NRC.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_NRC.rda
--------------------------------------------------------------------------------
/data/data_dictionary_Rauh.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_Rauh.rda
--------------------------------------------------------------------------------
/data/data_dictionary_geninqposneg.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_geninqposneg.rda
--------------------------------------------------------------------------------
/data/data_dictionary_sentiws.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/data/data_dictionary_sentiws.rda
--------------------------------------------------------------------------------
/inst/WORDLIST:
--------------------------------------------------------------------------------
1 | AFINN
2 | ANEW’
3 | Analyzing
4 | Codecov
5 | ESWC
6 | EmoLex
7 | FL
8 | GermanPolarityClues
9 | HLT
10 | Heyer
11 | Hu
12 | HuLiu
13 | KDD
14 | Ks
15 | LREC
16 | Lexicoder
17 | Lifecycle
18 | Loughran
19 | LoughranMcDonald
20 | Microblogs
21 | Microposts
22 | Mikhaylov
23 | Mohammad
24 | NAACL
25 | NRC
26 | Negativ
27 | ODbL
28 | Positiv
29 | Quasthoff
30 | READMEs
31 | Rauh
32 | Rauh's
33 | Rauh’s
34 | Remus
35 | Ressources
36 | SIGKDD
37 | Saif
38 | SentiWS
39 | SentimentWortschatz
40 | Soroka
41 | Turney
42 | UF
43 | Waltinger
44 | afinn
45 | analyze
46 | analyzing
47 | damag
48 | dfm
49 | doi
50 | dones
51 | etc
52 | frac
53 | geninqposneg
54 | kein
55 | keine
56 | keinen
57 | mathrm
58 | neut
59 | nicht
60 | nichts
61 | pos
62 | quanteda
63 | sentiws
64 | textstat
65 | th
66 | tibble
67 | tokenization
68 | valenced
69 | Å
70 | Årup
71 |
--------------------------------------------------------------------------------
/inst/extdata/afinn/AFINN-README.txt:
--------------------------------------------------------------------------------
1 | AFINN is a list of English words rated for valence with an integer
2 | between minus five (negative) and plus five (positive). The words have
3 | been manually labeled by Finn Årup Nielsen in 2009-2011. The file
4 | is tab-separated. There are two versions:
5 |
6 | AFINN-111: Newest version with 2477 words and phrases.
7 |
8 | AFINN-96: 1468 unique words and phrases on 1480 lines. Note that there
 9 | are 1480 lines, as some words are listed twice. The word list is not
10 | entirely in alphabetic ordering.
11 |
12 | An evaluation of the word list is available in:
13 |
14 | Finn Årup Nielsen, "A new ANEW: Evaluation of a word list for
15 | sentiment analysis in microblogs", http://arxiv.org/abs/1103.2903
16 |
17 | The list was used in:
18 |
19 | Lars Kai Hansen, Adam Arvidsson, Finn Årup Nielsen, Elanor Colleoni,
20 | Michael Etter, "Good Friends, Bad News - Affect and Virality in
21 | Twitter", The 2011 International Workshop on Social Computing,
22 | Network, and Services (SocialComNet 2011).
23 |
24 |
25 | This database of words is copyright protected and distributed under
26 | "Open Database License (ODbL) v1.0"
27 | https://www.opendatacommons.org/licenses/odbl/1.0/ or a similar
28 | copyleft license.
29 |
30 | See comments on the word list here:
31 | http://fnielsen.posterous.com/old-anew-a-sentiment-about-sentiment-analysis
32 |
33 |
34 | In Python the file may be read into a dictionary with:
35 |
36 | >>> afinn = dict(map(lambda (k,v): (k,int(v)),
37 | [ line.split('\t') for line in open("AFINN-111.txt") ]))
38 | >>> afinn["Good".lower()]
39 | 3
40 | >>> sum(map(lambda word: afinn.get(word, 0), "Rainy day but still in a good mood".lower().split()))
41 | 2
42 |
43 |
44 |
--------------------------------------------------------------------------------
/man/data_dictionary_AFINN.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data-documentation.R
3 | \docType{data}
4 | \name{data_dictionary_AFINN}
5 | \alias{data_dictionary_AFINN}
6 | \title{Nielsen's (2011) 'new ANEW' valenced word list}
7 | \format{
8 | A \link{dictionary} with one key, \code{AFINN}, with valences from -5 (negative) to +5
9 | (positive).
10 | }
11 | \source{
12 | \url{http://www2.imm.dtu.dk/pubdb/pubs/6010-full.html}
13 | }
14 | \usage{
15 | data_dictionary_AFINN
16 | }
17 | \description{
18 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing Finn Årup
19 | Nielsen's (2011) 'new ANEW' valenced word list, a publicly available list of
20 | English words rated for valence with values between -5 (negative) and +5
21 | (positive). AFINN-111, the latest version, contains 2,477 words and phrases.
22 | }
23 | \section{License}{
24 |
25 | \href{https://opendatacommons.org/licenses/odbl/1-0/}{Open Database License (ODbL) v1.0}
26 | }
27 |
28 | \references{
29 | Nielsen, F. Å. (2011). \href{https://arxiv.org/abs/1103.2903}{A new ANEW: Evaluation of a Word List for Sentiment Analysis in Microblogs.} In \emph{Proceedings
30 | of the ESWC2011 Workshop on 'Making Sense of Microposts': Big Things Come
31 | in Small Packages}, 93--98.
32 | }
33 | \keyword{data}
34 |
--------------------------------------------------------------------------------
/man/data_dictionary_ANEW.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data-documentation.R
3 | \docType{data}
4 | \name{data_dictionary_ANEW}
5 | \alias{data_dictionary_ANEW}
6 | \title{Affective Norms for English Words (ANEW)}
7 | \format{
8 | A \link{dictionary} with three valenced keys: \code{pleasure}, \code{arousal}, and
9 | \code{dominance}, each with valences from 1 to 9 and containing the same 2,471
10 | fixed word values.
11 | }
12 | \usage{
13 | data_dictionary_ANEW
14 | }
15 | \description{
16 | A quanteda dictionary object containing the ANEW, or Affective Norms for
17 | English Words (Bradley and Lang 2017) valenced lexicon. The ANEW provides a
18 | lexicon of 2,471 distinct fixed word matches that are associated with three
19 | valenced categories: pleasure, arousal, and dominance.
20 | }
21 | \section{License}{
22 |
23 | ANEW Statement of Use
24 |
25 | In accepting the ANEW materials, I agree not to make the ANEW available to
26 | the media (television, magazines, etc.) or to place them on any internet or
27 | computer-accessible websites. I also agree not to publish the ANEW in any
28 | print format – including JOURNALS, newspapers, etc. I also agree that I will
29 | not provide the ANEW materials to profit making companies or organizations
30 | and I agree not to distribute my username and password to unauthorized
31 | parties.
32 | }
33 |
34 | \keyword{data}
35 |
--------------------------------------------------------------------------------
/man/data_dictionary_HuLiu.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data-documentation.R
3 | \docType{data}
4 | \name{data_dictionary_HuLiu}
5 | \alias{data_dictionary_HuLiu}
6 | \title{Positive and negative words from Hu and Liu (2004)}
7 | \format{
8 | A \link{dictionary} of fixed word patterns with two keys:
9 | \itemize{
10 | \item \code{positive}: 2,006 words with positive polarity
11 | \item \code{negative}: 4,783 words with negative polarity
12 | }
13 | }
14 | \source{
15 | \url{https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html}
16 | }
17 | \usage{
18 | data_dictionary_HuLiu
19 | }
20 | \description{
21 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing 2,006
22 | positive and 4,783 negative words from Hu and Liu (2004, 2005).
23 | }
24 | \section{License}{
25 |
26 | Unknown.
27 | }
28 |
29 | \references{
30 | Hu, M. & Liu, B. (2004). \href{https://www.cs.uic.edu/~liub/publications/kdd04-revSummary.pdf}{Mining and Summarizing Customer Reviews}. In
31 | Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery
32 | and Data Mining (KDD-2004), Aug 22--25, 2004, Seattle, Washington, USA.
33 |
34 | Liu, B., Hu, M., & Cheng, J. (2005). \href{https://www.cs.uic.edu/~liub/publications/www05-p536.pdf}{Opinion Observer: Analyzing and Comparing Opinions on the Web}. In
35 | Proceedings of the 14th International World Wide Web conference (WWW-2005),
36 | May 10--14, 2005, Chiba, Japan.
37 | }
38 | \keyword{data}
39 |
--------------------------------------------------------------------------------
/man/data_dictionary_LSD2015.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data-documentation.R
3 | \docType{data}
4 | \name{data_dictionary_LSD2015}
5 | \alias{data_dictionary_LSD2015}
6 | \title{Lexicoder Sentiment Dictionary (2015)}
7 | \format{
8 | A \link{dictionary} of four keys containing glob-style \link[=valuetype]{pattern matches}.
9 | \describe{
10 | \item{\code{negative}}{2,858 word patterns indicating negative sentiment}
11 | \item{\code{positive}}{1,709 word patterns indicating positive sentiment}
12 | \item{\code{neg_positive}}{1,721 word patterns indicating a positive word preceded
13 | by a negation (used to convey negative sentiment)}
14 | \item{\code{neg_negative}}{2,860 word patterns indicating a negative word preceded
15 | by a negation (used to convey positive sentiment)}
16 | }
17 | }
18 | \usage{
19 | data_dictionary_LSD2015
20 | }
21 | \description{
22 | The 2015 Lexicoder Sentiment Dictionary in \pkg{quanteda} \link{dictionary}
23 | format.
24 | }
25 | \details{
26 | The dictionary consists of 2,858 "negative" sentiment words and 1,709
27 | "positive" sentiment words. A further set of 2,860 and 1,721 negations of
28 | negative and positive words, respectively, is also included. While many users
29 | will find the non-negation sentiment forms of the LSD adequate for sentiment
30 | analysis, Young and Soroka (2012) did find a small, but non-negligible
31 | increase in performance when accounting for negations. Users wishing to test
32 | this or include the negations are encouraged to subtract negated positive
33 | words from the count of positive words, and subtract the negated negative
34 | words from the negative count.
35 |
36 | Young and Soroka (2012) also suggest the use of a pre-processing script to
37 | remove specific cases of some words (i.e., "good bye", or "nobody better",
38 | which should not be counted as positive). Pre-processing scripts are
39 | available at \url{https://www.snsoroka.com/data-lexicoder/}.
40 | }
41 | \section{License and Conditions}{
42 |
43 | The LSD is available for non-commercial academic purposes only. By using
44 | \code{data_dictionary_LSD2015}, you accept these terms.
45 |
46 | Please cite the references below when using the dictionary.
47 | }
48 |
49 | \examples{
50 | # checking polarity
51 | polarity(data_dictionary_LSD2015)
52 |
53 | # simple example
54 | library("quanteda")
55 | txt <- "This aggressive policy will not win friends."
56 |
57 | tokens_lookup(tokens(txt), dictionary = data_dictionary_LSD2015,
58 | exclusive = FALSE)
59 | ## tokens from 1 document.
60 | ## text1 :
61 | ## [1] "This" "NEGATIVE" "policy" "will" "NEG_POSITIVE" "POSITIVE" "POSITIVE" "."
62 |
63 | # notice that double-counting of negated and non-negated terms is avoided
64 | # when using nested_scope = "dictionary"
65 | tokens_lookup(tokens(txt), dictionary = data_dictionary_LSD2015,
66 | exclusive = FALSE, nested_scope = "dictionary")
67 | ## tokens from 1 document.
68 | ## text1 :
69 | ## [1] "This" "NEGATIVE" "policy" "will" "NEG_POSITIVE" "POSITIVE."
70 |
71 | # on larger examples - notice that few negations are used
72 | tail(data_corpus_inaugural) |>
73 | tokens() |>
74 | tokens_lookup(dictionary = data_dictionary_LSD2015) |>
75 | dfm()
76 | }
77 | \references{
78 | The objectives, development and reliability of the dictionary are discussed
79 | in detail in Young and Soroka (2012). Please cite this article when using
80 | the Lexicoder Sentiment Dictionary and related resources.
81 | Young, L. & Soroka, S. (2012). \emph{Lexicoder Sentiment
82 | Dictionary}. Available at \url{https://www.snsoroka.com/data-lexicoder/}.
83 |
84 | Young, L. & Soroka, S. (2012). Affective News: The Automated Coding of
85 | Sentiment in Political Texts. \emph{Political Communication}, 29(2), 205--231.
86 | \doi{10.1080/10584609.2012.671234}
87 | }
88 | \keyword{data}
89 |
--------------------------------------------------------------------------------
/man/data_dictionary_LoughranMcDonald.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data-documentation.R
3 | \docType{data}
4 | \name{data_dictionary_LoughranMcDonald}
5 | \alias{data_dictionary_LoughranMcDonald}
6 | \title{Loughran and McDonald Sentiment Word Lists}
7 | \format{
8 | An object of class \code{dictionary2} of length 9.
9 | }
10 | \source{
11 | \url{https://sraf.nd.edu/loughranmcdonald-master-dictionary/}
12 | }
13 | \usage{
14 | data_dictionary_LoughranMcDonald
15 | }
16 | \description{
17 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing
18 | the 2014 version of the Loughran and McDonald Sentiment Word Lists. The
19 | categories are "negative" (2355 features), "positive" (354), "uncertainty" (297), "litigious" (903),
20 | "constraining" (184), "superfluous" (56), "interesting" (68), "modal words strong" (68)
21 | and "modal words weak" (0).
22 | }
23 | \references{
24 | Loughran, T. & McDonald, B. (2011). When is a Liability not a Liability?
25 | Textual Analysis, Dictionaries, and 10-Ks.
26 | \emph{Journal of Finance}, 66(1), 35--65. \doi{10.1111/j.1540-6261.2010.01625.x}
27 | }
28 | \keyword{data}
29 |
--------------------------------------------------------------------------------
/man/data_dictionary_NRC.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data-documentation.R
3 | \docType{data}
4 | \name{data_dictionary_NRC}
5 | \alias{data_dictionary_NRC}
6 | \title{NRC Word-Emotion Association Lexicon}
7 | \format{
8 | An object of class \code{dictionary2} of length 10.
9 | }
10 | \source{
11 | \url{https://nrc.canada.ca/en/research-development/products-services/technical-advisory-services/sentiment-emotion-lexicons}
12 |
13 | See also \url{http://saifmohammad.com/WebPages/AccessResource.htm}
14 | }
15 | \usage{
16 | data_dictionary_NRC
17 | }
18 | \description{
19 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing Mohammad and
20 | Turney's (2010, 2013) English version of the NRC Word-Emotion Association
21 | Lexicon (aka NRC Emotion Lexicon aka EmoLex): association of words with eight
22 | emotions (anger, fear, anticipation, trust, surprise, sadness, joy, and disgust)
23 | and two sentiments (negative and positive) manually annotated on Amazon's
24 | Mechanical Turk.
25 |
26 | The Sentiment and Emotion Lexicons is a collection of lexicons that was
27 | entirely created by the experts of the National Research Council of Canada.
28 | Developed with a wide range of applications, this lexicon collection can be
29 | used in a multitude of contexts such as sentiment analysis, product
30 | marketing, consumer behaviour and even political campaign analysis.
31 |
32 | The technology uses a list of words that help identify emotions, sentiment,
33 | as well as analyzing hashtags, emoticons and word-colour associations. The
34 | lexicons contain entries for English words, and can be used to analyze
35 | English texts.
36 | }
37 | \note{
38 | Technical and research-related questions can be addressed to Saif M.
39 | Mohammad (Senior Research Scientist at NRC):
40 | \code{Saif.Mohammad@nrc-cnrc.gc.ca}.
41 | }
42 | \section{License and Terms of Use}{
43 |
44 | Free for research purposes.
45 |
46 | For questions about the commercial license, email Pierre Charron (Client
47 | Relationship Leader at NRC): \code{Pierre.Charron@nrc-cnrc.gc.ca}.
48 |
49 | Terms of Use:
50 | \itemize{
51 | \item Cite the papers associated with the lexicons in your research papers and
52 | articles that make use of them. (The papers associated with each lexicon
53 | are listed below, and also in the READMEs for individual lexicons.)
54 | \item In news articles and online posts on work using these lexicons, cite the
55 | appropriate lexicons. For example: "This application/product/tool makes
56 | use of the \verb{resource name}, created by \code{author(s)} at the National
57 | Research Council Canada." (The creators of each lexicon are listed below.
58 | Also, if you send us an email, we will be thrilled to know about how you
59 | have used the lexicon.) If possible hyperlink to this page:
60 | \url{http://saifmohammad.com/WebPages/lexicons.html}.
61 | \item If you use a lexicon in a product or application, then acknowledge this in
62 | the 'About' page and other relevant documentation of the application by
63 | stating the name of the resource, the authors, and NRC. For example: "This
64 | application/product/tool makes use of the \verb{resource name}, created by
65 | \code{author(s)} at the National Research Council Canada." (The creators of
66 | each lexicon are listed below. Also, if you send us an email, we will be
67 | thrilled to know about how you have used the lexicon.) If possible
68 | hyperlink to this page: \url{http://saifmohammad.com/WebPages/lexicons.html}.
69 | \item Do not redistribute the data. Direct interested parties to this page:
70 | \url{http://saifmohammad.com/WebPages/AccessResource.htm}.
71 | \item National Research Council Canada (NRC) disclaims any responsibility for
72 | the use of the lexicons listed here and does not provide technical
73 | support. However, the contact listed above will be happy to respond to
74 | queries and clarifications.
75 | }
76 | }
77 |
78 | \references{
79 | Mohammad, S. & Turney, P. (2013). \href{https://arxiv.org/abs/1308.6297}{Crowdsourcing a Word-Emotion Association Lexicon}. \emph{Computational Intelligence},
80 | 29(3), 436--465.
81 |
82 | Mohammad, S. & Turney, P. (2010). \href{https://dl.acm.org/doi/10.5555/1860631.1860635}{Emotions Evoked by Common Words and Phrases: Using Mechanical Turk to Create an Emotion Lexicon}. In \emph{Proceedings of
83 | the NAACL-HLT 2010 Workshop on Computational Approaches to Analysis and
84 | Generation of Emotion in Text}, June 2010, LA, California.
85 | }
86 | \keyword{data}
87 |
--------------------------------------------------------------------------------
/man/data_dictionary_Rauh.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data-documentation.R
3 | \docType{data}
4 | \name{data_dictionary_Rauh}
5 | \alias{data_dictionary_Rauh}
6 | \title{Rauh's German Political Sentiment Dictionary}
7 | \format{
8 | The dictionary has four keys.
9 | \describe{
10 | \item{\code{negative}}{19,750 terms indicating negative sentiment}
11 | \item{\code{positive}}{17,330 terms indicating positive sentiment}
12 | \item{\code{neg_positive}}{17,330 terms indicating a positive word preceded
13 | by a negation (used to convey negative sentiment)}
14 | \item{\code{neg_negative}}{19,750 terms indicating a negative word preceded
15 | by a negation (used to convey positive sentiment)}
16 | }
17 | }
18 | \source{
19 | \doi{10.7910/DVN/BKBXWD}
20 | }
21 | \usage{
22 | data_dictionary_Rauh
23 | }
24 | \description{
25 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing the
26 | dictionaries provided in Rauh (2018). Rauh assesses its performance
27 | against human intuition of sentiment in German political language
28 | (parliamentary speeches, party manifestos, and media coverage). The resource
29 | builds on, harmonizes and extends the SentiWS (Remus et al. 2010) and
30 | GermanPolarityClues (Waltinger 2010) dictionaries. In order to use the
31 | negation correction provided by the dictionary, currently a combination of
32 | \link[quanteda:tokens_replace]{tokens_replace} and \link[quanteda:tokens_compound]{tokens_compound} is
33 | required to harmonize the five covered bi-gram patterns prior to scoring. The
34 | example below shows how to conduct this transformation. Note that the process
35 | changes the terms "nicht|nichts|kein|keine|keinen" to a joint term altering
36 | some of the features of the original corpus.
37 | }
38 | \examples{
39 | \donttest{
40 | # tokenize example text
41 | toks <- tokens("nicht schlecht dieses wunderschöne Wörterbuch")
42 | # replace negation markers with "not"
43 | toks1 <- tokens_replace(toks, pattern = c("nicht", "nichts", "kein",
44 | "keine", "keinen"),
45 | replacement = rep("not", 5))
46 | # compound bi-gram negation patterns
47 | toks2 <- tokens_compound(toks1, data_dictionary_Rauh, concatenator = " ")
48 |
49 | # apply dictionary
50 | tokens_lookup(toks2, dictionary = data_dictionary_Rauh) |>
51 | dfm()
52 | }
53 | }
54 | \references{
55 | Rauh, C. (2018). Validating a Sentiment Dictionary for German Political
56 | Language: A Workbench Note.
57 | \emph{Journal of Information Technology & Politics}, 15(4), 319--343.
58 | \doi{10.1080/19331681.2018.1485608}
59 |
60 | Remus, R., Quasthoff U., & Heyer, G. (2010). "\href{http://www.lrec-conf.org/proceedings/lrec2010/pdf/490_Paper.pdf}{SentiWS - a Publicly Available German-language Resource for Sentiment Analysis.}"
61 | In \emph{Proceedings of the 7th International Language Resources and Evaluation
62 | (LREC'10)}, 1168--1171.
63 |
64 | Waltinger, U. (2010). "\href{http://www.ulliwaltinger.de/pdf/91_Paper.pdf}{GermanPolarityClues: A Lexical Resource for German Sentiment Analysis}." In
65 | \emph{International Conference on Language Resources and Evaluation}, 17--23 May
66 | 2010 LREC'10.
67 | }
68 | \keyword{data}
69 |
--------------------------------------------------------------------------------
/man/data_dictionary_geninqposneg.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data-documentation.R
3 | \docType{data}
4 | \name{data_dictionary_geninqposneg}
5 | \alias{data_dictionary_geninqposneg}
6 | \title{Augmented General Inquirer \emph{Positiv} and \emph{Negativ} dictionary}
7 | \format{
8 | A \link{dictionary} of fixed word patterns with two keys:
9 | \itemize{
10 | \item \code{positive}: 1,653 words with positive polarity
11 | \item \code{negative}: 2,010 words with negative polarity
12 | }
13 | }
14 | \source{
15 | \verb{http://www.wjh.harvard.edu/~inquirer/spreadsheet_guide.htm} --
16 | although this site ceased operating some time in 2021
17 | }
18 | \usage{
19 | data_dictionary_geninqposneg
20 | }
21 | \description{
22 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing the
23 | \emph{Positiv} and \emph{Negativ} dictionary entries from the augmented
24 | General Inquirer. These are new valence categories described at
25 | \verb{http://www.wjh.harvard.edu/~inquirer/homecat.htm} but also including the
26 | terms from the "yes" "no" dictionary entries.
27 | }
28 | \references{
29 | Stone, P.J., Dunphy, D.C., & Smith, M.S. (1966).
30 | \emph{The General Inquirer: A Computer Approach to Content Analysis.}
31 | Cambridge, MA: MIT Press.
32 | }
33 | \keyword{data}
34 |
--------------------------------------------------------------------------------
/man/data_dictionary_sentiws.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data-documentation.R
3 | \docType{data}
4 | \name{data_dictionary_sentiws}
5 | \alias{data_dictionary_sentiws}
6 | \title{SentimentWortschatz (SentiWS)}
7 | \format{
8 | An object of class \code{dictionary2} of length 2.
9 | }
10 | \source{
11 | \url{https://wortschatz.uni-leipzig.de/en/download/}
12 | }
13 | \usage{
14 | data_dictionary_sentiws
15 | }
16 | \description{
17 | A \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object containing
18 | SentimentWortschatz (SentiWS), a publicly available German-language resource
19 | for sentiment analysis. The current version of SentiWS contains 1,650
20 | positive and 1,818 negative words, which sum up to 15,649 positive and 15,632
21 | negative word forms including their inflections. It not only contains
22 | adjectives and adverbs explicitly expressing a sentiment, but also nouns and
23 | verbs implicitly containing one. The original dictionary weights within the
24 | interval of -1 to 1. Note that the version implemented in
25 | \pkg{quanteda.dictionaries} uses a binary classification into positive
26 | (weight > 0) and negative (weight < 0) features.
27 | }
28 | \references{
29 | Remus, R., Quasthoff U., and Heyer, G. (2010). \href{http://www.lrec-conf.org/proceedings/lrec2010/pdf/490_Paper.pdf}{SentiWS: a Publicly Available German-language Resource for Sentiment Analysis}.
30 | In \emph{Proceedings of the 7th International Language Resources and Evaluation
31 | (LREC'10)}, 1168--1171.
32 | }
33 | \keyword{data}
34 |
--------------------------------------------------------------------------------
/man/get_polarity_dictionary.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/textstat_polarity.R
3 | \name{get_polarity_dictionary}
4 | \alias{get_polarity_dictionary}
5 | \title{Get a standard polarity dictionary for sentiment analysis}
6 | \usage{
7 | get_polarity_dictionary(dictionary)
8 | }
9 | \arguments{
10 | \item{dictionary}{a \pkg{quanteda} \link{dictionary}}
11 | }
12 | \value{
13 | a single-level \link{dictionary} with keys \code{pos}, \code{neg}, and (optionally)
14 | \code{neut}.
15 | }
16 | \description{
17 | Checks and standardizes a \link{dictionary} object with its \link{polarity} set, so
18 | that the polarity categories are standardized into the keys \code{pos}, \code{neg}, and
19 | (optionally) \code{neut}. Also checks that the dictionary contains all of the
20 | keys named in the polarity object. (It is necessary to check here since the
21 | dictionary could have been subset after creation.)
22 | }
23 | \keyword{internal}
24 |
--------------------------------------------------------------------------------
/man/images/unnamed-chunk-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/man/images/unnamed-chunk-5-1.png
--------------------------------------------------------------------------------
/man/images/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/man/images/unnamed-chunk-6-1.png
--------------------------------------------------------------------------------
/man/polarity.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/textstat_polarity.R
3 | \name{polarity}
4 | \alias{polarity}
5 | \alias{polarity<-}
6 | \title{Set or get the sentiment polarity of a dictionary}
7 | \usage{
8 | polarity(x)
9 |
10 | polarity(x) <- value
11 | }
12 | \arguments{
13 | \item{x}{a \link{dictionary} object}
14 |
15 | \item{value}{list consisting of named character vectors \code{pos}, \code{neg}, and
16 | (optionally) \code{neut} corresponding to positive, negative, and neutral
17 | sentiment categories respectively. Each element may contain multiple
18 | key names. The \code{neut} category is optional but \code{pos} and \code{neg} must be
19 | supplied.}
20 | }
21 | \value{
22 | \code{polarity()} returns the polarity as a list.
23 |
24 | \verb{polarity<-} sets the dictionary's polarity.
25 | }
26 | \description{
27 | Set or retrieve the polarity of a \link{dictionary} object for the purposes of
28 | sentiment analysis. Polarity consists of a set of dictionary keys that are
29 | associated with positive, negative, and (optionally) neutral categories for
30 | use in \code{\link[=textstat_polarity]{textstat_polarity()}}.
31 | }
32 | \details{
33 | A dictionary may have only one set of polarities at a time, but may be
34 | changed as needed.
35 | }
36 | \examples{
37 | library("quanteda")
38 | simpledict <- dictionary(list(
39 | happy = c("happy", "jubilant", "exuberant"),
40 | sad = c("sad", "morose", "down")
41 | ))
42 | polarity(simpledict)
43 | polarity(simpledict) <- list(pos = "happy", neg = "sad")
44 | polarity(simpledict)
45 |
46 | # can list multiple keys
47 | polarity(data_dictionary_LSD2015) <- list(
48 | pos = c("positive", "neg_negative"),
49 | neg = c("negative", "neg_positive")
50 | )
51 | polarity(data_dictionary_LSD2015)
52 | }
53 | \keyword{dictionary}
54 | \keyword{textstat}
55 | \keyword{utility}
56 |
--------------------------------------------------------------------------------
/man/quanteda.sentiment-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/quanteda.sentiment-package.R
3 | \docType{package}
4 | \name{quanteda.sentiment-package}
5 | \alias{quanteda.sentiment}
6 | \alias{quanteda.sentiment-package}
7 | \title{quanteda.sentiment: Sentiment Analysis using 'quanteda'}
8 | \description{
9 | Adds functions and dictionaries for computing sentiment using the 'quanteda' package.
10 | }
11 | \author{
12 | \strong{Maintainer}: Kenneth Benoit \email{kbenoit@lse.ac.uk} [copyright holder]
13 |
14 | }
15 | \keyword{internal}
16 |
--------------------------------------------------------------------------------
/man/sentiment-functions.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/textstat_polarity.R
3 | \name{sentiment-functions}
4 | \alias{sentiment-functions}
5 | \alias{sent_logit}
6 | \alias{sent_abspropdiff}
7 | \alias{sent_relpropdiff}
8 | \title{Sentiment functions}
9 | \usage{
10 | sent_logit(x, smooth = 0.5)
11 |
12 | sent_abspropdiff(x)
13 |
14 | sent_relpropdiff(x)
15 | }
16 | \arguments{
17 | \item{x}{a \link{dfm} that has the following required feature names: \code{pos},
18 | \code{neg}, \code{neut}, and \code{other}}
19 |
20 | \item{smooth}{additional smoothing function added to \code{pos} and \code{neg} before
21 | logarithmic transformation}
22 | }
23 | \value{
24 | a sparse \pkg{Matrix} object of documents by sentiment score, where
25 | the sentiment score is the only column. (Its name is unimportant as this
26 | will not be used by \code{\link[=textstat_polarity]{textstat_polarity()}}.)
27 | }
28 | \description{
29 | Functions for computing sentiment, for \code{\link[=textstat_polarity]{textstat_polarity()}}. Each function
30 | takes an input \link{dfm} with fixed feature names (see Details), and returns a
31 | sparse Matrix with a single column representing the results of the sentiment
32 | calculation.
33 |
34 | \code{sent_logit} is \eqn{log(\frac{pos}{neg})}.
35 |
36 | \code{sent_abspropdiff} is \eqn{\frac{pos - neg}{N}}, where \eqn{N}
37 | is the total number of all features in a document.
38 |
39 | \code{sent_relpropdiff} is \eqn{\frac{pos - neg}{pos + neg}}.
40 | }
41 | \details{
42 | User supplied functions must take \code{x} and optional additional arguments, such
43 | as \code{smooth} for a smoothing constant for the logit scaling function. The input
44 | \link{dfm} \code{x} must have feature names for the sentiment categories \code{pos}, \code{neg}, \code{neut}, and \code{other}. (The
45 | \code{other} category is only required when a scaling function needs the count of
46 | non-sentiment associated features.)
47 |
48 | Additional arguments may be passed via \code{...}, such as \code{smooth} for the logit
49 | scale.
50 | }
51 | \examples{
52 | library("quanteda")
53 | dfmat <- c("pos pos pos neg pos pos", "neg neg pos pos pos") |>
54 | tokens() |>
55 | dfm()
56 | sent_logit(dfmat)
57 | sent_abspropdiff(dfmat)
58 |
59 | # user-supplied function
60 | my_sent_fn <- function(x) (x[, "pos"] - x[, "neg"]) / rowSums(x) * 100
61 | my_sent_fn(dfmat)
62 |
63 | # user supplied function with fixed weights and using neutral category
64 | dfmat2 <- c("pos pos neut neg neut pos", "neg neg neut neut pos") |>
65 | tokens() |>
66 | dfm()
67 | my_sent_fn2 <- function(x) (x[, "pos"]*3 + x[, "neut"]*2 + x[, "neg"]*1)/3
68 | my_sent_fn2(dfmat2)
69 | }
70 | \references{
71 | Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011).
72 | Scaling Policy Preferences from Coded Political Texts. \emph{Legislative Studies
73 | Quarterly}, 36(1), 123–155.
74 | \doi{10.1111/j.1939-9162.2010.00006.x}
75 | }
76 | \keyword{internal}
77 | \keyword{textstat}
78 |
--------------------------------------------------------------------------------
/man/textstat_polarity.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/textstat_polarity.R
3 | \name{textstat_polarity}
4 | \alias{textstat_polarity}
5 | \title{Compute sentiment from key polarities}
6 | \usage{
7 | textstat_polarity(x, dictionary, fun = sent_logit, ...)
8 | }
9 | \arguments{
10 | \item{x}{a character, \link{corpus}, \link{tokens}, or \link{dfm} object containing
11 | text, tokens, or features whose sentiment will be scored}
12 |
13 | \item{dictionary}{a \link{dictionary} that has \link{polarity} set, indicating which
14 | keys are associated with positive, negative, and (optionally) neutral
15 | sentiment}
16 |
17 | \item{fun}{function; the formula for computing sentiment, which must refer to
18 | \code{pos}, \code{neg}, and (optionally) \code{neut}. The default is the "logit" scale
19 | (Lowe et al 2011) which is the log of (positive / negative) counts. See
20 | \link{sentiment-functions} for details and for additional available functions,
21 | as well as details on how to supply custom functions.}
22 |
23 | \item{...}{additional arguments passed to \code{fun}}
24 | }
25 | \value{
26 | a \link{data.frame} of sentiment scores
27 | }
28 | \description{
29 | Compute sentiment scores using a polarity approach, based on assigned
30 | categories (types or features) of positive, negative, and neutral sentiment.
31 | Several formulas for combining the polar categories are available, or the
32 | user can supply a custom function.
33 | }
34 | \examples{
35 | library("quanteda")
36 | corp <- tail(data_corpus_inaugural, n = 5)
37 | toks <- tokens(corp)
38 | dfmat <- dfm(toks)
39 | polar1 <- list(pos = "positive", neg = "negative")
40 | polar2 <- list(pos = c("positive", "neg_negative"),
41 | neg = c("negative", "neg_positive"))
42 |
43 | polarity(data_dictionary_LSD2015) <- polar1
44 | textstat_polarity(corp, dictionary = data_dictionary_LSD2015)
45 | textstat_polarity(toks, dictionary = data_dictionary_LSD2015)
46 | textstat_polarity(dfmat, dictionary = data_dictionary_LSD2015)
47 |
48 | polarity(data_dictionary_LSD2015) <- polar2
49 | textstat_polarity(corp, dictionary = data_dictionary_LSD2015)
50 | textstat_polarity(toks, dictionary = data_dictionary_LSD2015)
51 | textstat_polarity(corp, dictionary = data_dictionary_LSD2015)
52 | textstat_polarity(dfmat, dictionary = data_dictionary_LSD2015)
53 |
54 | # with a user-supplied function
55 | sent_fn <- function(x) (x[, "pos"] - x[, "neg"]) / rowSums(x) * 100
56 | textstat_polarity(toks, data_dictionary_LSD2015, fun = sent_fn)
57 | }
58 | \references{
59 | Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011).
60 | Scaling Policy Preferences from Coded Political Texts. \emph{Legislative Studies
61 | Quarterly}, 36(1), 123–155. \doi{10.1111/j.1939-9162.2010.00006.x}
62 | }
63 |
--------------------------------------------------------------------------------
/man/textstat_valence.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/textstat_valence.R
3 | \name{textstat_valence}
4 | \alias{textstat_valence}
5 | \title{Compute sentiment from word valences}
6 | \usage{
7 | textstat_valence(
8 | x,
9 | dictionary,
10 | normalization = c("dictionary", "all", "none"),
11 | ...
12 | )
13 | }
14 | \arguments{
15 | \item{x}{a character, \link{corpus}, \link{tokens}, or \link{dfm} object containing
16 | text, tokens, or features whose sentiment will be scored.}
17 |
18 | \item{dictionary}{a \pkg{quanteda} \link{dictionary} that has \link{valence} set, in
19 | the form of numerical valences associated with sentiment}
20 |
21 | \item{normalization}{the baseline for normalizing the sentiment counts after
22 | scoring. Sentiment scores within keys are weighted means of the tokens
23 | matched to dictionary values, weighted by their valences. The default
24 | \code{"dictionary"} is to average over only the valenced words. \code{"all"}
25 | averages across all tokens, and \code{"none"} does no normalization.}
26 |
27 | \item{...}{not used here}
28 | }
29 | \value{
30 | a data.frame of sentiment scores
31 | }
32 | \description{
33 | Compute sentiment scores from tokens or document-feature matrices, based on
34 | the valences of dictionary keys and values.
35 | }
36 | \note{
37 | If the input item is a \link{dfm}, then multi-word values will not be matched
38 | unless the features of the \link{dfm} have been compounded previously. The input
39 | objects should not have had dictionaries applied previously.
40 | }
41 | \examples{
42 | library("quanteda")
43 | \dontrun{
44 | toks <- tokens(tail(data_corpus_inaugural, n = 5))
45 | # AFINN
46 | afinn <- read.delim(system.file("extdata/afinn/AFINN-111.txt",
47 | package = "quanteda.sentiment"),
48 | header = FALSE, col.names = c("word", "valence"))
49 | data_dictionary_afinn <- dictionary(list(afinn = afinn$word))
50 | valence(data_dictionary_afinn) <- list(afinn = afinn$valence)
51 | textstat_valence(toks, dictionary = data_dictionary_afinn)
52 |
53 | # ANEW
54 | anew <- read.delim(url("https://bit.ly/2zZ44w0"))
55 | anew <- anew[!duplicated(anew$Word), ] # because some words repeat
56 | data_dictionary_anew <- dictionary(list(pleasure = anew$Word,
57 | arousal = anew$Word,
58 | dominance = anew$Word))
59 | valence(data_dictionary_anew) <- list(pleasure = anew$ValMn,
60 | arousal = anew$AroMn,
61 | dominance = anew$DomMn)
62 | textstat_valence(toks, data_dictionary_anew["pleasure"])
63 | textstat_valence(toks, data_dictionary_anew["arousal"])}
64 |
65 | }
66 | \references{
67 | For a discussion of how to aggregate sentiment scores to the document
68 | level, see:
69 |
70 | Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011).
71 | Scaling Policy Preferences from Coded Political Texts. \emph{Legislative Studies
72 | Quarterly}, 36(1), 123–155.
73 | \doi{10.1111/j.1939-9162.2010.00006.x}
74 | }
75 | \seealso{
76 | \code{\link[=valence]{valence()}}
77 | }
78 |
--------------------------------------------------------------------------------
/man/valence.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/textstat_valence.R
3 | \name{valence}
4 | \alias{valence}
5 | \alias{valence<-}
6 | \title{Set or get the valences of dictionary values or keys}
7 | \usage{
8 | valence(x)
9 |
10 | valence(x) <- value
11 | }
12 | \arguments{
13 | \item{x}{a \pkg{quanteda} \link[quanteda:dictionary]{dictionary} object}
14 |
15 | \item{value}{named list consisting of numerical value. The names of the
16 | elements must correspond to a dictionary key. Each element must be:
17 | \itemize{
18 | \item a single numeric value that will be applied to all of the dictionary
19 | values in that key; or
20 | \item a vector of numeric values that matches the length and order of the
21 | dictionary values in that key; or
22 | \item a named numeric vector where each element name matches dictionary values
23 | in the key.
24 | }}
25 | }
26 | \value{
27 | \code{valence()} returns the valences as a list of named numeric vectors,
28 | where each list element corresponds to a key in the dictionary, and each
29 | numeric element matches a value within that key.
30 |
31 | \verb{valence<-} sets the dictionary's valences.
32 | }
33 | \description{
34 | Set or retrieve the valences of a \link{dictionary} object for the purposes of
35 | sentiment analysis. Valences consist of numerical values attached to each
36 | dictionary "value". For dictionaries with a more "polarity"-based approach,
37 | see \code{\link[=textstat_polarity]{textstat_polarity()}}
38 | }
39 | \details{
40 | Valences are used only in \code{\link[=textstat_valence]{textstat_valence()}}.
41 |
42 | A dictionary may have only one set of valences at a time, but may be
43 | changed as needed.
44 | }
45 | \examples{
46 | library("quanteda")
47 |
48 | # setting valences
49 | dict <- dictionary(list(
50 | happiness = c("happy", "jubilant", "exuberant", "content"),
51 | anger = c("mad", "peeved", "irate", "furious", "livid")
52 | ))
53 | valence(dict)
54 | # using a 5-point scale: 1:1 match
55 | valence(dict) <- list(happiness = c(3, 4, 5, 2),
56 | anger = c(3.1, 2.4, 2.9, 4.1, 5.0))
57 | valence(dict)
58 | # with single valences applied to all values within the keys
59 | valence(dict) <- c(happiness = 1, anger = -1)
60 | valence(dict)
61 | # with named elements - order does not matter
62 | valence(dict) <- list(
63 | happiness = c(exuberant = 5, jubilant = 4, happy = 3, content = 2)
64 | )
65 | valence(dict)
66 |
67 | }
68 | \seealso{
69 | \code{\link[=textstat_valence]{textstat_valence()}}, \code{\link[=valence]{valence()}}
70 | }
71 | \keyword{dictionary}
72 | \keyword{textstat}
73 | \keyword{utility}
74 |
--------------------------------------------------------------------------------
/sources/AFINN/AFINN-README.txt:
--------------------------------------------------------------------------------
1 | AFINN is a list of English words rated for valence with an integer
2 | between minus five (negative) and plus five (positive). The words have
3 | been manually labeled by Finn Årup Nielsen in 2009-2011. The file
4 | is tab-separated. There are two versions:
5 |
6 | AFINN-111: Newest version with 2477 words and phrases.
7 |
8 | AFINN-96: 1468 unique words and phrases on 1480 lines. Note that there
9 | are 1480 lines, as some words are listed twice. The word list in not
10 | entirely in alphabetic ordering.
11 |
12 | An evaluation of the word list is available in:
13 |
14 | Finn Årup Nielsen, "A new ANEW: Evaluation of a word list for
15 | sentiment analysis in microblogs", http://arxiv.org/abs/1103.2903
16 |
17 | The list was used in:
18 |
19 | Lars Kai Hansen, Adam Arvidsson, Finn Årup Nielsen, Elanor Colleoni,
20 | Michael Etter, "Good Friends, Bad News - Affect and Virality in
21 | Twitter", The 2011 International Workshop on Social Computing,
22 | Network, and Services (SocialComNet 2011).
23 |
24 |
25 | This database of words is copyright protected and distributed under
26 | "Open Database License (ODbL) v1.0"
27 | https://www.opendatacommons.org/licenses/odbl/1.0/ or a similar
28 | copyleft license.
29 |
30 | See comments on the word list here:
31 | http://fnielsen.posterous.com/old-anew-a-sentiment-about-sentiment-analysis
32 |
33 |
34 | In Python the file may be read into a dictionary with:
35 |
36 | >>> afinn = dict(map(lambda (k,v): (k,int(v)),
37 | [ line.split('\t') for line in open("AFINN-111.txt") ]))
38 | >>> afinn["Good".lower()]
39 | 3
40 | >>> sum(map(lambda word: afinn.get(word, 0), "Rainy day but still in a good mood".lower().split()))
41 | 2
42 |
43 |
44 |
--------------------------------------------------------------------------------
/sources/AFINN/create-data_dictionary_AFINN.R:
--------------------------------------------------------------------------------
1 | # AFINN Dictionary
2 |
3 | library("quanteda")
4 |
5 | afinn111 <- read.delim("AFINN/AFINN-111.txt", header = FALSE, col.names = c("word", "valence"))
6 | afinn96 <- read.delim("AFINN/AFINN-96.txt", header = FALSE, col.names = c("word", "valence"))
7 |
8 | afinn111 <- dplyr::arrange(afinn111, word)
9 | afinn96 <- dplyr::arrange(afinn96, word)
10 | dplyr::filter(afinn96, duplicated(afinn96$word))
11 |
12 | data_dictionary_AFINN <- dictionary(list("AFINN" = afinn111$word))
13 | valence(data_dictionary_AFINN) <- list("AFINN" = afinn111$valence)
14 |
15 | meta(data_dictionary_AFINN) <- list(
16 | title = "Finn Årup Nielsen's (2011) 'new ANEW' valenced word list",
17 | description = "AFINN is a list of English words rated for valence with an integer between minus five (negative) and plus five (positive), manually labeled by Finn Årup Nielsen in 2009-2011. This dictionary is the newer AFINN-111 version with 2,477 words and phrases.",
18 | url = "http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=6010",
19 | reference = "Nielsen, F. Å. (2011). A new ANEW: Evaluation of a Word List for Sentiment Analysis in Microblogs. In Proceedings of the ESWC2011 Workshop on 'Making Sense of Microposts': Big Things Come in Small Packages, 93--98.",
20 | license = "This database of words is copyright protected and distributed under the Open Database License (ODbL) v1.0, https://www.opendatacommons.org/licenses/odbl/1.0/"
21 | )
22 |
23 | meta(data_dictionary_AFINN) <-
24 | lapply(meta(data_dictionary_AFINN), function(x) Encoding(x) <- "UTF-8")
25 |
26 | usethis::use_data(data_dictionary_AFINN, overwrite = TRUE)
27 |
28 |
--------------------------------------------------------------------------------
/sources/ANEW/create-data_dictionary_ANEW.R:
--------------------------------------------------------------------------------
1 | # ANEW
2 |
3 | library("quanteda")
4 |
5 | anew <- read.delim(url("https://bit.ly/2zZ44w0"))
6 | anew <- anew[!duplicated(anew$Word), ] # because some words repeat
7 | data_dictionary_ANEW <- dictionary(list(pleasure = anew$Word,
8 | arousal = anew$Word,
9 | dominance = anew$Word))
10 | valence(data_dictionary_ANEW) <- list(pleasure = anew$ValMn,
11 | arousal = anew$AroMn,
12 | dominance = anew$DomMn)
13 |
14 | meta(data_dictionary_ANEW) <-
15 | list(
16 | title = "Affective Norms for English Words (ANEW)",
17 | description = "A quanteda dictionary object containing the ANEW, or Affective Norms for English Words (Bradley and Lang 2017) valenced lexicon. The ANEW provides a lexicon of 2,471 distinct fixed word matches that are associated with three valenced categories: pleasure, arousal, and dominance.",
18 | url = "https://csea.phhp.ufl.edu/media.html#bottommedia",
19 | reference = "Bradley, M.M. & Lang, P.J. (2017). Affective Norms for English Words (ANEW): Instruction manual and affective ratings. Technical Report C-3. Gainesville, FL:UF Center for the Study of Emotion and Attention.",
20 | license = "For non-profit academic research purposes."
21 | )
22 |
23 | usethis::use_data(data_dictionary_ANEW, overwrite = TRUE)
24 |
25 |
--------------------------------------------------------------------------------
/sources/Hu-Liu/create_data_dictionary-HuLiu.R:
--------------------------------------------------------------------------------
1 | library("quanteda")
2 |
3 | data_dictionary_HuLiu <-
4 | dictionary(list(positive = scan(file = "Hu-Liu/positive-words.txt",
5 | what = "character", comment.char = ";"),
6 | negative = scan(file = "Hu-Liu/negative-words-UTF8.txt",
7 | what = "character", comment.char = ";")))
8 |
9 | meta(data_dictionary_HuLiu) <-
10 | list(
11 | title = "Positive and negative words from Hu and Liu (2004)",
12 | url = "http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html",
13 | description = "A quanteda dictionary object containing 2,006 positive and 4,783 negative words from Hu and Liu (2004, 2005).",
14 | reference = "Hu, M. & Liu, B. (2004). Mining and Summarizing Customer Reviews. In Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD-2004), Aug 22--25, 2004, Seattle, Washington, USA. https://www.cs.uic.edu/~liub/publications/kdd04-revSummary.pdf
15 |
16 | Liu, M., Hu, M., & Cheng, J. (2005). Opinion Observer: Analyzing and Comparing Opinions on the Web. In Proceedings of the 14th International World Wide Web conference (WWW-2005), May 10--14, 2005, Chiba, Japan. https://www.cs.uic.edu/~liub/publications/www05-p536.pdf",
17 | license = "Unknown"
18 | )
19 | polarity(data_dictionary_HuLiu) <- list(pos = "positive", neg = "negative")
20 |
21 | usethis::use_data(data_dictionary_HuLiu, overwrite = TRUE)
22 |
--------------------------------------------------------------------------------
/sources/Laver-Garry/Laver_and_Garry_2000.cat:
--------------------------------------------------------------------------------
1 | CULTURE
2 | CULTURE-HIGH
3 | ART (1)
4 | ARTISTIC (1)
5 | DANCE (1)
6 | GALLER* (1)
7 | MUSEUM* (1)
8 | MUSIC* (1)
9 | OPERA* (1)
10 | THEATRE* (1)
11 | CULTURE-POPULAR
12 | MEDIA (1)
13 | SPORT
14 | ANGLER* (1)
15 | PEOPLE (1)
16 | WAR_IN_IRAQ (1)
17 | CIVIL_WAR (1)
18 | ECONOMY
19 | +STATE+
20 | ACCOMMODATION (1)
21 | AGE (1)
22 | AMBULANCE (1)
23 | ASSIST (1)
24 | BENEFIT (1)
25 | CARE (1)
26 | CARER* (1)
27 | CHILD* (1)
28 | CLASS (1)
29 | CLASSES (1)
30 | CLINICS (1)
31 | COLLECTIVE* (1)
32 | CONTRIBUTION* (1)
33 | COOPERATIVE* (1)
34 | CO-OPERATIVE* (1)
35 | DEPRIVATION (1)
36 | DISABILITIES (1)
37 | DISADVANTAGED (1)
38 | EDUCAT* (1)
39 | ELDERLY (1)
40 | EQUAL* (1)
41 | ESTABLISH (1)
42 | FAIR* (1)
43 | GUARANTEE* (1)
44 | HARDSHIP (1)
45 | HEALTH* (1)
46 | HOMELESS* (1)
47 | HOSPITAL* (1)
48 | HUNGER (1)
49 | INEQUAL* (1)
50 | INVEST (1)
51 | INVESTING (1)
52 | INVESTMENT (1)
53 | MEANS-TEST* (1)
54 | NURSE* (1)
55 | PATIENTS (1)
56 | PENSION (1)
57 | POOR (1)
58 | POORER (1)
59 | POOREST (1)
60 | POVERTY (1)
61 | REHOUSE* (1)
62 | RE-HOUSE* (1)
63 | SCHOOL (1)
64 | TEACH* (1)
65 | TRANSPORT (1)
66 | UNDERFUND* (1)
67 | UNEMPLOY* (1)
68 | VULNERABLE (1)
69 | WIDOW* (1)
70 | =STATE=
71 | ACCOUNTANT (1)
72 | ACCOUNTING (1)
73 | ACCOUNTS (1)
74 | ADVERT* (1)
75 | AIRLINE* (1)
76 | AIRPORT* (1)
77 | AUDIT* (1)
78 | BANK* (1)
79 | BARGAINING (1)
80 | BREADWINNER* (1)
81 | BUDGET* (1)
82 | BUY* (1)
83 | CARTEL* (1)
84 | CASH* (1)
85 | CHARGE* (1)
86 | COMMERCE* (1)
87 | COMPENSAT* (1)
88 | CONSUM* (1)
89 | COST* (1)
90 | CREDIT* (1)
91 | CUSTOMER* (1)
92 | DEBT* (1)
93 | DEFICIT* (1)
94 | DWELLING* (1)
95 | EARN* (1)
96 | ECON* (1)
97 | ELECTRICITY (1)
98 | ESTATE* (1)
99 | EXPORT* (1)
100 | FEE (1)
101 | FEES (1)
102 | FINANC* (1)
103 | HOUS* (1)
104 | IMPORT (1)
105 | IMPORTS (1)
106 | INDUSTR* (1)
107 | JOBS (1)
108 | LEASE* (1)
109 | LOAN* (1)
110 | MANUFACTUR* (1)
111 | MORTGAGE* (1)
112 | NEGOTIAT* (1)
113 | OPPORTUNITY (1)
114 | PARTNERSHIP* (1)
115 | PASSENGER* (1)
116 | PAY* (1)
117 | PERFORMANCE (1)
118 | PORT* (1)
119 | PRODUCTIVITY (1)
120 | PROFESSION* (1)
121 | PURCHAS* (1)
122 | RAILWAY* (1)
123 | REBATE* (1)
124 | RECESSION* (1)
125 | RESEARCH* (1)
126 | REVENUE* (1)
127 | SALAR* (1)
128 | SELL* (1)
129 | SETTLEMENT (1)
130 | SOFTWARE (1)
131 | SUPPLIER* (1)
132 | SUPPLY (1)
133 | TELECOM* (1)
134 | TELEPHON* (1)
135 | TENAN* (1)
136 | TOURIS* (1)
137 | TRADE (1)
138 | TRAIN* (1)
139 | WAGE* (1)
140 | WELFARE (1)
141 | WORK* (1)
142 | -STATE-
143 | ASSETS (1)
144 | AUTONOMY (1)
145 | BARRIER* (1)
146 | BID (1)
147 | BIDDERS (1)
148 | BIDDING (1)
149 | BURDEN* (1)
150 | CHARIT* (1)
151 | CHOICE* (1)
152 | COMPET* (1)
153 | CONFIDENCE (1)
154 | CONFISCATORY (1)
155 | CONSTRAIN* (1)
156 | CONTRACTING* (1)
157 | CONTRACTOR* (1)
158 | CONTROLLED (1)
159 | CONTROLLING (1)
160 | CONTROLS (1)
161 | CORPORATE (1)
162 | CORPORATION* (1)
163 | DEREGULATING (1)
164 | DISMANTL* (1)
165 | ENTREPRENEUR* (1)
166 | EXPENSIVE (1)
167 | FLEXIB* (1)
168 | FRANCHISE* (1)
169 | FUNDHOLD* (1)
170 | FUND-HOLDING (1)
171 | HOMESTEAD* (1)
172 | INITIATIVE (1)
173 | INTRUSIVE (1)
174 | INVESTOR* (1)
175 | LIBERALI* (1)
176 | MARKET* (1)
177 | MONETARY (1)
178 | MONEY (1)
179 | OWN* (1)
180 | PRIVATE (1)
181 | PRIVATELY (1)
182 | PRIVATISATIONS (1)
183 | PRIVATISED (1)
184 | PRIVATISING (1)
185 | PRODUCE* (1)
186 | PROFITABLE (1)
187 | REGULAT* (1)
188 | RETAIL* (1)
189 | RISK (1)
190 | RISKS (1)
191 | SAVINGS (1)
192 | SELL* (1)
193 | SHARES (1)
194 | SIMPLIF* (1)
195 | SPEND* (1)
196 | SPONSORSHIP (1)
197 | TAXABLE (1)
198 | TAXES (1)
199 | TAX-FREE (1)
200 | THRIFT* (1)
201 | TRADING (1)
202 | VALUE (1)
203 | VOLUNT* (1)
204 | VOUCHER* (1)
205 | ENVIRONMENT
206 | CON ENVIRONMENT
207 | PRODUC* (1)
208 | PRO ENVIRONMENT
209 | CAR (1)
210 | CATALYTIC (1)
211 | CHEMICAL* (1)
212 | CHIMNEY* (1)
213 | CLEAN* (1)
214 | CONGESTION (1)
215 | CYCLIST* (1)
216 | DEPLET* (1)
217 | ECOLOG* (1)
218 | EMISSION* (1)
219 | ENERGY-SAVING (1)
220 | ENVIRONMENT* (1)
221 | FUR (1)
222 | GREEN (1)
223 | HABITAT* (1)
224 | HEDGEROW* (1)
225 | HUSBANDED (1)
226 | LITTER* (1)
227 | OPENCAST (1)
228 | OPEN-CAST* (1)
229 | OZONE (1)
230 | PLANET (1)
231 | POPULATION (1)
232 | RECYCL* (1)
233 | RE-CYCL* (1)
234 | RE-USE (1)
235 | TOXIC (1)
236 | WARMING (1)
237 | GROUPS
238 | ETHNIC
239 | ASIAN* (1)
240 | BUDDHIST* (1)
241 | ETHNIC* (1)
242 | RACE (1)
243 | RACI* (1)
244 | WOMEN
245 | GIRLS (1)
246 | WOMAN (1)
247 | WOMEN (1)
248 | INSTITUTIONS
249 | CONSERVATIVE
250 | AUTHORITY (1)
251 | CONTINU* (1)
252 | DISRUPT* (1)
253 | INSPECT* (1)
254 | JURISDICTION* (1)
255 | LEGITIMATE (1)
256 | MANAG* (1)
257 | MORATORIUM (1)
258 | RUL* (1)
259 | STRIKE* (1)
260 | WHITEHALL (1)
261 | NEUTRAL
262 | ADMINISTR* (1)
263 | ADVIS* (1)
264 | AGENC* (1)
265 | AMALGAMAT* (1)
266 | APPOINT* (1)
267 | ASSEMBLY (1)
268 | CHAIR* (1)
269 | COMMISSION* (1)
270 | COMMITTEE* (1)
271 | CONSTITUEN* (1)
272 | COUNCIL* (1)
273 | DEPARTMENT* (1)
274 | DIRECTORATE* (1)
275 | EXECUTIVE* (1)
276 | HEADQUARTERS (1)
277 | LEGISLAT* (1)
278 | MECHANISM* (1)
279 | MINISTER* (1)
280 | OFFICE (1)
281 | OFFICES (1)
282 | OFFICIAL (1)
283 | OPERAT* (1)
284 | OPPOSITION (1)
285 | ORGANISATION* (1)
286 | PARLIAMENT* (1)
287 | PRESIDEN* (1)
288 | PROCEDUR* (1)
289 | PROCESS* (1)
290 | QUEEN (1)
291 | REGIST* (1)
292 | SCHEME* (1)
293 | SECRETARIAT* (1)
294 | SOVEREIGN* (1)
295 | SUBCOMMITTEE* (1)
296 | TRIBUNAL* (1)
297 | VOTE* (1)
298 | VOTING (1)
299 | WESTMINSTER (1)
300 | RADICAL
301 | ABOLITION (1)
302 | ACCOUNTABLE (1)
303 | ANSWERABLE (1)
304 | CONSULT* (1)
305 | CORRUPT* (1)
306 | DEMOCRATIC* (1)
307 | ELECT* (1)
308 | IMPLEMENT* (1)
309 | MODERN* (1)
310 | MONITOR* (1)
311 | REBUILD* (1)
312 | REEXAMINE* (1)
313 | REFORM* (1)
314 | RE-ORGANI* (1)
315 | REPEAL* (1)
316 | REPLACE* (1)
317 | REPRESENTAT* (1)
318 | SCANDAL* (1)
319 | SCRAP (1)
320 | SCRAP* (1)
321 | SCRUTIN* (1)
322 | TRANSFORM* (1)
323 | VOICE* (1)
324 | LAW_AND_ORDER
325 | LAW-CONSERVATIVE
326 | ASSAULTS (1)
327 | BAIL (1)
328 | BURGLAR* (1)
329 | CONSTAB* (1)
330 | CONVICT* (1)
331 | COURT (1)
332 | COURTS (1)
333 | CUSTOD* (1)
334 | DEALING (1)
335 | DELINQUEN* (1)
336 | DETER (1)
337 | DETER* (1)
338 | DISORDER (1)
339 | DRUG* (1)
340 | FINE (1)
341 | FINES (1)
342 | FIRMNESS (1)
343 | FORCE* (1)
344 | FRAUD* (1)
345 | GUARD* (1)
346 | HOOLIGAN* (1)
347 | ILLEGAL* (1)
348 | INTIMIDAT* (1)
349 | JOY-RIDE* (1)
350 | LAWLESS* (1)
351 | MAGISTRAT* (1)
352 | OFFENCE* (1)
353 | OFFICER* (1)
354 | PENAL* (1)
355 | POLICE (1)
356 | POLICEMEN (1)
357 | POLICING (1)
358 | PRISON* (1)
359 | PROBATION (1)
360 | PROSECUTION (1)
361 | PUNISH* (1)
362 | RE-OFFEND (1)
363 | RUC (1)
364 | SEIZ* (1)
365 | SENTENCE* (1)
366 | SHOP-LIFTING (1)
367 | SQUATTING (1)
368 | TERROR* (1)
369 | THEFT* (1)
370 | THUG* (1)
371 | TOUGH* (1)
372 | TRAFFICKER* (1)
373 | UNIFORMED (1)
374 | UNLAWFUL (1)
375 | VANDAL* (1)
376 | VICTIM* (1)
377 | VIGILAN* (1)
378 | LAW-LIBERAL
379 | HARASSMENT (1)
380 | NON-CUSTODIAL (1)
381 | RURAL
382 | AGRICULTUR* (1)
383 | BADGERS (1)
384 | BIRD* (1)
385 | COUNTRYSIDE (1)
386 | FARM* (1)
387 | FEED (1)
388 | FISH* (1)
389 | FOREST* (1)
390 | HENS (1)
391 | HORSE* (1)
392 | LANDSCAPE* (1)
393 | LANE* (1)
394 | LIVESTOCK (1)
395 | MEADOWS (1)
396 | VILLAGE* (1)
397 | WILDLIFE (1)
398 | URBAN
399 | TOWN* (1)
400 | VALUES
401 | CONSERVATIVE
402 | DEFEND (1)
403 | DEFENDED (1)
404 | DEFENDING (1)
405 | DISCIPLINE (1)
406 | GLORIES (1)
407 | GLORIOUS (1)
408 | GRAMMAR (1)
409 | HERITAGE (1)
410 | HISTOR* (1)
411 | HONOUR* (1)
412 | IMMIGRA* (1)
413 | INHERIT* (1)
414 | INTEGRITY (1)
415 | JUBILEE* (1)
416 | LEADER* (1)
417 | MAINTAIN (1)
418 | MAJESTY (1)
419 | MARRIAGE (1)
420 | OBSCEN* (1)
421 | PAST (1)
422 | PORNOGRAPH* (1)
423 | PRESERV* (1)
424 | PRIDE (1)
425 | PRINCIPL* (1)
426 | PROBITY (1)
427 | PROFESSIONALISM (1)
428 | PROUD (1)
429 | PUNCTUAL* (1)
430 | RECAPTURE* (1)
431 | RELIAB* (1)
432 | THREAT* (1)
433 | TRADITION* (1)
434 | LIBERAL
435 | CRUEL* (1)
436 | DISCRIMINAT* (1)
437 | HUMAN* (1)
438 | INJUSTICE* (1)
439 | INNOCENT (1)
440 | INTER_RACIAL (1)
441 | MINORIT* (1)
442 | REPRESSI* (1)
443 | RIGHTS (1)
444 | SEX* (1)
445 |
--------------------------------------------------------------------------------
/sources/Laver-Garry/create-data_dictionary_LaverGarry.R:
--------------------------------------------------------------------------------
1 | # Laver and Garry Dictionary of Policy Positions
2 |
3 | library("quanteda")
4 |
5 | data_dictionary_LaverGarry <- dictionary(file = "Laver-Garry/Laver_and_Garry_2000.cat")
6 |
7 | usethis::use_data(data_dictionary_LaverGarry, overwrite = TRUE)
8 |
--------------------------------------------------------------------------------
/sources/Loughran-McDonald/create-data_dictionary_LoughranMcDonald.R:
--------------------------------------------------------------------------------
1 | # Loughran and McDonald Sentiment Word Lists
2 |
3 | library("quanteda")
4 |
5 | data_dictionary_LoughranMcDonald <- dictionary(file = "Loughran-McDonald/Loughran_and_McDonald_2014.cat")
6 |
7 | polarity(data_dictionary_LoughranMcDonald) <-
8 | list(pos = c("POSITIVE"), neg = c("NEGATIVE"))
9 |
10 | meta(data_dictionary_LoughranMcDonald) <-
11 | list(
12 | title = "Loughran and McDonald Sentiment Word Lists",
13 | description = "A quanteda dictionary object containing the 2014 version of the Loughran and McDonald Sentiment Word Lists. The categories are 'negative' (2355 features), 'positive' (354), 'uncertainty' (297), 'litigious' (903), 'constraining' (184), 'superfluous' (56), 'interesting' (68), 'modal words strong' (68) and 'modal words weak' (0).",
14 | url = "http://sraf.nd.edu/textual-analysis/resources/",
15 | reference = "Loughran, T. & McDonald, B. (2011). When is a Liability not a Liability? Textual Analysis, Dictionaries, and 10-Ks. Journal of Finance, 66(1), 35-65.",
16 | license = "The data compilations provided on this website are for use by individual researchers. For commercial licenses please contact mcdonald.1@nd.edu."
17 | )
18 |
19 | usethis::use_data(data_dictionary_LoughranMcDonald, overwrite = TRUE)
20 |
--------------------------------------------------------------------------------
/sources/MFD/create-data_dictionary_MFD.R:
--------------------------------------------------------------------------------
1 | # Moral Foundations Dictionary
2 |
3 | library("quanteda")
4 |
5 | #data_dictionary_MFD <- dictionary(file = "sources/MFD/moral_foundations_dictionary.dic")
6 | data_dictionary_MFD <- dictionary(file = "MFD/mfd2.0.dic")
7 |
8 | usethis::use_data(data_dictionary_MFD, overwrite = TRUE)
9 |
--------------------------------------------------------------------------------
/sources/MFD/mfd2.0.dic:
--------------------------------------------------------------------------------
1 | %
1 care.virtue
2 care.vice
3 fairness.virtue
4 fairness.vice
5 loyalty.virtue
6 loyalty.vice
7 authority.virtue
8 authority.vice
9 sanctity.virtue
10 sanctity.vice
%
compassion 1
empathy 1
kindness 1
caring 1
generosity 1
benevolence 1
altruism 1
compassionate 1
nurture 1
gentleness 1
nurturance 1
sympathy 1
nurturing 1
motherly 1
love 1
beneficence 1
empathize 1
helpfulness 1
loving 1
pity 1
mercy 1
nurturer 1
compassionately 1
nurturers 1
caringly 1
empathising 1
merciful 1
empathizing 1
nurtures 1
warmhearted 1
empathizers 1
protectiveness 1
nurtured 1
benevolent 1
mothering 1
cared 1
healing 1
empathises 1
humane 1
comfort 1
pitied 1
loved 1
altruist 1
cares 1
pitying 1
comforted 1
hug 1
comforting 1
consoling 1
empathizes 1
sympathize 1
care 1
caregiver 1
empathised 1
hugs 1
heal 1
generous 1
condolences 1
mothered 1
charitable 1
generously 1
pities 1
condolence 1
help 1
consolingly 1
solace 1
mother 1
healer 1
hospitality 1
charity 1
empathized 1
healers 1
pityingly 1
mothers 1
child 1
lovingly 1
parenting 1
rescuing 1
rescuer 1
loves 1
consoled 1
clothe 1
sympathizing 1
helping 1
shared 1
childhood 1
mommy 1
vulnerability 1
helpers 1
lover 1
hospitable 1
sharer 1
feeding 1
nursed 1
helper 1
safeness 1
nurses 1
protector 1
motherhood 1
alleviation 1
nursemaid 1
safeguard 1
protect 1
healthiness 1
protecters 1
patient 1
nurse 1
vulnerable 1
benefit 1
feed 1
childcare 1
rescuers 1
hugged 1
helpful 1
rescues 1
nursing 1
protecting 1
heals 1
childbearing 1
hugger 1
relief 1
healed 1
rescued 1
patients 1
share 1
rescue 1
healthy 1
hospitalise 1
hospitalising 1
hugging 1
nursery 1
healthier 1
sharing 1
helps 1
sympathizers 1
hospitalises 1
alleviating 1
wounded 1
wounds 1
hospitalize 1
alleviate 1
protective 1
protection 1
health 1
relieve 1
sympathizer 1
safety 1
beneficiary 1
helped 1
hospital 1
childbirth 1
benefits 1
healthcare 1
relievers 1
feeds 1
hospitalization 1
benefitting 1
relieving 1
safe 1
feeder 1
benefitted 1
hospitalized 1
unharmful 1
protects 1
unharmed 1
protecter 1
safely 1
safekeeping 1
hospitalizing 1
wounding 1
reliever 1
shares 1
relieves 1
alleviates 1
relieved 1
hospitalizes 1
console 1
protectorate 1
alleviated 1
protected 1
wound 1
consoles 1
harm 2
suffer 2
hurt 2
harmed 2
hurting 2
hurts 2
cruel 2
endanger 2
harming 2
harms 2
suffering 2
threaten 2
inflict 2
suffered 2
harmful 2
inflicted 2
mistreat 2
endangers 2
damaging 2
injurious 2
victimize 2
inflicts 2
hurtful 2
suffers 2
inflicting 2
injures 2
vulnerable 2
unkind 2
damage 2
kill 2
die 2
victimizes 2
torment 2
destroy 2
brutalise 2
brutalises 2
distresses 2
endangering 2
mistreats 2
afflict 2
distressing 2
destroys 2
victimises 2
maltreat 2
pain 2
harsh 2
mistreated 2
ravage 2
threatened 2
harass 2
unkindness 2
afflicted 2
threatens 2
threatening 2
distress 2
brutalize 2
tormenting 2
brutalized 2
victimizing 2
damager 2
damaged 2
bully 2
agony 2
abused 2
coldhearted 2
inhuman 2
injured 2
torments 2
brutalizes 2
uncompassionate 2
cruelty 2
tormented 2
mistreating 2
endangered 2
uncaring 2
anguishes 2
destroying 2
killed 2
mistreatment 2
bullied 2
harsher 2
cruelness 2
tortured 2
pained 2
tortures 2
torturing 2
maltreated 2
anguish 2
persecutes 2
maltreatment 2
brutalizing 2
attacked 2
victim 2
crying 2
damages 2
discomforting 2
abusing 2
threat 2
persecute 2
brutalization 2
violent 2
annihilated 2
torturous 2
harasses 2
injurer 2
destroyed 2
molests 2
molest 2
injuring 2
afflicts 2
killing 2
ache 2
wounded 2
persecuted 2
ravages 2
harassed 2
exploited 2
injury 2
brutalisation 2
discomfort 2
unmerciful 2
annihilate 2
exploiters 2
injurers 2
destruction 2
manhandle 2
kills 2
casualties 2
maltreating 2
victims 2
harassing 2
needier 2
smother 2
harassment 2
smothers 2
unhelpful 2
agonize 2
inhumanity 2
duress 2
victimization 2
exploiting 2
cried 2
wounds 2
wounding 2
murderous 2
ravaged 2
uncaringly 2
pains 2
painfulness 2
manhandles 2
bullies 2
assaulted 2
uncharitable 2
distressed 2
persecution 2
murdered 2
ravaging 2
discomforted 2
exploitation 2
torture 2
murderers 2
aches 2
afflictions 2
ungenerous 2
victimizer 2
agonizing 2
paining 2
persecuting 2
exploit 2
harassers 2
malevolent 2
stabs 2
sorrowful 2
assaults 2
needy 2
affliction 2
cries 2
fighting 2
fight 2
attack 2
annihilates 2
sorrow 2
agonized 2
assaulting 2
inhospitable 2
threats 2
ached 2
rapists 2
abuser 2
raped 2
assassinates 2
stabbed 2
inhospitality 2
annihilation 2
punch 2
harshness 2
abusers 2
killer 2
sufferers 2
victimizers 2
smite 2
killers 2
discomforts 2
fatalities 2
molested 2
brutality 2
murdering 2
torturer 2
torturers 2
fights 2
harmfulness 2
bullying 2
casualty 2
sufferer 2
exploiter 2
fatality 2
punches 2
abuses 2
attacks 2
vulnerability 2
carnage 2
tribulation 2
annihilator 2
smothering 2
bullyboy 2
murderer 2
wound 2
stabber 2
tormenters 2
malevolence 2
raping 2
smothered 2
assault 2
genocidal 2
anguishing 2
aching 2
anguished 2
stabbing 2
rapist 2
harasser 2
hungers 2
hunger 2
molesting 2
rape 2
molesters 2
punched 2
violence 2
distressingly 2
molester 2
stabbers 2
neediness 2
assassinate 2
agonizingly 2
tribulations 2
unhelpfulness 2
assaulter 2
puncher 2
punching 2
rapes 2
genocides 2
attackers 2
tormentor 2
assassinations 2
destroyers 2
punchers 2
sorrows 2
tormenter 2
threateningly 2
murder 2
destroyer 2
assassinating 2
crier 2
assassinated 2
molestation 2
attacker 2
murders 2
genocide 2
fighter 2
assassins 2
assaulters 2
hungering 2
achingly 2
hungered 2
murderess 2
assassin 2
exploits 2
fighters 2
assassination 2
equality 3
fairness 3
justice 3
rights 3
equitable 3
civil rights 3
fairplay 3
impartiality 3
equal 3
fairminded 3
proportionality 3
equalities 3
fair 3
integrity 3
impartial 3
reciprocity 3
honesty 3
egalitarian 3
civil right 3
law 3
justness 3
unbias 3
egalitarians 3
parity 3
objectiveness 3
reparations 3
unprejudiced 3
justices 3
laws 3
tribunals 3
retribution 3
reparation 3
lawfully 3
lawful 3
honest 3
compensation 3
lawyers 3
sportsmanship 3
tribunal 3
do unto others 3
golden rule 3
lawyer 3
proportional 3
equity 3
lawyering 3
trust 3
reciprocal 3
being objective 3
justification 3
trustworthiness 3
unbiased 3
vengeance 3
revenge 3
retributions 3
equals 3
equalize 3
refereeing 3
restitution 3
compensating 3
been objective 3
pay back 3
justified 3
justifies 3
retaliation 3
lawyered 3
compensated 3
referees 3
karma 3
will share 3
avenger 3
trusting 3
avengers 3
square deal 3
trusts 3
compensate 3
trustworthy 3
levels the playing field 3
tit for tat 3
retaliate 3
level the playing field 3
eye for an eye 3
square deals 3
repayment 3
payback 3
equities 3
justify 3
dues 3
square dealing 3
referee 3
repaid 3
square dealer 3
equalizer 3
due processes 3
level playing fields 3
repay 3
compensates 3
justifying 3
due processing 3
due process 3
repayments 3
repaying 3
level playing field 3
retaliating 3
square dealers 3
retaliated 3
refereed 3
revenger 3
avenging 3
repays 3
trusted 3
avenge 3
retaliates 3
equalizers 3
avenged 3
avenges 3
cheat 4
unfair 4
cheating 4
unfairness 4
injustice 4
fraud 4
dishonest 4
unjust 4
cheated 4
fraudulent 4
cheats 4
frauds 4
dishonesty 4
cheaters 4
deception 4
injustices 4
swindle 4
inequity 4
hypocrisy 4
discrimination 4
unequal 4
cheater 4
inequities 4
defraud 4
racism 4
scam 4
liar 4
defrauds 4
betrayal 4
deceipt 4
defrauded 4
inequality 4
liars 4
defrauders 4
hypocrite 4
biased 4
ripoffs 4
scams 4
fleecing 4
defrauder 4
discriminates 4
mislead 4
inequalities 4
prejudice 4
fleeced 4
defrauding 4
ripoff 4
scamming 4
imposters 4
exploitation 4
crooked 4
oppress 4
racist 4
oppression 4
imposter 4
swindled 4
hypocrites 4
plagiarism 4
lied 4
untrustworthiness 4
hoodwink 4
scammed 4
blackmail 4
bilks 4
swindling 4
betrayed 4
bias 4
connive 4
crooks 4
deceive 4
freeloaders 4
favoritism 4
disparity 4
swindles 4
deceived 4
exploiters 4
misleading 4
discriminated 4
bilked 4
deceiving 4
untrustworthy 4
prejudiced 4
false advertise 4
scammers 4
swindler 4
theft 4
duplicitous 4
hoodwinked 4
bigoted 4
sexism 4
disproportionate 4
swindlers 4
discriminate 4
conniving 4
sexist 4
betraying 4
hoodwinking 4
partiality 4
misleads 4
disproportion 4
economic disparity 4
exploiter 4
bilk 4
biases 4
bigots 4
distrust 4
dupe 4
crook 4
racists 4
con artist 4
bilking 4
blackmailing 4
deceives 4
betrayers 4
deceiver 4
blackmailed 4
duping 4
shyster 4
connivers 4
imbalanced 4
con artists 4
sexists 4
thieving 4
betray 4
imbalance 4
disproportions 4
disproportionately 4
freeloader 4
misleaders 4
connived 4
shysters 4
scammer 4
connives 4
conniver 4
disadvantaged 4
plagiaristic 4
moocher 4
dupes 4
discriminating 4
tricked 4
segregation 4
false advertised 4
thief 4
betrayer 4
bigot 4
exploiting 4
lying 4
thieves 4
stealing 4
suckered 4
deceivers 4
bamboozled 4
false advertisement 4
freeload 4
bamboozle 4
did rob 4
freeloading 4
steal 4
pickpocketing 4
blackmailer 4
prejudicing 4
chauvinists 4
exploit 4
misleader 4
hoodwinks 4
false advertiser 4
imbalances 4
pickpocketed 4
exploited 4
pickpockets 4
bamboozles 4
tricking 4
taking advantage 4
pickpocket 4
false advertisers 4
biasing 4
false impression 4
bamboozling 4
false witness 4
robs 4
moochers 4
betrays 4
robbing 4
false advertises 4
false impressions 4
blackmails 4
double cross 4
blackmailers 4
will rob 4
stolen 4
distrustful 4
false advertising 4
double crossers 4
mooches 4
disproportioned 4
mooching 4
segregated 4
double crosser 4
robbed 4
misleadingly 4
segregating 4
stole 4
double crosses 4
being partial 4
exploits 4
distrusts 4
mooch 4
segregate 4
robbers 4
distrusted 4
double crossing 4
distrusting 4
double crossed 4
be partial 4
go back on 4
stacking the deck 4
robber 4
segregates 4
ripping off 4
trickster 4
rips off 4
behind their backs 4
mooched 4
stacked the deck 4
was partial 4
am partial 4
stacked deck 4
stacks the deck 4
behind their back 4
been partial 4
free rider 4
ripped off 4
free riders 4
deceivingly 4
steals 4
unequaled 4
team player 5
player 5
patriot 5
loyal 5
loyalty 5
patriots 5
follower 5
fidelity 5
allegiance 5
ally 5
comrade 5
loyalties 5
death do us part 5
faction 5
comrades 5
allegiances 5
sacrifice 5
allies 5
organization 5
followers 5
us against them 5
sacrifices 5
all for one 5
comradery 5
one for all 5
fellow 5
family 5
allegiant 5
corps 5
unity 5
union jack 5
uniter 5
old glory 5
companions 5
country 5
companion 5
homeland 5
sacrificing 5
indivisible 5
sacrificed 5
solidarity 5
troops 5
nation 5
cult 5
kinship 5
companionship 5
clique 5
allied 5
community 5
group 5
factions 5
familiarity 5
solidarities 5
enlist 5
companionships 5
wife 5
united 5
belongs 5
congregation 5
brothers in arms 5
clan 5
trooper 5
sect 5
enlisted 5
enlistment 5
tribalism 5
cohorts 5
war 5
joining 5
troop 5
sacrificial 5
coalition 5
insider 5
pledge 5
cohort 5
enlisting 5
unite 5
communion 5
familiarities 5
belong 5
ingroup 5
belonged 5
company 5
collective 5
fellows 5
cliques 5
uniting 5
clans 5
hazing 5
congregates 5
herd 5
sects 5
uniters 5
undivided 5
unites 5
pledgers 5
coalitions 5
enlists 5
grouping 5
insiders 5
families 5
troupe 5
fellowship 5
kin 5
pledger 5
horde 5
nations 5
tribe 5
hordes 5
pledges 5
herder 5
commune 5
cults 5
congregations 5
organizations 5
herds 5
pledging 5
communities 5
familiar 5
hazings 5
belonging 5
pledged 5
bowed 5
collectively 5
together 5
groups 5
homelands 5
collectives 5
troopers 5
tribes 5
companies 5
countries 5
troupes 5
fellowships 5
tribal 5
communes 5
herders 5
grouped 5
herding 5
congregate 5
herded 5
congregating 5
traitor 6
disloyal 6
treason 6
traitors 6
betray 6
betraying 6
betrayer 6
betrayers 6
unpatriotic 6
betrayed 6
treachery 6
enemies 6
backstabber 6
backstabbed 6
heretic 6
enemy 6
betrays 6
deserter 6
infidels 6
infidel 6
backstab 6
deserting 6
apostate 6
heresy 6
backstabbers 6
heretics 6
unfaithful 6
rebellion 6
desertion 6
deserters 6
apostates 6
unfaithfulness 6
backstabbing 6
rebel 6
cheat on 6
treacherous 6
backstabs 6
heresies 6
outsider 6
outgroup 6
cheated on 6
against us 6
cheating on 6
rebels 6
infidelity 6
outgroups 6
rebellions 6
outsiders 6
cheats on 6
respect 7
obey 7
authority 7
obeyed 7
deference 7
reverence 7
respecting 7
obeying 7
tradition 7
adhere 7
obeys 7
revere 7
govern 7
comply 7
respectful 7
honor 7
adhered 7
allegiance 7
dictates 7
nobility 7
forbid 7
dominion 7
governed 7
obedient 7
reveres 7
adhering 7
governs 7
governing 7
oppress 7
respected 7
respectfully 7
honorable 7
dictate 7
commandments 7
commandment 7
venerate 7
politeness 7
respects 7
obedience 7
divine right 7
forbids 7
permission 7
veneration 7
hierarchy 7
forbade 7
honoring 7
proper 7
venerated 7
stature 7
acquiesce 7
adherence 7
deferential 7
leadership 7
punish 7
forbidding 7
revered 7
filial piety 7
patriarchs 7
decree 7
coerce 7
dominions 7
dictating 7
venerating 7
wear the crown 7
venerates 7
institution 7
monarchical 7
servant 7
decrees 7
permit 7
do as one says 7
supervise 7
duty 7
compliance 7
lionize 7
supervision 7
take orders 7
take up arms 7
duties 7
dictated 7
elders 7
emperors 7
commands 7
acquiesced 7
emperor 7
adheres 7
servants 7
regulations 7
covenant 7
hierarchical 7
subordinate 7
policing 7
decreeing 7
acquiesces 7
authorizing 7
nobles 7
permits 7
matriarchal 7
authorizes 7
control 7
command 7
subordinating 7
hierarchies 7
reverential 7
deferentially 7
punishes 7
patriarch 7
empires 7
honored 7
allegiant 7
protect 7
traditional 7
subordination 7
punished 7
noble 7
order 7
worship 7
social order 7
monarchs 7
ruling 7
lead by example 7
authorities 7
guiding 7
presidents 7
slavishly 7
patriarchy 7
subordinates 7
protection 7
supervisers 7
bow before 7
fathers 7
bow down 7
institutions 7
coersion 7
governors 7
commanded 7
police 7
authorize 7
bullys 7
bully 7
protecting 7
acquiescing 7
empire 7
mentor 7
chiefs 7
monarchies 7
honors 7
preside over 7
acquiescent 7
allegiances 7
bowing 7
oligarchy 7
willing 7
polite 7
supervising 7
pecking order 7
compliantly 7
bishops 7
monarch 7
slaves 7
traitors 7
punishments 7
authorized 7
protector 7
compliant 7
dutiful 7
father 7
punishment 7
coerces 7
toe the line 7
monarchy 7
obediently 7
elder 7
oligarchies 7
dictators 7
leaders 7
bishop 7
lorded over 7
worships 7
coercing 7
protectors 7
dictator 7
protected 7
punishing 7
traitor 7
commanding 7
coerced 7
commanders 7
pope 7
punitive 7
underlings 7
master 7
subordinated 7
president 7
in charge 7
matriarchy 7
lionizing 7
slave 7
chief 7
covenants 7
commander 7
matriarch 7
authorizer 7
guide 7
ordered 7
supervised 7
captains 7
punisher 7
supervises 7
bossing 7
commandant 7
governor 7
protects 7
admiral 7
top gun 7
bowed 7
dominate 7
arrest 7
mentored 7
ordering 7
submit 7
institutional 7
prime minister 7
lionizes 7
ranking 7
boss 7
captain 7
by the book 7
mentors 7
bullies 7
dominant 7
arrested 7
bossed 7
leader 7
rank 7
arresting 7
chieftain 7
prime ministers 7
regulation 7
superviser 7
dean 7
arrests 7
punishers 7
bullied 7
matriarchs 7
controlling 7
managerial 7
bosses 7
ranks 7
controls 7
dictation 7
guides 7
oligarchs 7
principals 7
top dog 7
admirals 7
caste 7
captaining 7
queen 7
mentoring 7
elderly 7
castes 7
governess 7
captained 7
principal 7
bullying 7
submission 7
dominated 7
corporate ladders 7
queens 7
underling 7
corporate ladder 7
fathered 7
dominates 7
dominating 7
presidential 7
oligarch 7
controlled 7
submits 7
submitting 7
head honcho 7
commandingly 7
vice president 7
slaving 7
fathering 7
slaved 7
managers 7
forbiddingly 7
controllers 7
submitted 7
ringleaders 7
ringleader 7
controller 7
ranked 7
manager 7
prime ministerial 7
submissions 7
ceo 7
punishingly 7
submitter 7
submitters 7
disrespect 8
disobey 8
disobedience 8
anarchy 8
chaos 8
subversion 8
subvert 8
lawlessness 8
subverting 8
disrespecting 8
sedition 8
treason 8
overthrow 8
insurrection 8
rebellion 8
transgress 8
treachery 8
dissent 8
dishonor 8
dissention 8
disrespects 8
bedlam 8
rebelling 8
misrule 8
transgression 8
insurrectional 8
pandemonium 8
mutiny 8
mutinies 8
misruling 8
disobedient 8
subverted 8
transgresses 8
transgressed 8
disarray 8
misruled 8
rioting 8
lawless 8
transgressing 8
illegality 8
overthrowing 8
dishonorable 8
dishonoring 8
rebelled 8
rebellions 8
riot 8
dishonouring 8
disrespected 8
permissiveness 8
refuser 8
unruly 8
subverts 8
unlawfulness 8
overthrown 8
anarchistic 8
dishonours 8
riots 8
refuse 8
chaotic 8
nonconformity 8
dissenters 8
uprising 8
insurrections 8
rioters 8
disordering 8
insubordinate 8
mutinied 8
insurrectionist 8
unlawful 8
nonconformists 8
heresy 8
uprisings 8
dishonors 8
tumult 8
overthrew 8
overthrows 8
rabble rousers 8
renegade 8
impolite 8
renegades 8
rabble rousing 8
dishonored 8
illegal 8
rioter 8
mutinous 8
disarrayed 8
apostates 8
dissidents 8
anarchists 8
raise hell 8
disorder 8
refusers 8
permissive 8
apostate 8
anarchist 8
treacherous 8
dissident 8
raises hell 8
disordered 8
heretic 8
overpower 8
rabble rouser 8
rebel 8
raising hell 8
heretics 8
unathorized 8
refusing 8
rebels 8
refuses 8
rioted 8
orders 8
dissenter 8
chaotically 8
nonconformist 8
heresies 8
illegals 8
unlawfully 8
heretical 8
dissents 8
traditions 8
dissenting 8
overpowers 8
trouble maker 8
refused 8
rock the boat 8
overpowering 8
tumultuous 8
overpowered 8
dissented 8
nonconforming 8
sanctity 9
sacred 9
sacredness 9
purity 9
wholesome 9
pureness 9
wholesomeness 9
holiness 9
dignity 9
godly 9
piety 9
sanctify 9
chastity 9
undefiled 9
holy 9
sacrosanct 9
pious 9
righteousness 9
dignities 9
sanctified 9
godliness 9
spirituality 9
chaste 9
sanctifies 9
righteous 9
divine 9
religious 9
biblical 9
spiritual 9
deity 9
sanctifying 9
noble 9
modesty 9
decency 9
scriptures 9
nobility 9
religion 9
hallow 9
soul 9
hallowed 9
deism 9
pristine 9
exalted 9
hallowing 9
eternal 9
holy cross 9
deities 9
faith 9
unadulterated 9
scripture 9
wholesomely 9
divinities 9
worship 9
virgin 9
god 9
catholicism 9
saintly 9
saintliness 9
godess 9
religiosity 9
purify 9
koranic 9
pure 9
holy crosses 9
exalt 9
virginity 9
divinity 9
consecrates 9
heaven 9
virginal 9
devout 9
dignified 9
tabernacle 9
exalts 9
buddhas 9
souls 9
temple 9
unsullied 9
heavenly 9
cleanliness 9
abstinance 9
spotlessness 9
talmudic 9
deists 9
gospels 9
prophets 9
religions 9
temples 9
buddhist 9
godesses 9
saints 9
temperance 9
celibacy 9
consecrated 9
priestly 9
bless 9
marriage 9
prophet 9
exalting 9
unchaste 9
supernatural 9
eternally 9
purification 9
apostles 9
monastic 9
purified 9
communion 9
gods 9
celibate 9
christians 9
theological 9
monasticism 9
unspoiled 9
sterility 9
christian 9
buddha 9
deist 9
prophetic 9
saint 9
righteously 9
apostle 9
prayer 9
faiths 9
shrine 9
purifying 9
worships 9
virgins 9
glorious 9
dignifies 9
atonement 9
deification 9
orthodoxy 9
hallows 9
enshrining 9
nunneries 9
church 9
religiously 9
blessings 9
consecrate 9
gospel 9
pray 9
beatifying 9
yogis 9
theology 9
purifies 9
orthodox 9
untainted 9
torah 9
faithfully 9
catholic 9
heavens 9
yogi 9
consecrating 9
blessed 9
faithful 9
koran 9
abstinence 9
jesus 9
monastery 9
purities 9
consecration 9
catholics 9
prayers 9
prayed 9
sterile 9
blesses 9
enshrined 9
torahs 9
organic 9
bible 9
glory 9
allah 9
glories 9
priests 9
dignifying 9
enshrine 9
mosques 9
spotlessly 9
prude 9
reverend 9
soulful 9
deify 9
christ 9
cathedrals 9
churches 9
cathedral 9
dignify 9
monasteries 9
raw 9
enshrines 9
refinement 9
nuns 9
monks 9
gloriously 9
almighty 9
marring 9
repent 9
prays 9
clean 9
orthodoxies 9
exterminates 9
rabbis 9
spotless 9
bibles 9
mosque 9
immaculate 9
organics 9
purifier 9
foods 9
lord 9
praying 9
repenting 9
marry 9
elevating 9
marrying 9
immaculately 9
rabbi 9
nunnery 9
priest 9
food 9
bloodiness 9
marries 9
synagogues 9
synagogue 9
refined 9
repents 9
angel 9
blessing 9
monk 9
rabbinical 9
organically 9
pope 9
nun 9
nobles 9
prophetically 9
blood 9
repented 9
pastor 9
purifiers 9
lords 9
bloody 9
untouched 9
cleaning 9
exterminating 9
exterminated 9
imam 9
higher power 9
cleaners 9
married 9
beatification 9
beatify 9
extermination 9
exterminate 9
cleaner 9
body 9
immune 9
atoning 9
imams 9
cleaned 9
atones 9
mary 9
refines 9
cleans 9
atone 9
immunities 9
immunity 9
stainless 9
refining 9
refine 9
atoned 9
exterminator 9
exterminators 9
impurity 10
degradation 10
depravity 10
desecrate 10
desecration 10
repulsiveness 10
degrading 10
decay 10
filth 10
depravities 10
defile 10
sin 10
fornication 10
repulsive 10
depraved 10
impiety 10
degrade 10
repugnance 10
impure 10
degraded 10
desecrations 10
sinfulness 10
impurities 10
indecencies 10
defiled 10
defiles 10
uncleanliness 10
damnation 10
debauchery 10
impious 10
sinful 10
necrophiliacs 10
desecrates 10
sleaziness 10
desecrating 10
desecrated 10
grossness 10
contaminates 10
sinning 10
promiscuity 10
befouls 10
rottenness 10
hedonism 10
revolting 10
repugnant 10
godless 10
scum 10
befoul 10
satanic 10
sluttiness 10
disgusting 10
pestilence 10
debased 10
trashiness 10
sins 10
degradingly 10
corrupting 10
deprave 10
perverted 10
debase 10
fornicating 10
degraders 10
defiling 10
slime 10
horrors 10
repugnantly 10
defiler 10
deviants 10
degrades 10
corrupts 10
debasing 10
perverts 10
parasitic 10
disgusts 10
deflowering 10
hedonistic 10
deviant 10
scummy 10
horrifying 10
necrophilia 10
contamination 10
rot 10
stain 10
contaminating 10
contaminants 10
dirtying 10
debases 10
contaminate 10
abhor 10
heresy 10
sleaze 10
staining 10
defilers 10
harlot 10
plagues 10
sullies 10
fornicators 10
vermin 10
befouling 10
incest 10
trashy 10
excreting 10
deforms 10
abhored 10
decayed 10
whores 10
deformities 10
perverse 10
adultery 10
fornicate 10
excrement 10
harlots 10
decaying 10
fornicator 10
unclean 10
nauseating 10
sully 10
heresies 10
satan 10
damns 10
satanically 10
sinned 10
sinners 10
adulterous 10
repulses 10
corruption 10
tainting 10
deformity 10
necrophiliac 10
decays 10
corrupted 10
deforming 10
contaminant 10
disgust 10
tarnishes 10
hell 10
filthy 10
taint 10
horrific 10
fecal 10
dirtied 10
flesh 10
stained 10
deform 10
putrid 10
scatalogical 10
dirties 10
whoring 10
cocksucker 10
plague 10
adulterers 10
excretes 10
infesting 10
slimy 10
excrete 10
scuzz 10
horror 10
tarnish 10
sexuality 10
parasite 10
obscenity 10
deformed 10
adulterer 10
befouled 10
muck 10
corpses 10
soiled 10
infest 10
incestuously 10
incestuous 10
fucker 10
devil 10
parasites 10
stains 10
skanks 10
corpse 10
whore 10
lepers 10
curses 10
corrupt 10
pathogens 10
diseased 10
deflower 10
hedonists 10
sinner 10
debaucherous 10
fester 10
hedonist 10
sleazy 10
fucks 10
promiscuous 10
cursed 10
curse 10
apostates 10
cocksuckers 10
heretic 10
lewdness 10
slutty 10
infests 10
festers 10
pervert 10
fuck 10
skanky 10
dirty 10
mucky 10
puke 10
alcoholism 10
feces 10
sullied 10
disgustingly 10
sexual 10
cunt 10
taints 10
profane 10
heretics 10
fucking 10
tarnishing 10
fornicated 10
mar 10
shitting 10
slut 10
obscene 10
barf 10
rotten 10
disgusted 10
cunts 10
waste 10
parasitically 10
sinfully 10
wastes 10
vomit 10
pathogen 10
rats 10
pathogenic 10
indecent 10
infect 10
leper 10
indecently 10
shit 10
abhors 10
skank 10
infestation 10
deflowered 10
leprosy 10
diseases 10
heretical 10
dirt 10
cursing 10
tarnishment 10
disease 10
prostitution 10
infested 10
apostate 10
sluts 10
fuckers 10
profanity 10
addiction 10
contaminated 10
scuzzy 10
infectiousness 10
indecency 10
vomitted 10
germ 10
prostituting 10
excreted 10
rubbish 10
fucked 10
sodomy 10
untouchables 10
epidemics 10
swear 10
shits 10
whorehouses 10
pigsty 10
germs 10
prostituted 10
mud 10
dung 10
epidemic 10
rat 10
douchebag 10
perversely 10
pukes 10
puking 10
prostitutes 10
barfs 10
slutting 10
trashing 10
whored 10
douchebags 10
infection 10
shite 10
spoil 10
gross 10
repulsed 10
pus 10
festering 10
cockroaches 10
tainted 10
contagion 10
barfed 10
infects 10
damned 10
addictions 10
shitty 10
skanking 10
trash 10
whorehouse 10
phlegm 10
moldy 10
plaguing 10
shat 10
drugged 10
garbage 10
infecting 10
pandemics 10
viruses 10
nauseated 10
cockroach 10
puked 10
drugging 10
manure 10
mucking 10
lewd 10
alcoholics 10
gangrenous 10
barfing 10
gangrene 10
shitter 10
shittier 10
tarnished 10
cock 10
vomits 10
hookers 10
damn 10
addict 10
alcoholic 10
nausea 10
swearing 10
vomitting 10
skanked 10
infections 10
foul 10
prostitute 10
risque 10
lice 10
gonorrhea 10
wasting 10
profanities 10
divorces 10
crappy 10
spreading 10
wasters 10
addicting 10
trashed 10
addicts 10
scabies 10
swore 10
nauseous 10
phlegmatically 10
spoiling 10
nauseatingly 10
drugs 10
virus 10
waster 10
untouchable 10
addicted 10
damning 10
pandemic 10
hooker 10
bm 10
infected 10
festered 10
marred 10
phlegmatic 10
divorce 10
viral 10
contagiously 10
plagued 10
repulsing 10
swears 10
drug 10
spoiled 10
cum 10
divorcing 10
wasted 10
divorced 10
contagious 10
--------------------------------------------------------------------------------
/sources/MFD/moral_foundations_dictionary.dic:
--------------------------------------------------------------------------------
1 | %
2 | 01 HarmVirtue
3 | 02 HarmVice
4 | 03 FairnessVirtue
5 | 04 FairnessVice
6 | 05 IngroupVirtue
7 | 06 IngroupVice
8 | 07 AuthorityVirtue
9 | 08 AuthorityVice
10 | 09 PurityVirtue
11 | 10 PurityVice
12 | 11 MoralityGeneral
13 | %
14 | safe* 01
15 | peace* 01
16 | compassion* 01
17 | empath* 01
18 | sympath* 01
19 | care 01
20 | caring 01
21 | protect* 01
22 | shield 01
23 | shelter 01
24 | amity 01
25 | secur* 01
26 | benefit* 01
27 | defen* 01
28 | guard* 01
29 | preserve 01 07 09
30 |
31 | harm* 02
32 | suffer* 02
33 | war 02
34 | wars 02
35 | warl* 02
36 | warring 02
37 | fight* 02
38 | violen* 02
39 | hurt* 02
40 | kill 02
41 | kills 02
42 | killer* 02
43 | killed 02
44 | killing 02
45 | endanger* 02
46 | cruel* 02
47 | brutal* 02
48 | abuse* 02
49 | damag* 02
50 | ruin* 02 10
51 | ravage 02
52 | detriment* 02
53 | crush* 02
54 | attack* 02
55 | annihilate* 02
56 | destroy 02
57 | stomp 02
58 | abandon* 02 06
59 | spurn 02
60 | impair 02
61 | exploit 02 10
62 | exploits 02 10
63 | exploited 02 10
64 | exploiting 02 10
65 | wound* 02
66 |
67 | fair 03
68 | fairly 03
69 | fairness 03
70 | fair-* 03
71 | fairmind* 03
72 | fairplay 03
73 | equal* 03
74 | justice 03
75 | justness 03
76 | justifi* 03
77 | reciproc* 03
78 | impartial* 03
79 | egalitar* 03
80 | rights 03
81 | equity 03
82 | evenness 03
83 | equivalent 03
84 | unbias* 03
85 | tolerant 03
86 | equable 03
87 | balance* 03
88 | homologous 03
89 | unprejudice* 03
90 | reasonable 03
91 | constant 03
92 | honest* 03 11
93 |
94 | unfair* 04
95 | unequal* 04
96 | bias* 04
97 | unjust* 04
98 | injust* 04
99 | bigot* 04
100 | discriminat* 04
101 | disproportion* 04
102 | inequitable 04
103 | prejud* 04
104 | dishonest 04
105 | unscrupulous 04
106 | dissociate 04
107 | preference 04
108 | favoritism 04
109 | segregat* 04 05
110 | exclusion 04
111 | exclud* 04
112 | together 05
113 | nation* 05
114 | homeland* 05
115 | family 05
116 | families 05
117 | familial 05
118 | group 05
119 | loyal* 05 07
120 | patriot* 05
121 | communal 05
122 | commune* 05
123 | communit* 05
124 | communis* 05
125 | comrad* 05
126 | cadre 05
127 | collectiv* 05
128 | joint 05
129 | unison 05
130 | unite* 05
131 | fellow* 05
132 | guild 05
133 | solidarity 05
134 | devot* 05
135 | member 05
136 | cliqu* 05
137 | cohort 05
138 | ally 05
139 | insider 05
140 | foreign* 06
141 | enem* 06
142 | betray* 06 08
143 | treason* 06 08
144 | traitor* 06 08
145 | treacher* 06 08
146 | disloyal* 06 08
147 | individual* 06
148 | apostasy 06 08 10
149 | apostate 06 08 10
150 | deserted 06 08
151 | deserter* 06 08
152 | deserting 06 08
153 | deceiv* 06
154 | jilt* 06
155 | imposter 06
156 | miscreant 06
157 | spy 06
158 | sequester 06
159 | renegade 06
160 | terroris* 06
161 | immigra* 06
162 | obey* 07
163 | obedien* 07
164 | duty 07
165 | law 07
166 | lawful* 07 11
167 | legal* 07 11
168 | duti* 07
169 | honor* 07
170 | respect 07
171 | respectful* 07
172 | respected 07
173 | respects 07
174 | order* 07
175 | father* 07
176 | mother 07
177 | motherl* 07
178 | mothering 07
179 | mothers 07
180 | tradition* 07
181 | hierarch* 07
182 | authorit* 07
183 | permit 07
184 | permission 07
185 | status* 07
186 | rank* 07
187 | leader* 07
188 | class 07
189 | bourgeoisie 07
190 | caste* 07
191 | position 07
192 | complian* 07
193 | command 07
194 | supremacy 07
195 | control 07
196 | submi* 07
197 | allegian* 07
198 | serve 07
199 | abide 07
200 | defere* 07
201 | defer 07
202 | revere* 07
203 | venerat* 07
204 | comply 07
205 | defian* 08
206 | rebel* 08
207 | dissent* 08
208 | subver* 08
209 | disrespect* 08
210 | disobe* 08
211 | sediti* 08
212 | agitat* 08
213 | insubordinat* 08
214 | illegal* 08
215 | lawless* 08
216 | insurgent 08
217 | mutinous 08
218 | defy* 08
219 | dissident 08
220 | unfaithful 08
221 | alienate 08
222 | defector 08
223 | heretic* 08 10
224 | nonconformist 08
225 | oppose 08
226 | protest 08
227 | refuse 08
228 | denounce 08
229 | remonstrate 08
230 | riot* 08
231 | obstruct 08
232 | piety 09 11
233 | pious 09 11
234 | purity 09
235 | pure* 09
236 | clean* 09
237 | steril* 09
238 | sacred* 09
239 | chast* 09
240 | holy 09
241 | holiness 09
242 | saint* 09
243 | wholesome* 09 11
244 | celiba* 09
245 | abstention 09
246 | virgin 09
247 | virgins 09
248 | virginity 09
249 | virginal 09
250 | austerity 09
251 | integrity 09 11
252 | modesty 09
253 | abstinen* 09
254 | abstemiousness 09
255 | upright 09 11
256 | limpid 09
257 | unadulterated 09
258 | maiden 09
259 | virtuous 09
260 | refined 09
261 | decen* 09 11
262 | immaculate 09
263 | innocent 09
264 | pristine 09
265 | church* 09
266 | disgust* 10
267 | deprav* 10
268 | disease* 10
269 | unclean* 10
270 | contagio* 10
271 | indecen* 10 11
272 | sin 10
273 | sinful* 10
274 | sinner* 10
275 | sins 10
276 | sinned 10
277 | sinning 10
278 | slut* 10
279 | whore 10
280 | dirt* 10
281 | impiety 10
282 | impious 10
283 | profan* 10
284 | gross 10
285 | repuls* 10
286 | sick* 10
287 | promiscu* 10
288 | lewd* 10
289 | adulter* 10
290 | debauche* 10
291 | defile* 10
292 | tramp 10
293 | prostitut* 10
294 | unchaste 10
295 | intemperate 10
296 | wanton 10
297 | profligate 10
298 | filth* 10
299 | trashy 10
300 | obscen* 10
301 | lax 10
302 | taint* 10
303 | stain* 10
304 | tarnish* 10
305 | debase* 10
306 | desecrat* 10
307 | wicked* 10 11
308 | blemish 10
309 | exploitat* 10
310 | pervert 10
311 | wretched* 10 11
312 | righteous* 11
313 | moral* 11
314 | ethic* 11
315 | value* 11
316 | upstanding 11
317 | good 11
318 | goodness 11
319 | principle* 11
320 | blameless 11
321 | exemplary 11
322 | lesson 11
323 | canon 11
324 | doctrine 11
325 | noble 11
326 | worth* 11
327 | ideal* 11
328 | praiseworthy 11
329 | commendable 11
330 | character 11
331 | proper 11
332 | laudable 11
333 | correct 11
334 | wrong* 11
335 | evil 11
336 | immoral* 11
337 | bad 11
338 | offend* 11
339 | offensive* 11
340 | transgress* 11
341 |
--------------------------------------------------------------------------------
/sources/NRC/create_data_dictionary-NRC.R:
--------------------------------------------------------------------------------
1 | library("quanteda")
2 |
3 | class(data_dictionary_NRC) <- "dictionary2"
4 | data_dictionary_NRC <- as.dictionary(data_dictionary_NRC)
5 |
6 | meta(data_dictionary_NRC) <-
7 | list(
8 | title = "NRC Word-Emotion Association Lexicon",
9 | description = "A quanteda dictionary object containing Mohammad and Charron's (2010, 2013) English version of the NRC Word-Emotion Association Lexicon (aka NRC Emotion Lexicon aka EmoLex): association of words with eight emotions (anger, fear, anticipation, trust, surprise, sadness, joy, and disgust) and two sentiments (negative and positive) manually annotated on Amazon's Mechanical Turk. Available in 40 different languages.",
10 | url = "http://saifmohammad.com/WebPages/AccessResource.htm",
11 | description = "A quanteda dictionary object containing 2,006 positive and 4,783 negative words from Hu and Liu (2004, 2005).",
12 | license = "Open, or for commercial for CAD $975.",
13 | reference = "Mohammad, S. & Turney, P. (2013). Crowdsourcing a Word-Emotion Association Lexicon. _Computational Intelligence_, 29(3), 436--465. https://arxiv.org/abs/1308.6297
14 |
15 | Mohammad, S. & Turney, P. (2010). Emotions Evoked by Common Words and Phrases: Using Mechanical Turk to Create an Emotion Lexicon. In _Proceedings of the NAACL-HLT 2010 Workshop on Computational Approaches to Analysis and Generation of Emotion in Text_, June 2010, LA, California. https://dl.acm.org/doi/10.5555/1860631.1860635"
16 | )
17 |
18 | polarity(data_dictionary_NRC) <- list(pos = c("positive"), neg = c("negative"))
19 |
20 | usethis::use_data(data_dictionary_NRC, overwrite = TRUE)
21 |
--------------------------------------------------------------------------------
/sources/RID/create-data_dictionary_RID.R:
--------------------------------------------------------------------------------
1 | # Regressive Imagery Dictionary
2 |
3 | library("quanteda")
4 |
5 | data_dictionary_RID <-
6 | dictionary(file = "RID/RID.CAT")
7 | devtools::use_data(data_dictionary_RID, overwrite = TRUE)
8 |
--------------------------------------------------------------------------------
/sources/Rauh/Rauh_SentDictionaryGerman.Rdata:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/sources/Rauh/Rauh_SentDictionaryGerman.Rdata
--------------------------------------------------------------------------------
/sources/Rauh/Rauh_SentDictionaryGerman_Negation.Rdata:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/sources/Rauh/Rauh_SentDictionaryGerman_Negation.Rdata
--------------------------------------------------------------------------------
/sources/Rauh/create-data_dictionary_Rauh.R:
--------------------------------------------------------------------------------
#' Rauh's German Political Sentiment Dictionary
#'
#' Builds data_dictionary_Rauh from the two Harvard Dataverse .Rdata files
#' and saves it as package data.  Run from the sources/ directory.

library("quanteda")
library("dplyr")

# load dictionary dataframes (downloaded here: https://doi.org/10.7910/DVN/BKBXWD)
load("Rauh/Rauh_SentDictionaryGerman_Negation.Rdata")
load("Rauh/Rauh_SentDictionaryGerman.Rdata")

# new column where NOT and word are divided with a space; negated terms are
# recoded into the neg_* categories used by the LSD2015-style scheme
neg.sent.dictionary <- neg.sent.dictionary %>%
  mutate(word = gsub("NOT_", "NOT ", feature)) %>%
  mutate(sentiment = ifelse(sentiment == 1, "neg_negative", "neg_positive"))

sent.dictionary <- sent.dictionary %>%
  mutate(word = feature) %>%
  mutate(sentiment = ifelse(sentiment == -1, "negative", "positive"))

# bind both dataframes
sent_dictionary_rauh <- bind_rows(sent.dictionary, neg.sent.dictionary)

# save as quanteda dictionary (word and sentiment column); the result is
# already a dictionary, so the original's second as.dictionary() call was
# redundant and has been removed
data_dictionary_Rauh <- quanteda::as.dictionary(sent_dictionary_rauh)

meta(data_dictionary_Rauh) <-
  list(
    title = "Rauh's German Political Sentiment Dictionary",
    description = "A quanteda dictionary object containing the dictionaries provided in Rauh (forthcoming). Rauh assesses its performance against human intuition of sentiment in German political language (parliamentary speeches, party manifestos, and media coverage). The resource builds on, harmonizes and extends the SentiWS (Remus et al. 2010) and GermanPolarityClues (Waltinger 2010) dictionaries. In order to use the negation correction provided by the dictionary, currently a combination of tokens_replace and tokens_compound is required to harmonize the five covered bi-gram patterns prior to scoring. The example below shows how to conduct this transformation. Note that the process changes the terms 'nicht|nichts|kein|keine|keinen' to a joint term altering some of the features of the original corpus.",
    url = "https://doi.org/10.7910/DVN/BKBXWD",
    reference = "Rauh, C. (2018). Validating a Sentiment Dictionary for German Political Language: A Workbench Note. Journal of Information Technology & Politics, 15(4), 319-343.

Remus, R., Quasthoff U., & Heyer, G. (2010). \"SentiWS - a Publicly Available German-language Resource for Sentiment Analysis.\" In Proceedings of the 7th International Language Resources and Evaluation (LREC'10), 1168-1171.

Waltinger, U. (2010). \"GermanPolarityClues: A Lexical Resource for German Sentiment Analysis.\" In International Conference on Language Resources and Evaluation, 17-23 May 2010 LREC'10.",
    license = "Unknown"
  )

# negated categories count toward the opposite pole
polarity(data_dictionary_Rauh) <-
  list(pos = c("positive", "neg_negative"), neg = c("negative", "neg_positive"))

usethis::use_data(data_dictionary_Rauh, overwrite = TRUE)
44 |
--------------------------------------------------------------------------------
/sources/geninquirer/create-data_dictionary_geninquirer.R:
--------------------------------------------------------------------------------
# Augmented General Inquirer Positiv and Negativ dictionary
#
# Builds data_dictionary_geninqposneg from the augmented General Inquirer
# spreadsheet and saves it as package data.  Run from the sources/ directory.

library("quanteda")

geninquirer <- read.csv("geninquirer/inquireraugmented.csv",
                        stringsAsFactors = FALSE, comment.char = "")

# positive terms: Positiv category plus the "Yes" entries; strip the
# "#<sense>" suffixes and de-duplicate
GIpos <-
  c(geninquirer$Entry[geninquirer$Positiv == "Positiv"],
    geninquirer$Entry[geninquirer$Yes == "Yes"]) %>%
  char_tolower %>%
  stringi::stri_replace_all_regex("#\\w+$", "") %>%
  unique
# negative terms: Negativ category plus the "No" entries
GIneg <-
  c(geninquirer$Entry[geninquirer$Negativ == "Negativ"],
    geninquirer$Entry[geninquirer$No == "No"]) %>%
  char_tolower %>%
  stringi::stri_replace_all_regex("#\\w+$", "") %>%
  unique
data_dictionary_geninqposneg <-
  dictionary(list(positive = GIpos, negative = GIneg))

# metadata: the original listed `url` twice; the duplicate has been removed
meta(data_dictionary_geninqposneg) <-
  list(
    title = "Augmented General Inquirer Positiv and Negativ dictionary",
    url = "http://www.wjh.harvard.edu/~inquirer/spreadsheet_guide.htm",
    description = "A lexicon containing the Positiv and Negativ dictionary entries from the augmented
General Inquirer. These are new valence categories described at
`http://www.wjh.harvard.edu/~inquirer/homecat.htm` but also include the
terms from the 'yes' and
'no' dictionary entries.",
    license = "Open, but email the creators for commercial use. Many more categories are available.",
    reference = "Stone, P.J., Dunphy, C.D., & Smith, M.S. (1966). _The General Inquirer: A Computer Approach to Content Analysis._ Cambridge, MA: MIT Press."
  )

polarity(data_dictionary_geninqposneg) <-
  list(pos = "positive", neg = "negative")


usethis::use_data(data_dictionary_geninqposneg, overwrite = TRUE)
39 |
--------------------------------------------------------------------------------
/sources/make_sentiment_dictionaries.R:
--------------------------------------------------------------------------------
## (re)make all sentiment dictionaries

library("quanteda")

# each script below creates and saves one data_dictionary_* object;
# run from the sources/ directory
dictionary_scripts <- c(
  "AFINN/create-data_dictionary_AFINN.R",
  "ANEW/create-data_dictionary_ANEW.R",
  "geninquirer/create-data_dictionary_geninquirer.R",
  "Hu-Liu/create_data_dictionary-HuLiu.R",
  "Loughran-McDonald/create-data_dictionary_LoughranMcDonald.R",
  "NRC/create_data_dictionary-NRC.R",
  "Rauh/create-data_dictionary_Rauh.R",
  "sentiws/create-data_dictionary_sentiws.R"
)
for (script in dictionary_scripts) {
  source(script)
}

## not sentiment dictionaries
# source("Laver-Garry/create-data_dictionary_LaverGarry.R")
# source("MFD/create-data_dictionary_MFD.R")
# source("RID/create-data_dictionary_RID.R")

# LSD: start from the copy shipped with quanteda, add polarity, and rename
# its "source" metadata field to "reference" before saving
data("data_dictionary_LSD2015", package = "quanteda")
polarity(data_dictionary_LSD2015) <-
  list(pos = c("positive", "neg_negative"), neg = c("negative", "neg_positive"))
names(meta(data_dictionary_LSD2015))[which(names(meta(data_dictionary_LSD2015)) == "source")] <- "reference"
usethis::use_data(data_dictionary_LSD2015, overwrite = TRUE)
25 |
--------------------------------------------------------------------------------
/sources/sentiws/create-data_dictionary_sentiws.R:
--------------------------------------------------------------------------------
# SentiWS Dictionary
#
# Builds data_dictionary_sentiws (with polarity and per-word valences) from
# the SentiWS v1.8c source files and saves it as package data.

library("quanteda")
library("dplyr")
library("tidyr")
library("stringr")

# Read one SentiWS source file and return a data.frame with columns
# word and score, one row per word form (headword plus inflections).
# NOTE: uses the superseded tidyr::gather(); kept as-is to preserve the
# exact reshaping behaviour of the original data build.
read_senti_scores <- function(filename) {
  # fix: the original assigned the pipeline to a dead local (`results <-`)
  # and relied on the assignment's invisible value as the function result;
  # return the pipeline directly instead
  read.delim(filename, header = FALSE, encoding = "UTF-8") %>%
    cbind(str_split_fixed(.$V3, "[,-]", 50), stringsAsFactors = FALSE) %>%
    mutate(
      V1 = str_sub(str_match(V1, ".*\\|"), 1, -2),
      nr = row_number()
    ) %>%
    select(-V3) %>%
    mutate(nr = as.character(nr)) %>%
    gather(wordstem, word, V1, 1:48, -nr, -V2) %>%
    select(word, V2) %>% rename(score = V2) %>%
    filter(word != "") %>%
    arrange(word)
}

positive <- read_senti_scores("sentiws/SentiWS_v1.8c_Positive.txt") %>%
  mutate(sentiment = "positive") %>%
  unique()
negative <- read_senti_scores("sentiws/SentiWS_v1.8c_Negative.txt") %>%
  mutate(sentiment = "negative") %>%
  unique()
sentis <- bind_rows(positive, negative)

data_dictionary_sentiws <- as.dictionary(sentis)

polarity(data_dictionary_sentiws) <-
  list(pos = c("positive"), neg = c("negative"))
# per-word valences keep the original [-1, 1] SentiWS weights
valence(data_dictionary_sentiws) <-
  list(positive = positive[!duplicated(positive$word), "score"],
       negative = negative[!duplicated(negative$word), "score"])

meta(data_dictionary_sentiws) <-
  list(
    title = "SentimentWortschatz (SentiWS)",
    description = "A quanteda dictionary object containing SentimentWortschatz (SentiWS), a publicly available German-language resource for sentiment analysis. The current version of SentiWS contains 1,650 positive and 1,818 negative words, which sum up to 15,649 positive and 15,632 negative word forms including their inflections. It not only contains adjectives and adverbs explicitly expressing a sentiment, but also nouns and verbs implicitly containing one. The original dictionary weights within the interval of -1 to 1. Note that the version implemented in quanteda.dictionaries uses a binary classification into positive (weight > 0) and negative (weight < 0) features.",
    url = "http://wortschatz.uni-leipzig.de/en/download/",
    reference = "Remus, R., Quasthoff U., and Heyer, G. (2010). [SentiWS: a Publicly Available German-language Resource for Sentiment Analysis](http://www.lrec-conf.org/proceedings/lrec2010/pdf/490_Paper.pdf). In _Proceedings of the 7th International Language Ressources and Evaluation (LREC'10)_, 1168--1171.",
    license = "CC-BY-NC-SA 3.0"
  )

usethis::use_data(data_dictionary_sentiws, overwrite = TRUE)
51 |
52 |
--------------------------------------------------------------------------------
/sources/test-misc.R:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quanteda/quanteda.sentiment/934c1e1f0b1cdf6d9353df0a3759f869f573b6af/sources/test-misc.R
--------------------------------------------------------------------------------
/tests/spelling.R:
--------------------------------------------------------------------------------
# Run the package spell check over docs and vignettes when the optional
# spelling package is installed; misspellings are reported, not fatal.
if (requireNamespace("spelling", quietly = TRUE)) {
  spelling::spell_check_test(
    vignettes = TRUE,
    error = FALSE,
    skip_on_cran = TRUE
  )
}
4 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
# testthat bootstrap: attach the package under test (and quanteda, whose
# objects and verbs the tests use), then run the whole suite
library("testthat")
library("quanteda")
library("quanteda.sentiment")

test_check("quanteda.sentiment")
6 |
--------------------------------------------------------------------------------
/tests/testthat/test-data.R:
--------------------------------------------------------------------------------
library("quanteda")
data("data_dictionary_LSD2015", package = "quanteda.sentiment")

# Checks the dictionary print method for every bundled dictionary,
# confirming that the polarity and/or valence summary lines appear.
# Skipped for now: the number of digits printed for valences is unstable.
test_that("dictionaries have polarities and valences set", {
  skip("skip until digits issue can be solved")
  expect_output(
    print(data_dictionary_AFINN, 0, 0),
    "Dictionary object with 1 key entry.\nValences set for keys: AFINN.",
    fixed = TRUE
  )

  expect_output(
    print(data_dictionary_ANEW, 0, 0),
    "Dictionary object with 3 key entries.\nValences set for keys: pleasure, arousal, dominance ",
    fixed = TRUE
  )

  expect_output(
    print(data_dictionary_geninqposneg, 0, 0),
    'Dictionary object with 2 key entries.\nPolarities: pos = "positive"; neg = "negative" ',
    fixed = TRUE
  )

  expect_output(
    print(data_dictionary_HuLiu, 0, 0),
    'Dictionary object with 2 key entries.\nPolarities: pos = "positive"; neg = "negative" ',
    fixed = TRUE
  )

  # Loughran-McDonald uses upper-case key names
  expect_output(
    print(data_dictionary_LoughranMcDonald, 0, 0),
    'Dictionary object with 9 key entries.\nPolarities: pos = "POSITIVE"; neg = "NEGATIVE" ',
    fixed = TRUE
  )

  expect_output(
    print(data_dictionary_LSD2015, 0, 0),
    'Dictionary object with 4 key entries.\nPolarities: pos = "positive", "neg_negative"; neg = "negative", "neg_positive" ',
    fixed = TRUE
  )

  expect_output(
    print(data_dictionary_NRC, 0, 0),
    'Dictionary object with 10 key entries.\nPolarities: pos = "positive"; neg = "negative" ',
    fixed = TRUE
  )

  expect_output(
    print(data_dictionary_Rauh, 0, 0),
    'Dictionary object with 4 key entries.\nPolarities: pos = "positive", "neg_negative"; neg = "negative", "neg_positive" ',
    fixed = TRUE
  )

  # sentiws carries both polarity and valence
  expect_output(
    print(data_dictionary_sentiws, 0, 0),
    'Dictionary object with 2 key entries.\nPolarities: pos = "positive"; neg = "negative" \nValences set for keys: positive, negative ',
    fixed = TRUE
  )
})
60 |
test_that("dictionaries have metadata set", {
  # a dictionary's metadata is complete when all five standard fields
  # (title, description, url, reference, license) are present
  meta_ok <- function(d) {
    fields <- c("title", "description", "url", "reference", "license")
    absent <- setdiff(fields, names(meta(d)))
    if (length(absent) == 0) {
      TRUE
    } else {
      warning("MISSING: ", paste(absent, collapse = " "), call. = FALSE)
      FALSE
    }
  }
  expect_true(meta_ok(data_dictionary_AFINN))
  expect_true(meta_ok(data_dictionary_ANEW))
  expect_true(meta_ok(data_dictionary_geninqposneg))
  expect_true(meta_ok(data_dictionary_HuLiu))
  expect_true(meta_ok(data_dictionary_LoughranMcDonald))
  expect_true(meta_ok(data_dictionary_LSD2015))
  expect_true(meta_ok(data_dictionary_NRC))
  expect_true(meta_ok(data_dictionary_Rauh))
  expect_true(meta_ok(data_dictionary_sentiws))
})
82 |
83 |
--------------------------------------------------------------------------------
/tests/testthat/test-misc.R:
--------------------------------------------------------------------------------
library("quanteda")

# Print-method output for dictionaries carrying valence or polarity.
# Skipped for now: the number of digits shown for valences is unstable.
test_that("printing augmented dictionary works", {
  skip("skip until digits issue can be solved")
  expect_output(
    print(data_dictionary_AFINN, 0, 0),
    "Dictionary object with 1 key entry.\nValences set for keys: AFINN ",
    fixed = TRUE
  )

  # a nested dictionary should report primary keys and nesting depth
  dict <- quanteda::dictionary(list(one = list(oneA = c("a", "b"),
                                               oneB = "d"),
                                    two = c("x", "y")))
  polarity(dict) <- list(pos = "one", neg = "two")
  expect_output(
    print(dict, 0, 0),
    'Dictionary object with 2 primary key entries and 2 nested levels.\nPolarities: pos = "one"; neg = "two" ',
    fixed = TRUE
  )
})
21 |
test_that("friendly error messages work", {
  # both textstat functions share the same unsupported-input message,
  # differing only in the function name
  unsupported_msg <- function(fn) {
    paste0(fn, "() only works on character, corpus, dfm, tokens objects.")
  }
  expect_error(
    textstat_polarity(0),
    unsupported_msg("textstat_polarity"),
    fixed = TRUE
  )
  expect_error(
    textstat_valence(0),
    unsupported_msg("textstat_valence"),
    fixed = TRUE
  )
})
34 |
# Subsetting a dictionary should carry its valence/polarity attributes
# along.  The print expectations are commented out pending the print-method
# digits fix; the assignment below at least exercises the subsetting path.
test_that("subsetting preserves valence and polarity", {
  # expect_output(
  #   print(data_dictionary_ANEW[1], 0, 0),
  #   "Dictionary object with 1 key entry.\nValences set for keys: pleasure, arousal, dominance ",
  #   fixed = TRUE
  # )
  # expect_output(
  #   print(data_dictionary_ANEW["pleasure"], 0, 0),
  #   "Dictionary object with 1 key entry.\nValences set for keys: pleasure, arousal, dominance ",
  #   fixed = TRUE
  # )

  dict <- quanteda::dictionary(list(one = c("a", "b"),
                                    two = c("c", "d"),
                                    three = c("e", "f")))
  polarity(dict) <- list(pos = c("one", "two"), neg = "three")

  # expect_output(
  #   print(dict[c(1, 3)], 0, 0),
  #   'Dictionary object with 2 key entries.\nPolarities: pos = "one", "two"; neg = "three"',
  #   fixed = TRUE
  # )
})
58 |
--------------------------------------------------------------------------------
/tests/testthat/test-test.R:
--------------------------------------------------------------------------------
# Minimal reproduction of the dictionary print "digits" problem; kept
# skipped until the underlying print-method issue is resolved.
test_that("investigate digits problem", {
  skip("skip until digits issue can be solved")

  data("data_dictionary_LSD2015", package = "quanteda")
  expect_output(print(data_dictionary_LSD2015, max_nkey = 0, max_nval = 0),
                "Dictionary object with 4 key entries.",
                fixed = TRUE)

  expect_output(print(data_dictionary_geninqposneg, 0, 0),
                "Dictionary object with 2 key entries.",
                fixed = TRUE)
})
13 |
--------------------------------------------------------------------------------
/tests/testthat/test-textstat_polarity.R:
--------------------------------------------------------------------------------
library("quanteda")

test_that("textstat_polarity works on all object types", {
  txt <- c(d1 = "good good bad bad good word1 word1 word1 word2 word2",
           d2 = "good",
           d3 = "notsentiment",
           d4 = "Great!",
           d5 = "good good")

  # expected logit-scale score per document: log(pos + 0.5) - log(neg + 0.5)
  smooth <- 0.5
  pos_counts <- c(3, 1, 0, 1, 2)
  neg_counts <- c(2, 0, 0, 0, 0)
  logit <- log(pos_counts + smooth) - log(neg_counts + smooth)

  data(data_dictionary_LSD2015, package = "quanteda.sentiment")

  expect_equivalent(
    textstat_polarity(txt, dictionary = data_dictionary_LSD2015),
    data.frame(doc_id = names(txt), sentiment = logit, stringsAsFactors = FALSE)
  )

  # character input must give identical results to every other object type
  for (input in list(corpus(txt), tokens(txt), dfm(tokens(txt)))) {
    expect_identical(
      textstat_polarity(txt, dictionary = data_dictionary_LSD2015),
      textstat_polarity(input, dictionary = data_dictionary_LSD2015)
    )
  }
})
35 |
test_that("different sentiment functions work as expected", {
  txt <- c(d1 = "good good bad bad good word1 word1 word1 word2 word2",
           d2 = "good",
           d3 = "notsentiment",
           d4 = "Great!",
           d5 = "good good")

  # per-document positive and negative match counts under LSD2015
  pos_counts <- c(3, 1, 0, 1, 2)
  neg_counts <- c(2, 0, 0, 0, 0)

  data(data_dictionary_LSD2015, package = "quanteda.sentiment")

  # logit scale (the default scoring function)
  smooth <- 0.5
  logit <- log(pos_counts + smooth) - log(neg_counts + smooth)
  expect_equal(
    logit,
    textstat_polarity(txt, dictionary = data_dictionary_LSD2015)$sentiment
  )

  # relative proportional difference: (pos - neg) / (pos + neg);
  # d3 has no matches, so 0/0 yields NaN by design
  rpd <- (pos_counts - neg_counts) / c(5, 1, 0, 1, 2)
  expect_equal(
    rpd,
    textstat_polarity(txt, dictionary = data_dictionary_LSD2015,
                      fun = sent_relpropdiff)$sentiment
  )

  # absolute proportional difference: (pos - neg) / total tokens
  apd <- (pos_counts - neg_counts) / unname(ntoken(txt))
  expect_equal(
    apd,
    textstat_polarity(txt, dictionary = data_dictionary_LSD2015,
                      fun = sent_abspropdiff)$sentiment
  )
})
80 |
test_that("textstat_polarity error conditions work", {
  keys <- list(
    happy = c("happy", "jubilant", "exuberant"),
    sad = c("sad", "morose", "down"),
    okay = "just okay"
  )
  dict <- dictionary(keys)

  # scoring requires polarity() to have been set on the dictionary
  expect_error(
    textstat_polarity("Happy, sad, neutral.", dictionary = dict),
    "polarity is not set for this dictionary; see ?polarity",
    fixed = TRUE
  )
})
94 |
# polarity()<- assignment: accepts a list with pos/neg (and optional neut)
# entries naming existing dictionary keys, and rejects anything else.
test_that("polarity functions work", {
  dict <- dictionary(list(
    happy = c("happy", "jubilant", "exuberant"),
    sad = c("sad", "morose", "down"),
    okay = "just okay"
  ))

  # unset by default
  expect_equal(polarity(dict), NULL)

  polarity(dict) <- list(pos = "happy", neg = "sad")
  expect_identical(
    polarity(dict),
    list(pos = "happy", neg = "sad")
  )

  polarity(dict) <- list(pos = "happy", neg = "sad", neut = "okay")
  expect_identical(
    polarity(dict),
    list(pos = "happy", neg = "sad", neut = "okay")
  )

  # multiple keys may share one pole
  polarity(dict) <- list(pos = c("happy", "okay"), neg = "sad")
  expect_identical(
    polarity(dict),
    list(pos = c("happy", "okay"), neg = "sad")
  )

  # only pos/neg/neut names are allowed
  expect_error(
    polarity(dict) <- list(blank = "happy", neg = "sad"),
    "value must be a list of 'pos', 'neg', and (optionally) 'neut'",
    fixed = TRUE
  )
  expect_error(
    polarity(dict) <- list(pos = "happy", neg = "sad", neutr = "okay"),
    "value must be a list of 'pos', 'neg', and (optionally) 'neut'",
    fixed = TRUE
  )

  # this should generate an error
  expect_error(
    polarity(dict) <- list(pos = "notfound", neg = "sad"),
    "'notfound' key not found in this dictionary"
  )

  # should test that both pos and neg are assigned ?

})
142 |
# get_polarity_dictionary() (internal) recasts a polarity-annotated
# dictionary into canonical pos/neg/neut keys and validates that every key
# referenced by the polarity attribute still exists.
test_that("get_polarity_dictionary() works", {
  dict <- dictionary(list(
    happy = c("happy", "jubilant", "exuberant"),
    sad = c("sad", "morose", "down"),
    okay = "just okay"
  ))
  expect_equal(polarity(dict), NULL)

  polarity(dict) <- list(pos = "happy", neg = "sad", neut = "okay")

  # keys are renamed to their pole names, values kept intact
  expect_identical(
    quanteda.sentiment:::get_polarity_dictionary(dict) |>
      quanteda::as.list(),
    list(pos = c("happy", "jubilant", "exuberant"),
         neg = c("sad", "morose", "down"),
         neut = "just okay")
  )

  expect_identical(
    quanteda.sentiment:::get_polarity_dictionary(dict) |> polarity(),
    list(pos = "pos", neg = "neg", neut = "neut")
  )

  # removing a key that polarity still references should be caught
  polarity(dict) <- list(pos = "happy", neg = "sad", neut = "okay")
  dict["okay"] <- NULL
  expect_error(
    quanteda.sentiment:::get_polarity_dictionary(dict),
    "'okay' key not found in this dictionary"
  )
})
173 |
test_that("nested scope works for textstatpolarity on tokens", {
  # "not good" is a multi-word value that must be matched inside tokens
  dict <- dictionary(list(positive = "good", negative = "not good"))
  polarity(dict) <- list(pos = "positive", neg = "negative")
  valence(dict) <- c(positive = 1, negative = -1)

  result <- textstat_polarity(tokens("The test is not good"),
                              dictionary = dict, fun = sent_abspropdiff)
  expect_equivalent(
    result,
    data.frame(doc_id = "text1", sentiment = -0.25, row.names = NULL)
  )
})
185 |
--------------------------------------------------------------------------------
/tests/testthat/test-textstat_valence.R:
--------------------------------------------------------------------------------
library("quanteda")

# Valence scoring with one uniform weight per key: every value under a key
# inherits that key's single numeric valence.
test_that("textstat_valence works for uniform valences within key", {
  dict <- dictionary(list(positive = c("good", "great"),
                          negative = c("bad"),
                          neg_positive = "not good",
                          neg_negative = "not bad"))
  txt <- c(d1 = "good good bad bad good word1 word1 word1 word2 word2",
           d2 = "good",
           d3 = "notsentiment",
           d4 = "Great! Not bad.",
           d5 = "good good not good bad")

  # for two categories
  valence(dict) <- list(positive = 1, negative = -1)

  corp <- corpus(txt)
  toks <- tokens(corp)
  dfmat <- dfm(toks)

  expect_identical(
    textstat_valence(corp, dict),
    textstat_valence(toks, dict)
  )
  expect_identical(
    textstat_valence(corp, dict),
    textstat_valence(dfmat, dict)
  )

  # expected: sum(count * valence) / number of valenced matches, per doc
  expect_identical(
    textstat_valence(corp, dict)$sentiment,
    c((3 * 1 + 2 * -1) / (3 + 2),
      (1 * 1 + 0 * -1) / (1 + 0),
      (0 * 1 + 0 * -1) / (1),
      (1 * 1 + 1 * -1) / (1 + 1),
      (3 * 1 + 1 * -1) / (3 + 1))
  )

  # for multiple categories within one polarity
  valence(dict) <- list(positive = 1, negative = -1,
                        neg_negative = 1, neg_positive = -1)
  expect_identical(
    textstat_valence(corp, dict),
    textstat_valence(toks, dict)
  )
  # NOTE(review): corpus and dfm scores are expected to differ here —
  # presumably because multi-word values ("not good"/"not bad") cannot
  # match in a unigram dfm; confirm against textstat_valence internals
  expect_equal(
    all.equal(textstat_valence(corp, dict)$sentiment,
              textstat_valence(dfmat, dict)$sentiment),
    "Mean relative difference: 1.5"
  )
  expect_identical(
    textstat_valence(corp, dict)$sentiment,
    c((3 * 1 + 2 * -1) / (5),
      (1 * 1 + 0 * -1) / (1),
      (0 * 1 + 0 * -1) / (1),
      (2 * 1 + 0 * -1) / (2),
      (2 * 1 + 2 * -1) / (4))
  )
})
60 |
# Valence scoring with per-value weights: each word pattern within a key
# carries its own numeric score.
test_that("textstat_valence with individual value scores works", {
  dict <- dictionary(list(
    happy = c("happy", "jubilant", "exuberant"),
    sad = c("sad", "morose", "down"),
    okay = c("just okay", "okay")
  ))
  valence(dict) <- list(
    happy = c("happy" = 1, "jubilant" = 2, "exuberant" = 2),
    sad = c("sad" = -1, "morose" = -2, "down" = -1),
    okay = c("just okay" = 0.5, "okay" = 5)
  )
  txt <- c(d1 = "sad word happy word exuberant",
           d2 = "down sad just okay",
           d3 = "sad happy word word")

  corp <- corpus(txt)
  # compound the multi-word value ("just okay") so it counts as one token
  toks <- tokens(corp) %>%
    tokens_compound(dict, concatenator = " ")
  dfmat <- dfm(toks)

  expect_identical(
    textstat_valence(corp, dict),
    textstat_valence(toks, dict)
  )
  expect_identical(
    textstat_valence(corp, dict),
    textstat_valence(dfmat, dict)
  )

  # mean valence over matched tokens in each document
  sent <- c((-1 + 1 + 2) / 3,
            (-1 - 1 + 0.5) / 3,
            (-1 + 1) / 2)
  expect_identical(
    textstat_valence(txt, dict),
    data.frame(doc_id = docnames(dfmat),
               sentiment = sent)
  )
})
99 |
test_that("textstat_valence error conditions work", {
  keys <- list(
    happy = c("happy", "jubilant", "exuberant"),
    sad = c("sad", "morose", "down"),
    okay = "just okay"
  )
  dict <- dictionary(keys)

  # scoring requires valence() to have been set on the dictionary
  expect_error(
    textstat_valence("Happy, sad, neutral.", dictionary = dict),
    "no valenced keys found"
  )
})
111 |
# valence()<- assignment: a single number per key is expanded to one value
# per word pattern in that key, named by pattern.
test_that("valence assignment functions work", {
  dict <- dictionary(list(
    happy = c("happy", "jubilant", "exuberant"),
    sad = c("sad", "morose", "down"),
    okay = "just okay"
  ))

  # unset by default
  expect_equal(valence(dict), NULL)

  expect_error(
    valence(dict) <- list(happy = "a", sad = -1),
    "valence values must be numeric"
  )

  valence(dict) <- list(happy = 1, sad = -1, okay = 0)
  expect_identical(
    valence(dict),
    list(happy = c(happy = 1, jubilant = 1, exuberant = 1),
         sad = c(sad = -1, morose = -1, down = -1),
         okay = c(`just okay` = 0))
  )
})
134 |
test_that("valence error checks work", {
  # dictionaries with nested keys cannot carry valences
  nested_dict <- dictionary(list(top = c("top1", "top2"),
                                 nested = list(nest1 = c("a", "one"),
                                               nest2 = c("b", "two"))))
  expect_error(
    valence(nested_dict) <- list(top = c(1, 2), nested = -5),
    "valenced dictionaries cannot be nested"
  )
})
144 |
# The dictionary print method should list the keys for which valences have
# been set, for both scalar and per-value valence assignments.
test_that("dictionary print method shows valence and polarity", {
  dict <- dictionary(list(
    happy = c("happy", "jubilant", "exuberant"),
    sad = c("sad", "morose", "down")
  ))
  valence(dict) <- c(happy = 1, sad = -1)
  expect_output(print(dict),
                "Dictionary object with 2 key entries.
Valences set for keys: happy, sad
- [happy]:
  - happy, jubilant, exuberant
- [sad]:
  - sad, morose, down", fixed = TRUE)

  # per-value (vector) valences should print the same key summary
  dict <- dictionary(list(
    happiness = c("happy", "jubilant", "exuberant", "content"),
    anger = c("mad", "peeved", "irate", "furious", "livid")
  ))
  valence(dict) <- list(happiness = c(3, 4, 5, 2),
                        anger = c(3.1, 2.4, 2.9, 4.1, 5.0))
  expect_output(print(dict),
                "Dictionary object with 2 key entries.
Valences set for keys: happiness, anger
- [happiness]:
  - happy, jubilant, exuberant, content
- [anger]:
  - mad, peeved, irate, furious, livid", fixed = TRUE)
})
173 |
# When a word pattern appears under more than one key, each matching token
# contributes its valence from every key that lists it.
test_that("overlapping values work as expected", {
  dict <- dictionary(list(
    happy = c("okay", "exuberant"),
    sad = c("okay", "depressed")
  ))
  valence(dict) <- list(happy = c(okay = 1, exuberant = 3),
                        sad = c(depressed = -4, okay = -2))
  # "okay" counts under both keys (valences 1 and -2), plus -4 for
  # "depressed", averaged over 3 matches
  expect_identical(
    textstat_valence("Depressed not okay", dict)$sentiment,
    (-4 + 1 - 2) / 3
  )
  expect_identical(
    textstat_valence("Depressed not okay", dict)$sentiment,
    textstat_valence(dfm(tokens("Depressed not okay")), dict)$sentiment
  )
})
190 |
# Normalization options for textstat_valence() should match the
# corresponding textstat_polarity() scoring functions.
test_that("normalization methods work for textstat_valence", {
  # fix: the original had a stray duplicated assignment
  # (`dict <- dict <- dictionary(...)`)
  dict <- dictionary(list(positive = c("good", "great"),
                          negative = c("bad", "awful")))
  valence(dict) <- list(positive = 1, negative = -1)
  polarity(dict) <- list(pos = "positive", neg = "negative")

  txt <- c(d1 = "Good good bad other.",
           d2 = "Word word other bad!",
           d3 = "Great awful other £1.")
  toks <- tokens(txt)

  # relative proportional difference
  pol_rpd <- data.frame(doc_id = docnames(toks),
                        sentiment = c( (2 - 1) / 3,
                                       (0 - 1) / 1,
                                       (1 - 1) / 2 ))
  expect_equivalent(
    textstat_polarity(toks, dict, sent_relpropdiff),
    pol_rpd
  )
  expect_equivalent(
    textstat_valence(toks, dict, normalization = "dictionary"),
    textstat_polarity(toks, dict, sent_relpropdiff)
  )

  # absolute proportional difference
  pol_apd <- data.frame(doc_id = docnames(toks),
                        sentiment = c( (2 - 1) / 5,
                                       (0 - 1) / 5,
                                       (1 - 1) / 6 ))
  expect_equivalent(
    textstat_polarity(toks, dict, sent_abspropdiff),
    pol_apd
  )
  expect_equivalent(
    textstat_valence(toks, dict, normalization = "all"),
    textstat_polarity(toks, dict, sent_abspropdiff)
  )

  # no normalization: raw sums of valences
  expect_equivalent(
    textstat_valence(toks, dict, normalization = "none"),
    data.frame(doc_id = docnames(toks), sentiment = c(1, -1, 0))
  )

  # logit scale
  pol_log <- data.frame(doc_id = docnames(toks),
                        sentiment = c( log(2 + .5) - log(1 + .5),
                                       log(0 + .5) - log(1 + .5),
                                       log(1 + .5) - log(1 + .5) ))
  expect_equivalent(
    textstat_polarity(toks, dict, sent_logit),
    pol_log
  )
  # dfmat <- dfm(toks) %>%
  #   dfm_lookup(dict)
  #   dfm_weight(scheme = "logsmooth", base = exp(1))
  # expect_equivalent(
  #   textstat_valence(dfmat, dict),
  #   textstat_polarity(toks, dict, sent_logit)
  # )
})
253 |
test_that("worker functions work", {
  plain_dict <- dictionary(list(positive = "good", negative = "bad"))
  # flip_valence() (internal) requires valence to have been set first
  expect_error(
    quanteda.sentiment:::flip_valence(plain_dict),
    "valence not set"
  )
})
261 |
# Error checking for valence()<- input: the list must be fully named, the
# names must be existing keys, and vector lengths must match the number of
# word patterns in each key.
test_that("valence error conditions work", {
  dict <- dictionary(list(
    happy = c("okay", "exuberant"),
    sad = c("okay", "depressed")
  ))

  expect_error(
    valence(dict) <- list(happy = c(okay = 1, exuberant = 3),
                          c(depressed = -4, okay = -2)),
    "valence must be a fully named list"
  )

  expect_error(
    valence(dict) <- list(happy = c(okay = 1, exuberant = 3),
                          other = c(depressed = -4, okay = -2)),
    "'other' is not a dictionary key"
  )

  expect_error(
    valence(dict) <- list(happy = c(1, 3, 2)),
    "valence value length not equal to number of values for key 'happy'"
  )
})
285 |
test_that("nested scope works for textstat_valence on tokens", {
  # "not good" must be matched as a phrase within the tokens object
  dict <- dictionary(list(positive = "good", negative = "not good"))
  polarity(dict) <- list(pos = "positive", neg = "negative")
  valence(dict) <- c(positive = 1, negative = -1)

  result <- textstat_valence(tokens("The test is not good"), dictionary = dict)
  expect_equivalent(
    result,
    data.frame(doc_id = "text1", sentiment = -1, row.names = NULL)
  )
})
297 |
--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 |
--------------------------------------------------------------------------------
/vignettes/sentiment_analysis.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Dictionary-based sentiment analysis using quanteda"
3 | author: "Kenneth Benoit"
4 | output:
5 | rmarkdown::html_vignette:
6 | toc: true
7 | vignette: >
8 | %\VignetteIndexEntry{Dictionary-based sentiment analysis using quanteda}
9 | %\VignetteEngine{knitr::rmarkdown}
10 | %\VignetteEncoding{UTF-8}
11 | ---
12 |
13 | ```{r, echo = FALSE}
14 | knitr::opts_chunk$set(
15 | collapse = TRUE,
16 | comment = "##"
17 | )
18 | ```
19 |
20 | ## Overview
21 |
22 | Sentiment analysis using dictionaries can be applied to any text, tokens, or dfm using `textstat_polarity()` or `textstat_valence()`. This function takes the **quanteda** object as an input, along with a dictionary whose valence or polarity has been set. The two ways of setting dictionary values allow a user to weight each _key_ with a polarity weight, or each _value_ within keys with a _valence_ weight.
23 |
24 | Dictionaries consist of keys and values, where the "key" is the canonical category such as "positive" or "negative", and the "values" consist of the patterns assigned to each key that will be counted as occurrences of those keys when its dictionary is applied using `tokens_lookup()` or `dfm_lookup()`.
25 |
 26 | In the Lexicoder Sentiment Dictionary 2015 (`data_dictionary_LSD2015`) that is distributed with the **quanteda.sentiment** package, for instance, the dictionary has four keys, with between 1,721 and 2,860 values each:
27 | ```{r}
28 | library("quanteda", warn.conflicts = FALSE, verbose = FALSE)
29 | library("quanteda.sentiment", warn.conflicts = FALSE, verbose = FALSE)
30 |
31 | print(data_dictionary_LSD2015, max_nval = 5)
32 | lengths(data_dictionary_LSD2015)
33 | ```
34 | As can be seen, these use "glob" pattern matches and may be multi-word values, such as "a lie" or "no damag*".
35 |
36 |
37 | ## Polarity and valence
38 |
39 | Dictionary-based sentiment analysis in **quanteda** can take place in two different forms, depending on whether dictionary keys are part of a _polarity_-based sentiment scheme -- such as positive versus negative dictionary categories (keys) -- or whether a continuous sentiment score is associated with individual word patterns, what we call a _valence_-based sentiment scheme.
40 |
41 | Dictionaries can have both polarity and valence weights, but these are not used in the same sentiment scoring scheme. "Polarity" is a category of one of two "poles" (such as negative and positive) applied to dictionary keys, whereas "valence" is a weight applied individually to each value within a key.
42 |
43 | ### Polarity weights
44 |
45 | Polarity weighting assigns the following categories to dictionary keys, to represent the "poles":
46 | * `pos` -- a "positive" end of the scale, although this notion does not need literally to mean positive sentiment. It could indicate any polar position, such as terms indicating confidence.
47 | * `neg` -- a "negative" end of the scale, although once again, this does not need literally to mean negative sentiment. In the example of "positive" indicating confidence, for instance, the "negative" pole could indicate tentative or uncertain language.
48 | * optionally, a `neut` category can also be identified, if this is desired.
49 |
50 | Dictionary keys are linked to each pole using the `polarity() <-` assignment function. The keys linked to each pole will be indicated in the summary information when the dictionary is printed, or returned as a list when calling the function `polarity()`.
51 |
52 | ```{r}
53 | polarity(data_dictionary_LSD2015)
54 | polarity(data_dictionary_LSD2015) <- list(pos = "positive", neg = "negative")
55 | ```
56 |
57 | Poles can be linked to multiple dictionary keys. For instance, in the Lexicoder 2015 dictionary, there are also two "negation" keys,
58 | `neg_positive` and `neg_negative`, meant to negate the positive terms, and negate negative terms. To add these to our polarities, we would simply assign them as a list.
59 |
60 | ```{r}
61 | polarity(data_dictionary_LSD2015)
62 | polarity(data_dictionary_LSD2015) <-
63 | list(pos = c("positive", "neg_negative"), neg = c("negative", "neg_positive"))
64 | print(data_dictionary_LSD2015, 0, 0)
65 | ```
66 |
67 |
68 | ### Valence weights
69 |
70 | Valence weighting is value-based, allowing individual numeric weights to be assigned to word patterns ("values"), rather than being a single pole attached to all of the values in a dictionary key. This allows different weights to be assigned within dictionary keys, for instance with different strengths of positivity or negativity.
71 |
72 | If we wanted to nuance this dictionary, for instance, we could assign valences to each key:
73 | ```{r}
74 | dict <- dictionary(list(quality = c("bad", "awful", "horrific",
75 | "good", "great", "amazing")))
76 | dict
77 | ```
 78 | This dictionary has no valences until they are set.  To assign valences, we use the `valence()` replacement function, assigning it a list with the values equal to the dictionary structure.  The name of the list elements should match the dictionary key whose valence is being set, and the elements for each key should be a vector of valences.  When this numeric vector is named, order does not matter; otherwise, the order used will be that of the dictionary's values.
79 | ```{r}
80 | valence(dict) <- list(quality = c(amazing = 2.2, awful = -1.5, bad = -1,
81 | horrific = -2, good = 1, great = 1.7))
82 | ```
83 | Now, we can see that the valences are set:
84 | ```{r}
85 | dict
86 | valence(dict)
87 | ```
88 | Because valences are set within key, different keys can have different valences, even when the word values are the same. So we could add a second key like this:
89 | ```{r}
90 | dict["location"] <- dict["quality"]
91 | valence(dict)["location"] <- list(location = c(amazing = 2.2, awful = -1.5, bad = -1,
92 | horrific = -2, good = 1, great = 1.7))
93 | print(dict, 0, 0)
94 | ```
95 |
96 |
 97 | This allows sentiment to be counted for dictionaries like the [Affective Norms for English Words (ANEW)](https://csea.phhp.ufl.edu/media.html#bottommedia) dictionary, which has numerical weights from 1.0 to 9.0 for word values in each of three categories: pleasure, arousal, and dominance.  As a **quanteda** dictionary, this would consist of three dictionary keys (one for each of pleasure, arousal, and dominance) and each word pattern would form a value in each key.  Each word value, furthermore, would have a valence.  This allows a single dictionary to contain multiple categories of valence, which can be combined or examined separately using `textstat_valence()`.  We return to the example of the ANEW dictionary below.
98 |
99 | Valence can also be assigned to provide the same weight to every value within a key, making it equivalent to polarity. For instance:
100 | ```{r}
101 | dict <- dictionary(list(neg = c("bad", "awful", "horrific"),
102 | pos = c("good", "great", "amazing")))
103 | valence(dict) <- list(neg = -1, pos = 1)
104 | print(dict)
105 | valence(dict)
106 | ```
107 |
108 | ### Effects of polarity and valence weights on other functions
109 |
110 | These weights are not currently used by any function other than `textstat_polarity()` and `textstat_valence()`. When using dictionaries with a polarity or valence in any other function, these have no effect. Dictionaries with polarity or valence set operate in every other respect just like regular **quanteda** dictionaries with no polarity or valence.
111 |
112 |
113 | ## Computing sentiment with polarities
114 |
115 | ### Simple example with the LSD 2015 dictionary
116 |
117 | Let's take a simple example of a text with some positive and negative words found in the LSD2015 dictionary.  The polarities of this dictionary are assigned by default, so we will erase our local copy and use the one found in the **quanteda.sentiment** package.
118 |
119 | ```{r}
120 | txt <- c(doc1 = "This is a fantastic, wonderful example.",
121 | doc2 = "The settlement was not amiable.",
122 | doc3 = "The good, the bad, and the ugly.")
123 | toks <- tokens(txt)
124 |
125 | data("data_dictionary_LSD2015", package = "quanteda.sentiment")
126 | polarity(data_dictionary_LSD2015)
127 | ```
128 |
129 | First, let's see what will be matched.
130 | ```{r}
131 | tokens_lookup(toks, data_dictionary_LSD2015, nested_scope = "dictionary",
132 | exclusive = FALSE)
133 | ```
134 | Notice the `nested_scope = "dictionary"` argument.  This tells the lookup function to consider the scope at which to stop "nesting" the value matches across the dictionary, rather than the default which is within keys.  Otherwise, the tokens "not", "amiable" in `doc2` would be matched twice: once for the positive key, matched from the value `"amiab*"`; and once for the `neg_positive` key, matched from the value `"not amiab*"`.  With the entire dictionary as the `nested_scope`, however, the (`neg_positive`) `"not amiab*"` is matched first, and then the shorter value from the other (`positive`) key `"amiab*"` is not also matched.
135 |
136 | To compute a polarity-based sentiment score, we need a formula specifying how the categories will be combined. This is supplied through the `fun` argument, which names a function for scoring sentiment through a combination of `pos`, `neg`, and optionally `neut` and `N`, where `N` is short for the total number of tokens or features.
137 |
138 | The **quanteda.sentiment** package includes three functions for converting polarities into a continuous index of sentiment, from Lowe et al. (2011).  These are:
139 |
140 | * `sent_logit`, a logit scale computed as $\mathrm{log}(pos + 0.5) - \mathrm{log}(neg + 0.5)$, also the default method;
141 |
142 | * `sent_abspropdiff`, the "absolute proportional difference" scale comparing the difference between positive and negative mentions as a proportion of all counts: computed as $\frac{pos - neg}{N}$; and
143 |
144 | * `sent_relpropdiff`, the "relative proportional difference" scale comparing the difference between positive and negative mentions as a proportion of only the total positive and negative mentions, computed as $\frac{pos - neg}{pos + neg}$.
145 |
146 | Additional custom functions, including those making use of the $neut$ category or using custom weights, can be supplied through the `fun` argument in `textstat_polarity()`, with additional arguments to `fun` supplied through `...` (for instance, the `smooth` argument in `sent_logit`).
147 |
148 | So to compute sentiment for the example, we simply need to call `textstat_polarity()`:
149 | ```{r}
150 | textstat_polarity(toks, data_dictionary_LSD2015)
151 | ```
152 | Or for an alternative scale:
153 | ```{r}
154 | textstat_polarity(toks, data_dictionary_LSD2015, fun = sent_relpropdiff)
155 | ```
156 | ## Example on real texts
157 |
158 | Let's apply the LSD 2015 to political speeches, namely the inaugural addresses of the US presidents since 1970. We'll use the negation categories too. Notice that we don't even need to tokenize the text here, since the `textstat_polarity()` function can take a corpus as input (and will take care of the appropriate tokenization on its own).
159 |
160 | ```{r}
161 | polarity(data_dictionary_LSD2015) <-
162 | list(pos = c("positive", "neg_negative"), neg = c("negative", "neg_positive"))
163 |
164 | sent_pres <- data_corpus_inaugural %>%
165 | corpus_subset(Year > 1970) %>%
166 | textstat_polarity(data_dictionary_LSD2015)
167 | sent_pres
168 | ```
169 | We can plot this:
170 | ```{r}
171 | library("ggplot2")
172 | ggplot(sent_pres) +
173 | geom_point(aes(x = sentiment, y = reorder(doc_id, sentiment))) +
174 | ylab("")
175 | ```
176 |
177 | ## Computing sentiment with valences
178 |
179 | Valences provide a more flexible method for computing sentiment analysis based on sentiment values, or valences, attached to specific word patterns.
180 |
181 | ### Simple example with user-supplied valences
182 |
183 | For a dictionary whose polarity or valence has been set, computing sentiment is simple: `textstat_valence()` is applied to the object along with the dictionary.  Here, we demonstrate this for the LSD2015.
184 |
185 | ```{r}
186 | txt <- c(doc1 = "This is a fantastic, wonderful example.",
187 | doc2 = "The settlement was not amiable.",
188 | doc3 = "The good, the bad, and the ugly.")
189 | toks <- tokens(txt)
190 |
191 | valence(data_dictionary_LSD2015) <- list(positive = 1, negative = -1)
192 | ```
193 |
194 |
195 | To compute sentiment, `textstat_valence()` will count the two positive and zero negative matches from the first example, and average these across all matches, for a score of 1.0.  In the second document, the positive match will generate a score of 1.0, and in the third document, the scores will be `sum(1, -1, -1) / 3 = -0.33`.
196 | ```{r}
197 | textstat_valence(toks, data_dictionary_LSD2015)
198 | ```
199 |
200 | Note that if we include the other dictionary keys, however, then "not amiable" will be matched in the `neg_positive` count, rather than the word "amiable" being counted as positive.  Because many dictionary values may be multi-word patterns, we always recommend using `textstat_valence()` on tokens, rather than on `dfm` objects whose features are dictionary keys rather than values.
201 | ```{r}
202 | valence(data_dictionary_LSD2015) <- list(positive = 1, negative = -1,
203 | neg_negative = 1, neg_positive = -1)
204 | textstat_valence(toks, data_dictionary_LSD2015)
205 | ```
206 |
207 | Here, document 2 is now computed as -1 because its dictionary match is actually to the "neg_positive" category that has a valence of -1.  The valence function ignored the keys whose valence was not set before, but applies them with `nested_scope = "dictionary"` when they are set, to ensure that only the longer phrase is matched.
208 | ```{r}
209 | tokens_lookup(toks, data_dictionary_LSD2015, exclusive = FALSE,
210 | nested_scope = "dictionary")
211 | ```
212 |
213 | ### Using the AFINN dictionary
214 |
215 | We can build this dictionary from scratch using the source data:
216 | ```{r}
217 | afinn <- read.delim(system.file("extdata/afinn/AFINN-111.txt",
218 | package = "quanteda.sentiment"),
219 | header = FALSE, col.names = c("word", "valence"))
220 | head(afinn)
221 | ```
222 |
223 | To make this into a **quanteda** dictionary:
224 | ```{r}
225 | data_dictionary_afinn <- dictionary(list(afinn = afinn$word))
226 | valence(data_dictionary_afinn) <- list(afinn = afinn$valence)
227 | data_dictionary_afinn
228 | ```
229 | This dictionary has a single key we have called "afinn", with the valences set from the original `afinn` data.frame/tibble.
230 |
231 | We can now use this to apply `textstat_valence()`:
232 | ```{r}
233 | textstat_valence(toks, data_dictionary_afinn)
234 | ```
235 | How was this computed? We can use the dictionary to examine the words, and also to get their sentiment.
236 | ```{r}
237 | tokssel <- tokens_select(toks, data_dictionary_afinn)
238 | tokssel
239 |
240 | valence(data_dictionary_afinn)$afinn[as.character(tokssel)]
241 | ```
242 | So here, doc1 had a score of `(4 + 4) / 2 = 4`, doc2 has no score because none of its tokens matched values in the AFINN dictionary, and doc3 was `(3 + -3 + -3) / 3 = -1`.
243 |
244 | ### Using the ANEW dictionary with multiple keys
245 |
246 | The ANEW, or Affective Norms for English Words (Bradley and Lang 2017), provides a lexicon of 2,471 distinct fixed word matches that are associated with three valenced categories: pleasure, arousal, and dominance. Reading in the original format, we have to convert this into a **quanteda** dictionary format, and add the valence values. Because this format requires a list of separate keys, we need to create a dictionary key for each of the three categories, and assign the lexicon to each key. With the ANEW, it just so happens that the lexicon -- or "values" in **quanteda** parlance -- are the same for each key, but this is not a necessary feature of valenced dictionaries.
247 |
248 | ```{r}
249 | anew <- read.delim(url("https://bit.ly/2zZ44w0"))
250 | anew <- anew[!duplicated(anew$Word), ] # because some words repeat
251 | data_dictionary_anew <- dictionary(list(pleasure = anew$Word,
252 | arousal = anew$Word,
253 | dominance = anew$Word))
254 | valence(data_dictionary_anew) <- list(pleasure = anew$ValMn,
255 | arousal = anew$AroMn,
256 | dominance = anew$DomMn)
257 | ```
258 |
259 | Now we can see that we have the dictionary in **quanteda** format with the valences attached. We also see that the values are the same in each key.
260 | ```{r}
261 | print(data_dictionary_anew, max_nval = 5)
262 | ```
263 |
264 | The best way to compute sentiment is to choose a key and use it separately, because each key here contains the same values.
265 | ```{r}
266 | textstat_valence(toks, data_dictionary_anew["pleasure"])
267 | textstat_valence(toks, data_dictionary_anew["arousal"])
268 | ```
269 |
270 | If we don't subset the dictionary keys, it will combine them, which is probably not what we want:
271 | ```{r}
272 | textstat_valence(toks, data_dictionary_anew)
273 |
274 | tokssel <- tokens_select(toks, data_dictionary_anew)
275 | vals <- lapply(valence(data_dictionary_anew),
276 | function(x) x[as.character(tokssel)])
277 | vals
278 | ```
279 | Without selection, the average is across all three keys:
280 | ```{r}
281 | mean(unlist(vals))
282 | ```
283 |
284 | ## Equivalences between polarity and valence approaches
285 |
286 | Valences can be set to produce results equivalent to polarity-based sentiment scoring, if this is desired.  Considering our brief example above, and making sure we have both polarity and valence set for the LSD2015, we can show this for the two non-logit scale polarity functions.
287 | ```{r}
288 | corpus(txt)
289 | valence(data_dictionary_LSD2015) <- list(positive = 1, negative = -1,
290 | neg_negative = 1, neg_positive = -1)
291 | print(data_dictionary_LSD2015, 0, 0)
292 | ```
293 | Computing this by absolute proportional difference:
294 | ```{r}
295 | textstat_polarity(txt, data_dictionary_LSD2015, fun = sent_abspropdiff)
296 | ```
297 | is the same as computing it this way using valences:
298 | ```{r}
299 | textstat_valence(txt, data_dictionary_LSD2015, norm = "all")
300 | ```
301 | For the relative proportional difference:
302 | ```{r}
303 | textstat_polarity(txt, data_dictionary_LSD2015, fun = sent_relpropdiff)
304 | textstat_valence(txt, dictionary = data_dictionary_LSD2015, norm = "dict")
305 | ```
306 |
307 |
308 | ## References
309 |
310 | Bradley, M.M. & Lang, P.J. (2017). [Affective Norms for English Words (ANEW): Instruction manual and affective ratings](https://pdodds.w3.uvm.edu/teaching/courses/2009-08UVM-300/docs/others/everything/bradley1999a.pdf). _Technical Report C-3_. Gainesville, FL: UF Center for the Study of Emotion and Attention.
311 |
312 | Liu, B. (2015). _Sentiment analysis: Mining opinions, sentiments, and emotions_. Cambridge University Press.
313 |
314 | Lowe, W., Benoit, K. R., Mikhaylov, S., & Laver, M. (2011). Scaling Policy Preferences from Coded Political Texts. _Legislative Studies Quarterly_, 36(1), 123–155. \doi{10.1111/j.1939-9162.2010.00006.x}.
315 |
--------------------------------------------------------------------------------