├── .Rbuildignore
├── .github
│   ├── .gitignore
│   └── workflows
│       └── R-CMD-check.yaml
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
│   ├── RcppExports.R
│   ├── attribution.R
│   ├── deprecated.R
│   ├── sentiment_engines.R
│   ├── sentocorpus.R
│   ├── sentolexicons.R
│   ├── sentomeasures_main.R
│   ├── sentomeasures_measures_xyz.R
│   ├── sentomeasures_methods.R
│   ├── sentometrics.R
│   ├── sentomodel.R
│   ├── utils.R
│   └── zzz.R
├── README.md
├── Sentometrics.Rproj
├── appendix
│   ├── output_timings.txt
│   ├── run_timings.R
│   └── vignette_supplementary_appendix.pdf
├── cran-comments.md
├── data-raw
│   ├── FEEL_eng_tr.rda
│   ├── FEEL_fr.rda
│   ├── FEEL_nl_tr.rda
│   ├── GI_eng.rda
│   ├── GI_fr_tr.rda
│   ├── GI_nl_tr.rda
│   ├── HENRY_eng.rda
│   ├── HENRY_fr_tr.rda
│   ├── HENRY_nl_tr.rda
│   ├── LM_eng.rda
│   ├── LM_fr_tr.rda
│   ├── LM_nl_tr.rda
│   ├── US_EPU_1985-2018.csv
│   ├── US_economic_news_1951-2014.csv
│   ├── _sources.txt
│   ├── datasets.R
│   ├── lexicons-raw
│   │   ├── FEEL.csv
│   │   ├── FEEL_eng.csv
│   │   ├── FEEL_nl.csv
│   │   ├── FEEL_raw.csv
│   │   ├── GI.csv
│   │   ├── GI_fr.csv
│   │   ├── GI_nl.csv
│   │   ├── GI_raw.csv
│   │   ├── HENRY.csv
│   │   ├── HENRY_fr.csv
│   │   ├── HENRY_nl.csv
│   │   ├── LM.csv
│   │   ├── LM_fr.csv
│   │   ├── LM_nl.csv
│   │   └── LM_raw.csv
│   ├── valence-raw
│   │   └── valShifters.rda
│   ├── valence_eng.rda
│   ├── valence_fr.rda
│   └── valence_nl.rda
├── data
│   ├── epu.rda
│   ├── list_lexicons.rda
│   ├── list_valence_shifters.rda
│   └── usnews.rda
├── docs
│   ├── 404.html
│   ├── apple-touch-icon-120x120.png
│   ├── apple-touch-icon-152x152.png
│   ├── apple-touch-icon-180x180.png
│   ├── apple-touch-icon-60x60.png
│   ├── apple-touch-icon-76x76.png
│   ├── apple-touch-icon.png
│   ├── articles
│   │   ├── applications
│   │   │   ├── epu.html
│   │   │   ├── epu_files
│   │   │   │   ├── figure-html
│   │   │   │   │   ├── unnamed-chunk-10-1.png
│   │   │   │   │   ├── unnamed-chunk-15-1.png
│   │   │   │   │   └── unnamed-chunk-9-1.png
│   │   │   │   ├── header-attrs-2.10
│   │   │   │   │   └── header-attrs.js
│   │   │   │   └── header-attrs-2.9
│   │   │   │       └── header-attrs.js
│   │   │   ├── vix.html
│   │   │   └── vix_files
│   │   │       ├── figure-html
│   │   │       │   ├── unnamed-chunk-14-1.png
│   │   │       │   ├── unnamed-chunk-16-1.png
│   │   │       │   ├── unnamed-chunk-16-2.png
│   │   │       │   └── unnamed-chunk-9-1.png
│   │   │       ├── header-attrs-2.10
│   │   │       │   └── header-attrs.js
│   │   │       └── header-attrs-2.9
│   │   │           └── header-attrs.js
│   │   ├── contributions
│   │   │   ├── gopress.html
│   │   │   ├── gopress_figures
│   │   │   │   ├── read_later.jpg
│   │   │   │   └── save_as.jpg
│   │   │   ├── gopress_files
│   │   │   │   ├── figure-html
│   │   │   │   │   ├── sento 3-1.png
│   │   │   │   │   ├── sento topic 3-1.png
│   │   │   │   │   ├── unnamed-chunk-3-1.png
│   │   │   │   │   └── unnamed-chunk-4-1.png
│   │   │   │   ├── header-attrs-2.10
│   │   │   │   │   └── header-attrs.js
│   │   │   │   └── header-attrs-2.9
│   │   │   │       └── header-attrs.js
│   │   │   ├── isa.html
│   │   │   └── isa_files
│   │   │       ├── figure-html
│   │   │       │   ├── unnamed-chunk-10-1.png
│   │   │       │   ├── unnamed-chunk-19-1.png
│   │   │       │   └── unnamed-chunk-27-1.png
│   │   │       ├── header-attrs-2.10
│   │   │       │   └── header-attrs.js
│   │   │       └── header-attrs-2.9
│   │   │           └── header-attrs.js
│   │   ├── development.html
│   │   ├── development_files
│   │   │   ├── header-attrs-2.10
│   │   │   │   └── header-attrs.js
│   │   │   └── header-attrs-2.9
│   │   │       └── header-attrs.js
│   │   ├── examples
│   │   │   ├── corpus.html
│   │   │   ├── corpus_files
│   │   │   │   ├── figure-html
│   │   │   │   │   ├── unnamed-chunk-5-1.png
│   │   │   │   │   ├── unnamed-chunk-5-2.png
│   │   │   │   │   └── unnamed-chunk-5-3.png
│   │   │   │   ├── header-attrs-2.10
│   │   │   │   │   └── header-attrs.js
│   │   │   │   └── header-attrs-2.9
│   │   │   │       └── header-attrs.js
│   │   │   ├── indexation.html
│   │   │   ├── indexation_files
│   │   │   │   ├── figure-html
│   │   │   │   │   ├── unnamed-chunk-11-1.png
│   │   │   │   │   ├── unnamed-chunk-4-1.png
│   │   │   │   │   ├── unnamed-chunk-4-2.png
│   │   │   │   │   ├── unnamed-chunk-4-3.png
│   │   │   │   │   └── unnamed-chunk-4-4.png
│   │   │   │   ├── header-attrs-2.10
│   │   │   │   │   └── header-attrs.js
│   │   │   │   └── header-attrs-2.9
│   │   │   │       └── header-attrs.js
│   │   │   ├── modeling.html
│   │   │   ├── modeling_files
│   │   │   │   ├── figure-html
│   │   │   │   │   ├── unnamed-chunk-11-1.png
│   │   │   │   │   ├── unnamed-chunk-11-2.png
│   │   │   │   │   ├── unnamed-chunk-11-3.png
│   │   │   │   │   └── unnamed-chunk-9-1.png
│   │   │   │   ├── header-attrs-2.10
│   │   │   │   │   └── header-attrs.js
│   │   │   │   └── header-attrs-2.9
│   │   │   │       └── header-attrs.js
│   │   │   ├── sentiment.html
│   │   │   └── sentiment_files
│   │   │       ├── figure-html
│   │   │       │   └── unnamed-chunk-10-1.png
│   │   │       ├── header-attrs-2.10
│   │   │       │   └── header-attrs.js
│   │   │       └── header-attrs-2.9
│   │   │           └── header-attrs.js
│   │   ├── index.html
│   │   ├── sentometrics.html
│   │   └── sentometrics_files
│   │       ├── header-attrs-2.10
│   │       │   └── header-attrs.js
│   │       └── header-attrs-2.9
│   │           └── header-attrs.js
│   ├── authors.html
│   ├── bootstrap-toc.css
│   ├── bootstrap-toc.js
│   ├── docsearch.css
│   ├── docsearch.js
│   ├── docsearch.json
│   ├── favicon-16x16.png
│   ├── favicon-32x32.png
│   ├── favicon.ico
│   ├── index.html
│   ├── link.svg
│   ├── logo.png
│   ├── news
│   │   └── index.html
│   ├── pkgdown.css
│   ├── pkgdown.js
│   ├── pkgdown.yml
│   ├── reference
│   │   ├── Rplot001.png
│   │   ├── add_features.html
│   │   ├── aggregate.sentiment.html
│   │   ├── aggregate.sento_measures.html
│   │   ├── as.data.table.sento_measures.html
│   │   ├── as.sentiment.html
│   │   ├── as.sento_corpus.html
│   │   ├── attributions.html
│   │   ├── compute_sentiment.html
│   │   ├── corpus_summarize.html
│   │   ├── ctr_agg.html
│   │   ├── ctr_model.html
│   │   ├── data-defunct.html
│   │   ├── diff.sento_measures.html
│   │   ├── epu.html
│   │   ├── figures
│   │   │   ├── gsoc.png
│   │   │   ├── innoviris.png
│   │   │   ├── ivado.png
│   │   │   ├── logo.png
│   │   │   ├── snsf.png
│   │   │   └── swissuniversities.png
│   │   ├── get_dates.html
│   │   ├── get_dimensions.html
│   │   ├── get_hows.html
│   │   ├── get_loss_data.html
│   │   ├── index.html
│   │   ├── list_lexicons.html
│   │   ├── list_valence_shifters.html
│   │   ├── measures_fill.html
│   │   ├── measures_update.html
│   │   ├── merge.sentiment.html
│   │   ├── nmeasures.html
│   │   ├── nobs.sento_measures.html
│   │   ├── peakdates.html
│   │   ├── peakdocs.html
│   │   ├── plot.attributions.html
│   │   ├── plot.sento_measures-1.png
│   │   ├── plot.sento_measures.html
│   │   ├── plot.sento_modelIter.html
│   │   ├── predict.sento_model.html
│   │   ├── scale.sento_measures.html
│   │   ├── sento_corpus.html
│   │   ├── sento_lexicons.html
│   │   ├── sento_measures.html
│   │   ├── sento_model.html
│   │   ├── sentometrics-defunct.html
│   │   ├── sentometrics-deprecated.html
│   │   ├── sentometrics-package.html
│   │   ├── subset.sento_measures.html
│   │   ├── usnews.html
│   │   ├── weights_almon.html
│   │   ├── weights_beta.html
│   │   └── weights_exponential.html
│   └── sitemap.xml
├── examples
│   ├── run_vignette.R
│   └── vix.rda
├── index.md
├── inst
│   ├── CITATION
│   └── extdata
│       └── test_data.rda
├── man
│   ├── add_features.Rd
│   ├── aggregate.sentiment.Rd
│   ├── aggregate.sento_measures.Rd
│   ├── as.data.table.sento_measures.Rd
│   ├── as.sentiment.Rd
│   ├── as.sento_corpus.Rd
│   ├── attributions.Rd
│   ├── compute_sentiment.Rd
│   ├── corpus_summarize.Rd
│   ├── ctr_agg.Rd
│   ├── ctr_model.Rd
│   ├── data-defunct.Rd
│   ├── diff.sento_measures.Rd
│   ├── epu.Rd
│   ├── figures
│   │   ├── gsoc.png
│   │   ├── innoviris.png
│   │   ├── ivado.png
│   │   ├── logo.png
│   │   ├── snsf.png
│   │   └── swissuniversities.png
│   ├── get_dates.Rd
│   ├── get_dimensions.Rd
│   ├── get_hows.Rd
│   ├── get_loss_data.Rd
│   ├── list_lexicons.Rd
│   ├── list_valence_shifters.Rd
│   ├── measures_fill.Rd
│   ├── measures_update.Rd
│   ├── merge.sentiment.Rd
│   ├── nmeasures.Rd
│   ├── nobs.sento_measures.Rd
│   ├── peakdates.Rd
│   ├── peakdocs.Rd
│   ├── plot.attributions.Rd
│   ├── plot.sento_measures.Rd
│   ├── plot.sento_modelIter.Rd
│   ├── predict.sento_model.Rd
│   ├── scale.sento_measures.Rd
│   ├── sento_corpus.Rd
│   ├── sento_lexicons.Rd
│   ├── sento_measures.Rd
│   ├── sento_model.Rd
│   ├── sentometrics-defunct.Rd
│   ├── sentometrics-deprecated.Rd
│   ├── sentometrics-package.Rd
│   ├── subset.sento_measures.Rd
│   ├── usnews.Rd
│   ├── weights_almon.Rd
│   ├── weights_beta.Rd
│   └── weights_exponential.Rd
├── pkgdown
│   ├── _pkgdown.yml
│   └── favicon
│       ├── apple-touch-icon-120x120.png
│       ├── apple-touch-icon-152x152.png
│       ├── apple-touch-icon-180x180.png
│       ├── apple-touch-icon-60x60.png
│       ├── apple-touch-icon-76x76.png
│       ├── apple-touch-icon.png
│       ├── favicon-16x16.png
│       ├── favicon-32x32.png
│       └── favicon.ico
├── src
│   ├── Makevars
│   ├── Makevars.win
│   ├── RcppExports.cpp
│   ├── SentimentScorerBigrams.h
│   ├── SentimentScorerClusters.h
│   ├── SentimentScorerOnegrams.h
│   ├── SentimentScorerSentences.h
│   ├── compute_df.cpp
│   ├── compute_sentiment_onegrams.cpp
│   ├── compute_sentiment_sentences.cpp
│   ├── compute_sentiment_valence.cpp
│   ├── fill_NAs.cpp
│   ├── get_dtf_vectors.cpp
│   └── utils.h
├── tests
│   ├── testthat.R
│   └── testthat
│       ├── test_aggregation.R
│       ├── test_attribution.R
│       ├── test_corpus_building.R
│       ├── test_measures_manipulation.R
│       ├── test_methods_sentomeasures.R
│       ├── test_modeling.R
│       └── test_sentiment_computation.R
└── vignettes
    ├── applications
    │   ├── epu.Rmd
    │   └── vix.Rmd
    ├── contributions
    │   ├── gopress.Rmd
    │   ├── gopress_figures
    │   │   ├── read_later.jpg
    │   │   └── save_as.jpg
    │   └── isa.Rmd
    ├── development.Rmd
    ├── examples
    │   ├── corpus.Rmd
    │   ├── indexation.Rmd
    │   ├── modeling.Rmd
    │   └── sentiment.Rmd
    └── sentometrics.Rmd

/.Rbuildignore:
--------------------------------------------------------------------------------
^.*\.Rproj$
^\.Rproj\.user$
^data-raw$
^docs$
^examples$
^THANKS$
^sentometrics-manual-.*\.pdf$
^cran-comments\.md$
^CRAN-RELEASE$
^appendix$
^_pkgdown\.yml$
^pkgdown$
^vignettes$
^index\.md$
^_TODO$
^\.github$
^CRAN-SUBMISSION$
--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
*.html
--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
on:
  push:
    branches:
      - main
      - master
  pull_request:
    branches:
      - main
      - master

name: R-CMD-check

jobs:
  R-CMD-check:
    runs-on: ${{ matrix.config.os }}

    name: ${{ matrix.config.os }} (${{ matrix.config.r }})

    strategy:
      fail-fast: false
      matrix:
        config:
          # - {os: windows-latest, r: 'release'} # parallelization keeps complaining that nmeasures() is not recognized
          - {os: macOS-latest, r: 'release'}
          - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
          - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", http-user-agent: "R/4.1.0 (ubuntu-20.04) R (4.1.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions"}

    env:
      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
      RSPM: ${{ matrix.config.rspm }}
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

    steps:
      - uses: actions/checkout@v2

      - uses: r-lib/actions/setup-r@v1
        with:
          r-version: ${{ matrix.config.r }}

      - uses: r-lib/actions/setup-pandoc@v1

      - name: Query dependencies
        run: |
          install.packages('remotes')
          saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
          writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
        shell: Rscript {0}

      - name: Restore R package cache
        uses: actions/cache@v2
        with:
          path: ${{ env.R_LIBS_USER }}
          key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
          restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-

      - name: Install system dependencies
        if: runner.os == 'Linux'
        run: |
          while read -r cmd
          do
            eval sudo $cmd
          done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')

      - name: Install dependencies
        run: |
          remotes::install_deps(dependencies = TRUE)
          remotes::install_cran("rcmdcheck")
        shell: Rscript {0}

      - name: Check
        env:
          _R_CHECK_CRAN_INCOMING_REMOTE_: false
        run: |
          options(crayon.enabled = TRUE)
          rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
        shell: Rscript {0}

      - name: Upload check results
        if: failure()
        uses: actions/upload-artifact@main
        with:
          name: ${{ runner.os }}-r${{ matrix.config.r }}-results
          path: check
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.Rproj.user
.Rhistory
.RData
.Ruserdata
src/*.o
src/*.so
src/*.dll
src-i386/
src-x64/
gopress_downloads/
_TODO
CRAN-SUBMISSION
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
Package: sentometrics
Type: Package
Title: An Integrated Framework for Textual Sentiment Time Series Aggregation and Prediction
Version: 1.0.1
Authors@R: c(person("Samuel", "Borms", email = "borms_sam@hotmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-9533-1870")),
    person("David", "Ardia", email = "david.ardia@hec.ca", role = c("aut"), comment = c(ORCID = "0000-0003-2823-782X")),
    person("Keven", "Bluteau", email = "keven.bluteau@usherbrooke.ca", role = c("aut"), comment = c(ORCID = "0000-0003-2990-4807")),
    person("Kris", "Boudt", email = "kris.boudt@vub.be", role = c("aut"), comment = c(ORCID = "0000-0002-1000-5142")),
    person("Jeroen", "Van Pelt", email = "jeroenvanpelt@hotmail.com", role = c("ctb")),
    person("Andres", "Algaba", email = "andres.algaba@vub.be", role = c("ctb")))
Maintainer: Samuel Borms <borms_sam@hotmail.com>
Description: Optimized prediction based on textual sentiment, accounting for the intrinsic
    challenge that sentiment can be computed and pooled across texts and time in various
    ways. See Ardia et al. (2021) <doi:10.18637/jss.v099.i02>.
Depends: R (>= 3.3.0)
License: GPL (>= 2)
BugReports: https://github.com/SentometricsResearch/sentometrics/issues
URL: https://sentometrics-research.com/sentometrics/
Encoding: UTF-8
LazyData: true
Suggests: covr,
    doParallel,
    e1071,
    lexicon,
    MCS,
    NLP,
    parallel,
    randomForest,
    stopwords,
    testthat,
    tm
Imports: caret,
    compiler,
    data.table,
    foreach,
    ggplot2,
    glmnet,
    ISOweek,
    quanteda,
    Rcpp (>= 0.12.13),
    RcppRoll,
    RcppParallel,
    stats,
    stringi,
    utils
LinkingTo: Rcpp, RcppArmadillo, RcppParallel
RoxygenNote: 7.3.2
SystemRequirements: GNU make
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
# Generated by roxygen2: do not edit by hand

S3method("$<-",sento_lexicons)
S3method("[",sento_lexicons)
S3method("[<-",sento_lexicons)
S3method("[[<-",sento_lexicons)
S3method("docvars<-",sento_corpus)
S3method("names<-",sento_lexicons)
S3method(aggregate,sentiment)
S3method(aggregate,sento_measures)
S3method(as.data.frame,sento_corpus)
S3method(as.data.frame,sento_measures)
S3method(as.data.table,sento_corpus)
S3method(as.data.table,sento_measures)
S3method(as.sentiment,data.frame)
S3method(as.sentiment,data.table)
S3method(as.sento_corpus,SimpleCorpus)
S3method(as.sento_corpus,VCorpus)
S3method(as.sento_corpus,corpus)
S3method(attributions,sento_model)
S3method(attributions,sento_modelIter)
S3method(compute_sentiment,SimpleCorpus)
S3method(compute_sentiment,VCorpus)
S3method(compute_sentiment,character)
S3method(compute_sentiment,corpus)
S3method(compute_sentiment,sento_corpus)
S3method(diff,sento_measures)
S3method(merge,sentiment)
S3method(nmeasures,sento_measures)
S3method(nobs,sento_measures)
S3method(plot,attributions)
S3method(plot,sento_measures)
S3method(plot,sento_modelIter)
S3method(predict,sento_model)
S3method(print,sento_corpus)
S3method(print,sento_measures)
S3method(print,sento_model)
S3method(print,sento_modelIter)
S3method(scale,sento_measures)
S3method(subset,sento_measures)
S3method(summary,sento_measures)
S3method(summary,sento_model)
S3method(summary,sento_modelIter)
export(add_features)
export(as.sentiment)
export(as.sento_corpus)
export(attributions)
export(compute_sentiment)
export(corpus_summarize)
export(ctr_agg)
export(ctr_model)
export(get_dates)
export(get_dimensions)
export(get_hows)
export(get_loss_data)
export(measures_fill)
export(measures_update)
export(nmeasures)
export(peakdates)
export(peakdocs)
export(sento_corpus)
export(sento_lexicons)
export(sento_measures)
export(sento_model)
export(weights_almon)
export(weights_beta)
export(weights_exponential)
import(data.table)
import(ggplot2)
importFrom(Rcpp,evalCpp)
importFrom(RcppParallel,RcppParallelLibs)
importFrom(compiler,cmpfun)
importFrom(foreach,"%dopar%")
importFrom(quanteda,"docvars<-")
importFrom(stats,aggregate)
importFrom(stats,nobs)
useDynLib(sentometrics,.registration = TRUE)
--------------------------------------------------------------------------------
/R/RcppExports.R:
--------------------------------------------------------------------------------
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

compute_df <- function(alpha, lambda, xA) {
    .Call(`_sentometrics_compute_df`, alpha, lambda, xA)
}

compute_sentiment_onegrams <- function(texts, lexicons, how) {
    .Call(`_sentometrics_compute_sentiment_onegrams`, texts, lexicons, how)
}

compute_sentiment_sentences <- function(texts, lexicons, how, valenceType) {
    .Call(`_sentometrics_compute_sentiment_sentences`, texts, lexicons, how, valenceType)
}

compute_sentiment_valence <- function(texts, lexicons, how) {
    .Call(`_sentometrics_compute_sentiment_valence`, texts, lexicons, how)
}

fill_NAs <- function(x) {
    .Call(`_sentometrics_fill_NAs`, x)
}

get_dtf_vectors <- function(texts) {
    .Call(`_sentometrics_get_dtf_vectors`, texts)
}
--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------

.onLoad <- function(libname = find.package("sentometrics"), pkgname = "sentometrics") {
  # CRAN note avoidance
  if (getRversion() >= "2.15.1")
    utils::globalVariables(
      c("value", "variable", "word_count", "w",
        "attrib", "feature", "id", "i", "wLex",
        "wFeat", "wTime", "x", "identifier",
        ".", "documents", "language", "nTokens",
        "lag", "sentence_id", "n", "pos", "neg")
    )
  invisible()
}

.onUnload <- function(libpath) {
  library.dynam.unload("sentometrics", libpath)
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

## _sentometrics_: An Integrated Framework for Textual Sentiment Time Series Aggregation and Prediction

[![CRAN](https://www.r-pkg.org/badges/version/sentometrics)](https://cran.r-project.org/package=sentometrics)
[![Downloads](https://cranlogs.r-pkg.org/badges/last-day/sentometrics?color=ff69b4)](https://www.r-pkg.org/pkg/sentometrics)
[![Downloads](https://cranlogs.r-pkg.org/badges/sentometrics?color=ff69b4)](https://www.r-pkg.org/pkg/sentometrics)
[![Downloads](https://cranlogs.r-pkg.org/badges/grand-total/sentometrics?color=ff69b4)](https://www.r-pkg.org/pkg/sentometrics)

### Introduction

The **`sentometrics`** package is an **integrated framework for textual sentiment time series aggregation and prediction**.
It accounts for the intrinsic challenge that textual sentiment can be computed in many different ways, as well as the large number of possibilities to pool sentiment into a time series index. The package integrates the fast _quantification_ of sentiment from texts, the _aggregation_ into different sentiment time series, and the _prediction_ based on these measures. All in one coherent workflow!

See the [package website](https://sentometrics-research.com/sentometrics/) and the [vignette](https://doi.org/10.18637/jss.v099.i02) published in the Journal of Statistical Software for plenty of examples and details. We also refer to our [survey](https://doi.org/10.1111/joes.12370), organized as an overview of the steps required in a typical econometric analysis of sentiment from alternative (such as textual) data, and to the companion [web page](https://sborms.github.io/econometrics-meets-sentiment/).

### Installation

To install the package from CRAN, simply do:

```R
install.packages("sentometrics")
```

To install the latest development version of **`sentometrics`** (which may contain bugs!), execute:

```R
devtools::install_github("SentometricsResearch/sentometrics")
```

### Shiny application

For a visual interface to the package's core functionalities as a Shiny application, install the [**`sentometrics.app`**](https://github.com/DataWanderers/sentometrics.app) package and run the `sento_app()` function.

### Reference

Please cite **`sentometrics`** in publications. Use `citation("sentometrics")`.

### Acknowledgements

This software package originates from a
[Google Summer of Code 2017](https://github.com/rstats-gsoc/gsoc2017/wiki/Sentometrics:-An-integrated-framework-for-text-based-multivariate-time-series-modeling-and-forecasting) project, was further developed
during a follow-up [Google Summer of Code 2019](https://github.com/rstats-gsoc/gsoc2019/wiki/sentometrics) project, and benefited from financial support by [Innoviris](https://www.innoviris.brussels/), [IVADO](https://ivado.ca/), [swissuniversities](https://www.swissuniversities.ch), and the [Swiss National Science Foundation](http://www.snf.ch) (grants #179281 and #191730).
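### Minimal example

As a pointer to what the quantification-aggregation workflow above looks like in code, here is a minimal sketch going from the packaged `usnews` corpus to monthly sentiment time series. It only uses datasets and functions exported by the package (see the NAMESPACE); the particular lexicon and aggregation choices are illustrative assumptions, not recommendations, and the JSS vignette remains the authoritative reference.

```R
library("sentometrics")

data("usnews", package = "sentometrics")                # example corpus of US news articles
data("list_lexicons", package = "sentometrics")         # built-in word lists
data("list_valence_shifters", package = "sentometrics") # built-in valence shifters

# quantification: build a sento_corpus and a sento_lexicons object
corpus <- sento_corpus(corpusdf = usnews)
lexicons <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")],
                           list_valence_shifters[["en"]])

# document-level sentiment scores, per lexicon and feature
sentiment <- compute_sentiment(corpus, lexicons, how = "counts")

# aggregation: pool the document scores into monthly sentiment measures
ctr <- ctr_agg(howWithin = "counts", howDocs = "proportional",
               howTime = "equal_weight", by = "month", lag = 3)
measures <- sento_measures(corpus, lexicons = lexicons, ctr = ctr)

plot(measures)
```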
--------------------------------------------------------------------------------
/Sentometrics.Rproj:
--------------------------------------------------------------------------------
Version: 1.0
ProjectId: aad29760-aff4-4264-a2a0-cfc47f37bf4d

RestoreWorkspace: No
SaveWorkspace: No
AlwaysSaveHistory: No

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX

AutoAppendNewline: Yes
StripTrailingWhitespace: Yes

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
--------------------------------------------------------------------------------
/appendix/output_timings.txt:
--------------------------------------------------------------------------------
Run timings for texts size of 1000
Run timings for texts size of 5000
Run timings for texts size of 10000
Run timings for texts size of 25000
Run timings for texts size of 50000
Run timings for texts size of 75000
Run timings for texts size of 1e+05

Run timings for texts size of 1000
Run timings for texts size of 5000
Run timings for texts size of 10000
Run timings for texts size of 25000
Run timings for texts size of 50000
Run timings for texts size of 75000
Run timings for texts size of 1e+05

Run timings for texts size of 1000
Run timings for texts size of 5000
Run timings for texts size of 10000
Run timings for texts size of 25000
Run timings for texts size of 50000
Run timings for texts size of 75000
Run timings for texts size of 1e+05

PANEL A
    texts sento_unigrams sento_bigrams sento_clusters  meanr SentimentAnalysis syuzhet quanteda tidytext
1:   1000         0.2447        0.1976         0.2237 0.0777             1.180  0.5468   0.5985   0.1605
2:   5000         0.8670        0.8678         0.9144 0.3420             5.257  1.9872   1.7366   0.5995
3:  10000         1.7251        1.6773         1.7209 0.6688            11.225  3.8307   3.0742   1.1110
4:  25000         4.4119        4.2144         4.4019 1.7121            26.875  9.0715   7.1894   2.8258
5:  50000         9.1801        8.5456         9.4217 3.7477            53.084 18.3654  14.1207   5.8835
6:  75000        13.6154       13.4873        13.4365 5.0550            78.437 27.1280  20.3666   8.4837
7: 100000        18.6920       18.2231        18.6149 6.5685           109.576 35.2517  26.9816  11.0646

PANEL B
    texts sento_unigrams_many sento_unigrams_many_features sento_bigrams_many sento_clusters_many
1:   1000              0.2608                       0.2394             0.2661              0.2649
2:   5000              1.0047                       0.8692             1.0122              1.0133
3:  10000              1.9642                       1.6790             1.9799              1.9657
4:  25000              4.8174                       4.2360             4.9001              4.9665
5:  50000              9.9563                       8.7101            10.1299             10.0230
6:  75000             16.6963                      19.1378            16.6728             23.0397
7: 100000             32.3982                      23.6591            23.8046             36.4094
    sento_clusters_many_parallel tidytext_unigrams_many tidytext_bigrams_many
1:                        0.2183                 0.2088                0.6596
2:                        0.7902                 0.6694                2.7952
3:                        1.5441                 1.2747                5.6750
4:                        3.8085                 3.0665               13.9461
5:                        7.8541                 6.0343               27.9955
6:                       15.4257                13.9951               58.0154
7:                       30.8611                14.0195               64.7318

##############################
###### SESSION INFO

R version 3.6.2 (2019-12-12)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18362)

Matrix products: default

locale:
[1] LC_COLLATE=English_Belgium.1252  LC_CTYPE=English_Belgium.1252    LC_MONETARY=English_Belgium.1252
[4] LC_NUMERIC=C                     LC_TIME=English_Belgium.1252

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base

other attached packages:
[1] microbenchmark_1.4-7    tidyr_1.0.0             dplyr_0.8.3             lexicon_1.2.1
[5] data.table_1.12.8       SentimentAnalysis_1.3-3 syuzhet_1.0.4           meanr_0.1-2
[9] tidytext_0.2.2          quanteda_1.5.2          sentometrics_0.8.0

loaded via a namespace (and not attached):
 [1] NLP_0.2-0          Rcpp_1.0.3         pillar_1.4.3       compiler_3.6.2     tokenizers_0.2.1   iterators_1.0.12
 [7] tools_3.6.2        stopwords_1.0      zeallot_0.1.0      packrat_0.5.0      lubridate_1.7.4    lifecycle_0.1.0
[13] tibble_2.1.3       gtable_0.3.0       lattice_0.20-38    pkgconfig_2.0.3    rlang_0.4.2        Matrix_1.2-18
[19] foreach_1.4.7      fastmatch_1.1-0    rstudioapi_0.10    parallel_3.6.2     xml2_1.2.2         janeaustenr_0.1.5
[25] stringr_1.4.0      vctrs_0.2.1        generics_0.0.2     grid_3.6.2         tidyselect_0.2.5   glue_1.3.1
[31] R6_2.4.1           ggplot2_3.2.1      purrr_0.3.3        spacyr_1.2         magrittr_1.5       ellipsis_0.3.0
[37] backports_1.1.5    SnowballC_0.6.0    scales_1.1.0       codetools_0.2-16   assertthat_0.2.1   colorspace_1.4-1
[43] stringi_1.4.5      RcppParallel_4.4.4 lazyeval_0.2.2     munsell_0.5.0      slam_0.1-47        tm_0.7-7
[49] crayon_1.3.4
--------------------------------------------------------------------------------
/appendix/vignette_supplementary_appendix.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/appendix/vignette_supplementary_appendix.pdf
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------

## resubmission (version 1.0.1) [03/04/2025]

- removed one faulty URL
--------------------------------------------------------------------------------
/data-raw/FEEL_eng_tr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/FEEL_eng_tr.rda
--------------------------------------------------------------------------------
/data-raw/FEEL_fr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/FEEL_fr.rda
--------------------------------------------------------------------------------
/data-raw/FEEL_nl_tr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/FEEL_nl_tr.rda
--------------------------------------------------------------------------------
/data-raw/GI_eng.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/GI_eng.rda
--------------------------------------------------------------------------------
/data-raw/GI_fr_tr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/GI_fr_tr.rda
--------------------------------------------------------------------------------
/data-raw/GI_nl_tr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/GI_nl_tr.rda
--------------------------------------------------------------------------------
/data-raw/HENRY_eng.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/HENRY_eng.rda
--------------------------------------------------------------------------------
/data-raw/HENRY_fr_tr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/HENRY_fr_tr.rda
--------------------------------------------------------------------------------
/data-raw/HENRY_nl_tr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/HENRY_nl_tr.rda
--------------------------------------------------------------------------------
/data-raw/LM_eng.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/LM_eng.rda
--------------------------------------------------------------------------------
/data-raw/LM_fr_tr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/LM_fr_tr.rda
--------------------------------------------------------------------------------
/data-raw/LM_nl_tr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/LM_nl_tr.rda
--------------------------------------------------------------------------------
/data-raw/US_economic_news_1951-2014.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/US_economic_news_1951-2014.csv
--------------------------------------------------------------------------------
/data-raw/_sources.txt:
--------------------------------------------------------------------------------

LIST_LEXICONS:
LM: https://www3.nd.edu/~mcdonald/Word_Lists.html
> Tim Loughran and Bill McDonald, 2011, "When is a Liability not a Liability? Textual Analysis, Dictionaries, and 10-Ks", Journal of Finance, 66:1, 35-65

HENRY: paper pp. 387
> Elaine Henry. (2008). "Are Investors Influenced by the Way Earnings Press Releases are Written?", The Journal of Business Communication, 4 (45), 363-407

GI: http://www.wjh.harvard.edu/~inquirer/spreadsheet_guide.htm
> Harvard IV-4 dictionary + Lasswell value dictionary

FEEL: http://www.lirmm.fr/~abdaoui/FEEL
> Amine Abdaoui, Jérôme Azé, Sandra Bringay and Pascal Poncelet. "FEEL: French Expanded Emotion Lexicon". Language Resources and Evaluation, LRE 2016, pp. 1-23

LIST_VALENCE_SHIFTERS: R package lexicon

USNEWS: https://www.crowdflower.com/data-for-everyone ("Economic News Article Tone and Relevance")

EPU: http://www.policyuncertainty.com/us_monthly.html
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/FEEL.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/lexicons-raw/FEEL.csv
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/FEEL_eng.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/lexicons-raw/FEEL_eng.csv
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/FEEL_nl.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/lexicons-raw/FEEL_nl.csv
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/GI_fr.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/lexicons-raw/GI_fr.csv
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/GI_nl.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/lexicons-raw/GI_nl.csv
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/HENRY.csv:
--------------------------------------------------------------------------------
Word;Polarity
negative;-1
negatives;-1
fail;-1
fails;-1
failing;-1
failure;-1
weak;-1
weakness;-1
weaknesses;-1
difficult;-1
difficulty;-1
hurdle;-1
hurdles;-1
obstacle;-1
obstacles;-1
slump;-1
slumps;-1
slumping;-1
slumped;-1
uncertain;-1
uncertainty;-1
unsettled;-1
unfavorable;-1
downturn;-1
depressed;-1
disappoint;-1
disappoints;-1
disappointing;-1
disappointed;-1
disappointment;-1
risk;-1
risks;-1
risky;-1
threat;-1
threats;-1
penalty;-1
penalties;-1
down;-1
decrease;-1
decreases;-1
decreasing;-1
decreased;-1
decline;-1
declines;-1
declining;-1
declined;-1
fall;-1
falls;-1
falling;-1
fell;-1
fallen;-1
drop;-1
drops;-1
dropping;-1
dropped;-1
deteriorate;-1
deteriorates;-1
deteriorating;-1
deteriorated;-1
worsen;-1
worsens;-1
worsening;-1
weaken;-1
weakens;-1
weakening;-1
weakened;-1
worse;-1
worst;-1
low;-1
lower;-1
lowest;-1
less;-1
least;-1
smaller;-1
smallest;-1
shrink;-1
shrinks;-1
shrinking;-1
shrunk;-1
below;-1
under;-1
challenge;-1
challenges;-1
challenging;-1
challenged;-1
positive;1
positives;1
success;1
successes;1
successful;1
succeed;1
succeeds;1
succeeding;1
succeeded;1
accomplish;1
accomplishes;1
accomplishing;1
accomplished;1
accomplishment;1
accomplishments;1
strong;1
strength;1
strengths;1
certain;1
certainty;1
definite;1
solid;1
excellent;1
good;1
leading;1
achieve;1
achieves;1
achieved;1
achieving;1
achievement;1
achievements;1
progress;1
progressing;1
deliver;1
delivers;1
delivered;1
delivering;1
leader;1
leading;1
pleased;1
reward;1
rewards;1
rewarding;1
rewarded;1
opportunity;1
opportunities;1
enjoy;1
enjoys;1
enjoying;1
enjoyed;1
encouraged;1
encouraging;1
up;1
increase;1
increases;1
increasing;1
increased;1
rise;1
rises;1
rising;1
rose;1
risen;1
improve;1
improves;1
improving;1
improved;1
improvement;1
improvements;1
strengthen;1
strengthens;1
strengthening;1
strengthened;1
stronger;1
strongest;1
better;1
best;1
more;1
most;1
above;1
record;1
high;1
higher;1
highest;1
greater;1
greatest;1
larger;1
largest;1
grow;1
grows;1
growing;1
grew;1
grown;1
growth;1
expand;1
expands;1
expanding;1
expanded;1
expansion;1
exceed;1
exceeds;1
exceeded;1
exceeding;1
beat;1
beats;1
beating;1
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/HENRY_fr.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/lexicons-raw/HENRY_fr.csv
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/HENRY_nl.csv:
--------------------------------------------------------------------------------
Word;Polarity;Retranslation
negatieve;-1;negative
negatieven;-1;negatives
Fail;-1;Fail
mislukt;-1;failed
falende;-1;failing
storing;-1;failure
zwak;-1;weak
zwakte;-1;weakness
zwakke punten;-1;weak points
moeilijk;-1;difficult
moeilijkheidsgraad;-1;level of difficulty
hindernis;-1;obstacle
horden;-1;hurdles
obstakel;-1;obstacle
obstakels;-1;obstacles
malaise;-1;malaise
laagconjunctuur;-1;slumps
slumping;-1;slumping
zakte;-1;slumped
onzeker;-1;uncertain
onzekerheid;-1;uncertainty
onrustig;-1;restless
ongunstige;-1;unfavorable
neergang;-1;downturn
depressief;-1;depressed
teleurstellen;-1;disappoint
stelt teleur;-1;disappoints
teleurstellend;-1;disappointing
teleurgesteld;-1;disappointed
teleurstelling;-1;disappointment
risico;-1;risk
risico 's;-1;risks
riskant;-1;risky
bedreiging;-1;threat
bedreigingen;-1;threats
straf;-1;Criminal
sancties;-1;sanctions
naar beneden;-1;down
daling;-1;fall
vermindert;-1;reduces
minderen;-1;Juniors
daalde;-1;sank
daling;-1;fall
dalingen;-1;falls
dalende;-1;falling
daalde;-1;sank
Val;-1;Val
Falls;-1;Falls
die vallen;-1;that fall
viel;-1;fell
gedaald;-1;dropped
drop;-1;drop
DROPS;-1;DROPS
dropping;-1;dropping
gedaald;-1;dropped
verslechteren;-1;deteriorate
verslechtert;-1;deteriorates
verslechterende;-1;deteriorating
verslechterd;-1;deteriorated
verergeren;-1;worsen
verergert;-1;worsens
verslechtering;-1;deterioration
verzwakken;-1;weaken
verzwakt;-1;weakened
verzwakking;-1;weakening
verzwakt;-1;weakened
erger;-1;worse
slechtste;-1;worst
lage;-1;low
lagere;-1;lower
laagste;-1;lowest
minder;-1;less
minste;-1;least
kleinere;-1;smaller
kleinste;-1;smallest
krimpen;-1;shrink
krimpt;-1;shrinks
krimpen;-1;shrink
gekrompen;-1;shrunk
Hieronder;-1;Below
onder;-1;under
uitdaging;-1;challenge
uitdagingen;-1;challenges
uitdagende;-1;challenging
uitgedaagd;-1;challenged
positieve;1;positive
positieven;1;positives
succes;1;Good luck
successen;1;successes
succesvolle;1;successful
slagen;1;succeed
slaagt;1;succeeds
slagen;1;succeed
geslaagd;1;managed
bereiken;1;reach
volbrengt;1;accomplishes
volbrengen;1;accomplish
bereikt;1;achieved
prestatie;1;performance
prestaties;1;performance
sterke;1;strong
sterkte;1;strength
sterke punten;1;strong points
bepaalde;1;certain
zekerheid;1;Security
duidelijke;1;clear
solide;1;solid
Uitstekend;1;Excellent
goede;1;good
leiden;1;lead
bereiken;1;reach
bereikt;1;achieved
bereikt;1;achieved
verwezenlijking van;1;creation of
prestatie;1;performance
resultaten;1;results
vooruitgang;1;progress
vordert;1;progresses
leveren;1;deliver
levert;1;delivers
afgeleverd;1;delivered
leveren van;1;delivering
leider;1;leader
leiden;1;lead
blij;1;happy
beloning;1;reward
beloningen;1;Rewards
belonen;1;reward
beloond;1;rewarded
kans;1;chance
kansen;1;opportunities
Geniet van;1;Enjoy
Geniet van;1;Enjoy
genieten van;1;enjoy
genoten;1;enjoyed
aangemoedigd;1;encouraged
stimuleren;1;boost
omhoog;1;up
verhoging van de;1;increase of the
verhoogt;1;increases
verhogen;1;increase
verhoogd;1;increased
opkomst;1;attendance
stijgt;1;increases
stijgen;1;ascent
steeg;1;rose
gestegen;1;increased
verbeteren;1;improve
verbetert;1;improves
verbetering van de;1;improvement of the
verbeterd;1;improved
verbetering;1;improvement
verbeteringen;1;improvements
versterken;1;strengthening
versterkt;1;strengthened
versterking van de;1;strengthening of the
versterkt;1;strengthened
sterker;1;stronger
sterkste;1;strongest
beter;1;better
beste;1;best
meer;1;more
de meeste;1;most
boven;1;above
record;1;record
hoge;1;high
hogere;1;higher
hoogste;1;highest
meer;1;more
grootste;1;largest
grotere;1;larger
grootste;1;largest
groeien;1;grow
groeit;1;grows
groeiende;1;growing
groeide;1;grew
gegroeid;1;grown
groei;1;growth
Vouw;1;Fold
breidt uit;1;expands
uit te breiden;1;expand
uitgebreid;1;extended
uitbreiding;1;extension
hoger zijn dan;1;higher than
overschrijdt;1;exceeds
overschreden;1;exceeded
meer dan;1;more than
Beat;1;Beat
beats;1;beats
pak slaag;1;spanking
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/LM_fr.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/lexicons-raw/LM_fr.csv
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/LM_nl.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/lexicons-raw/LM_nl.csv
--------------------------------------------------------------------------------
/data-raw/valence-raw/valShifters.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/valence-raw/valShifters.rda
--------------------------------------------------------------------------------
/data-raw/valence_eng.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/valence_eng.rda
--------------------------------------------------------------------------------
/data-raw/valence_fr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/valence_fr.rda
--------------------------------------------------------------------------------
/data-raw/valence_nl.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/valence_nl.rda
--------------------------------------------------------------------------------
/data/epu.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data/epu.rda
--------------------------------------------------------------------------------
/data/list_lexicons.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data/list_lexicons.rda
--------------------------------------------------------------------------------
/data/list_valence_shifters.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data/list_valence_shifters.rda
--------------------------------------------------------------------------------
/data/usnews.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data/usnews.rda
--------------------------------------------------------------------------------
/docs/apple-touch-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/apple-touch-icon-120x120.png
--------------------------------------------------------------------------------
/docs/apple-touch-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/apple-touch-icon-152x152.png
--------------------------------------------------------------------------------
/docs/apple-touch-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/apple-touch-icon-180x180.png
--------------------------------------------------------------------------------
/docs/apple-touch-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/apple-touch-icon-60x60.png
--------------------------------------------------------------------------------
/docs/apple-touch-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/apple-touch-icon-76x76.png
--------------------------------------------------------------------------------
/docs/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/apple-touch-icon.png
--------------------------------------------------------------------------------
/docs/articles/applications/epu_files/figure-html/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/applications/epu_files/figure-html/unnamed-chunk-10-1.png
--------------------------------------------------------------------------------
/docs/articles/applications/epu_files/figure-html/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/applications/epu_files/figure-html/unnamed-chunk-15-1.png
--------------------------------------------------------------------------------
/docs/articles/applications/epu_files/figure-html/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/applications/epu_files/figure-html/unnamed-chunk-9-1.png
--------------------------------------------------------------------------------
/docs/articles/applications/epu_files/header-attrs-2.10/header-attrs.js:
--------------------------------------------------------------------------------
// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var i, h, a;
  for (i = 0; i < hs.length; i++) {
    h = hs[i];
    if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
    a = h.attributes;
    while (a.length > 0) h.removeAttribute(a[0].name);
  }
});
--------------------------------------------------------------------------------
/docs/articles/applications/epu_files/header-attrs-2.9/header-attrs.js:
--------------------------------------------------------------------------------
// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var i, h, a;
  for (i = 0; i < hs.length; i++) {
    h = hs[i];
    if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
    a = h.attributes;
    while (a.length > 0) h.removeAttribute(a[0].name);
  }
});
--------------------------------------------------------------------------------
/docs/articles/applications/vix_files/figure-html/unnamed-chunk-14-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/applications/vix_files/figure-html/unnamed-chunk-14-1.png
--------------------------------------------------------------------------------
/docs/articles/applications/vix_files/figure-html/unnamed-chunk-16-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/applications/vix_files/figure-html/unnamed-chunk-16-1.png
--------------------------------------------------------------------------------
/docs/articles/applications/vix_files/figure-html/unnamed-chunk-16-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/applications/vix_files/figure-html/unnamed-chunk-16-2.png
--------------------------------------------------------------------------------
/docs/articles/applications/vix_files/figure-html/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/applications/vix_files/figure-html/unnamed-chunk-9-1.png
--------------------------------------------------------------------------------
/docs/articles/applications/vix_files/header-attrs-2.10/header-attrs.js:
--------------------------------------------------------------------------------
// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var i, h, a;
  for (i = 0; i < hs.length; i++) {
    h = hs[i];
    if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
    a = h.attributes;
    while (a.length > 0) h.removeAttribute(a[0].name);
  }
});
--------------------------------------------------------------------------------
/docs/articles/applications/vix_files/header-attrs-2.9/header-attrs.js:
--------------------------------------------------------------------------------
// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var i, h, a;
  for (i = 0; i < hs.length; i++) {
    h = hs[i];
    if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
    a = h.attributes;
    while (a.length > 0) h.removeAttribute(a[0].name);
  }
});
--------------------------------------------------------------------------------
/docs/articles/contributions/gopress_figures/read_later.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/contributions/gopress_figures/read_later.jpg
--------------------------------------------------------------------------------
/docs/articles/contributions/gopress_figures/save_as.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/contributions/gopress_figures/save_as.jpg
--------------------------------------------------------------------------------
/docs/articles/contributions/gopress_files/figure-html/sento 3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/contributions/gopress_files/figure-html/sento 3-1.png
--------------------------------------------------------------------------------
/docs/articles/contributions/gopress_files/figure-html/sento topic 3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/contributions/gopress_files/figure-html/sento topic 3-1.png
--------------------------------------------------------------------------------
/docs/articles/contributions/gopress_files/figure-html/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/contributions/gopress_files/figure-html/unnamed-chunk-3-1.png
--------------------------------------------------------------------------------
/docs/articles/contributions/gopress_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/contributions/gopress_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/articles/contributions/gopress_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/contributions/gopress_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/contributions/isa_files/figure-html/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/contributions/isa_files/figure-html/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /docs/articles/contributions/isa_files/figure-html/unnamed-chunk-19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/contributions/isa_files/figure-html/unnamed-chunk-19-1.png -------------------------------------------------------------------------------- /docs/articles/contributions/isa_files/figure-html/unnamed-chunk-27-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/contributions/isa_files/figure-html/unnamed-chunk-27-1.png -------------------------------------------------------------------------------- /docs/articles/contributions/isa_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/contributions/isa_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/development_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/development_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/examples/corpus_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/corpus_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /docs/articles/examples/corpus_files/figure-html/unnamed-chunk-5-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/corpus_files/figure-html/unnamed-chunk-5-2.png -------------------------------------------------------------------------------- /docs/articles/examples/corpus_files/figure-html/unnamed-chunk-5-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/corpus_files/figure-html/unnamed-chunk-5-3.png -------------------------------------------------------------------------------- /docs/articles/examples/corpus_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/examples/corpus_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/examples/indexation_files/figure-html/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/indexation_files/figure-html/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /docs/articles/examples/indexation_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/indexation_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/articles/examples/indexation_files/figure-html/unnamed-chunk-4-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/indexation_files/figure-html/unnamed-chunk-4-2.png -------------------------------------------------------------------------------- /docs/articles/examples/indexation_files/figure-html/unnamed-chunk-4-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/indexation_files/figure-html/unnamed-chunk-4-3.png -------------------------------------------------------------------------------- /docs/articles/examples/indexation_files/figure-html/unnamed-chunk-4-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/indexation_files/figure-html/unnamed-chunk-4-4.png -------------------------------------------------------------------------------- /docs/articles/examples/indexation_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/examples/indexation_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/examples/modeling_files/figure-html/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/modeling_files/figure-html/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /docs/articles/examples/modeling_files/figure-html/unnamed-chunk-11-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/modeling_files/figure-html/unnamed-chunk-11-2.png -------------------------------------------------------------------------------- /docs/articles/examples/modeling_files/figure-html/unnamed-chunk-11-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/modeling_files/figure-html/unnamed-chunk-11-3.png -------------------------------------------------------------------------------- /docs/articles/examples/modeling_files/figure-html/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/modeling_files/figure-html/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /docs/articles/examples/modeling_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/examples/modeling_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/examples/sentiment_files/figure-html/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/sentiment_files/figure-html/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /docs/articles/examples/sentiment_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/examples/sentiment_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/sentometrics_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/sentometrics_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ 7 | 8 | /* All levels of nav */ 9 | nav[data-toggle='toc'] .nav > li > a { 10 | display: block; 11 | padding: 4px 20px; 12 | font-size: 13px; 13 | font-weight: 500; 14 | color: #767676; 15 | } 16 | nav[data-toggle='toc'] .nav > li > a:hover, 17 | nav[data-toggle='toc'] .nav > li > a:focus { 18 | padding-left: 19px; 19 | color: #563d7c; 20 | text-decoration: none; 21 | background-color: transparent; 22 | border-left: 1px solid #563d7c; 23 | } 24 | nav[data-toggle='toc'] .nav > .active > a, 25 | nav[data-toggle='toc'] .nav > .active:hover > a, 26 | nav[data-toggle='toc'] .nav > .active:focus > a { 27 | padding-left: 18px; 28 | font-weight: bold; 29 | color: #563d7c; 30 | background-color: transparent; 31 | border-left: 2px solid #563d7c; 32 | } 33 | 34 | /* Nav: second level (shown on .active) */ 35 | nav[data-toggle='toc'] .nav .nav { 36 | display: none; /* Hide by default, but at >768px, show it */ 37 | padding-bottom: 10px; 38 | } 39 | nav[data-toggle='toc'] .nav .nav > li > a { 40 | padding-top: 1px; 41 | padding-bottom: 1px; 42 | padding-left: 30px; 43 | font-size: 12px; 44 | font-weight: normal; 45 | } 46 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 47 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 48 | padding-left: 29px; 49 | } 50 | nav[data-toggle='toc'] .nav .nav > .active > a, 51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a { 53 | padding-left: 28px; 54 | font-weight: 500; 55 | } 56 | 57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 58 | nav[data-toggle='toc'] .nav > .active > ul { 59 | display: block; 60 | } 61 | -------------------------------------------------------------------------------- /docs/docsearch.js: 
-------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/docsearch.json: -------------------------------------------------------------------------------- 1 | { 2 | "index_name": "sentometrics", 3 | "start_urls": [ 4 | { 5 | "url": "https://sentometricsresearch.github.io/sentometrics/index.html", 6 | "selectors_key": "homepage", 7 | "tags": [ 8 | "homepage" 9 | ] 10 | }, 11 | { 12 | "url": "https://sentometricsresearch.github.io/sentometrics/reference", 13 | "selectors_key": "reference", 14 | "tags": [ 15 | "reference" 16 | ] 17 | }, 18 | { 19 | "url": "https://sentometricsresearch.github.io/sentometrics/articles", 20 | "selectors_key": "articles", 21 | "tags": [ 22 | "articles" 23 | ] 24 | } 25 | ], 26 | "stop_urls": [ 27 | "/reference/$", 28 | "/reference/index.html", 29 | "/articles/$", 30 | "/articles/index.html" 31 | ], 32 | "sitemap_urls": [ 33 | "https://sentometricsresearch.github.io/sentometrics/sitemap.xml" 34 | ], 35 | "selectors": { 36 | "homepage": { 37 | "lvl0": { 38 | "selector": ".contents h1", 39 | "default_value": "sentometrics Home page" 40 | }, 41 | "lvl1": { 42 | "selector": ".contents h2" 43 | }, 44 | "lvl2": { 45 | "selector": ".contents h3", 46 | "default_value": "Context" 47 | }, 48 | "lvl3": ".ref-arguments td, .ref-description", 49 | "text": ".contents p, .contents li, .contents .pre" 50 | }, 51 | 
"reference": { 52 | "lvl0": { 53 | "selector": ".contents h1" 54 | }, 55 | "lvl1": { 56 | "selector": ".contents .name", 57 | "default_value": "Argument" 58 | }, 59 | "lvl2": { 60 | "selector": ".ref-arguments th", 61 | "default_value": "Description" 62 | }, 63 | "lvl3": ".ref-arguments td, .ref-description", 64 | "text": ".contents p, .contents li" 65 | }, 66 | "articles": { 67 | "lvl0": { 68 | "selector": ".contents h1" 69 | }, 70 | "lvl1": { 71 | "selector": ".contents .name" 72 | }, 73 | "lvl2": { 74 | "selector": ".contents h2, .contents h3", 75 | "default_value": "Context" 76 | }, 77 | "text": ".contents p, .contents li" 78 | } 79 | }, 80 | "selectors_exclude": [ 81 | ".dont-index" 82 | ], 83 | "min_indexed_level": 2, 84 | "custom_settings": { 85 | "separatorsToIndex": "_", 86 | "attributesToRetrieve": [ 87 | "hierarchy", 88 | "content", 89 | "anchor", 90 | "url", 91 | "url_without_anchor" 92 | ] 93 | } 94 | } 95 | 96 | -------------------------------------------------------------------------------- /docs/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/favicon-16x16.png -------------------------------------------------------------------------------- /docs/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/favicon-32x32.png -------------------------------------------------------------------------------- /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/favicon.ico -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/logo.png -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('.navbar-fixed-top').headroom(); 6 | 7 | $('body').css('padding-top', $('.navbar').height() + 10); 8 | $(window).resize(function(){ 9 | $('body').css('padding-top', $('.navbar').height() + 10); 10 | }); 11 | 12 | $('[data-toggle="tooltip"]').tooltip(); 13 | 14 | var cur_path = paths(location.pathname); 15 | var links = $("#navbar ul li a"); 16 | var max_length = -1; 17 | var pos = -1; 18 | for (var i = 0; i < links.length; i++) { 19 | if (links[i].getAttribute("href") === "#") 20 | continue; 21 | // Ignore external links 22 | if (links[i].host !== location.host) 23 | continue; 24 | 25 | var nav_path = paths(links[i].pathname); 26 | 27 | var length = prefix_length(nav_path, cur_path); 28 | if (length > max_length) { 29 | max_length = length; 30 | pos = i; 31 | } 32 | } 33 | 34 | // Add class to parent
35 | if (pos >= 0) { 36 | var menu_anchor = $(links[pos]); 37 | menu_anchor.parent().addClass("active"); 38 | menu_anchor.closest("li.dropdown").addClass("active"); 39 | } 40 | }); 41 | 42 | function paths(pathname) { 43 | var pieces = pathname.split("/"); 44 | pieces.shift(); // always starts with / 45 | 46 | var end = pieces[pieces.length - 1]; 47 | if (end === "index.html" || end === "") 48 | pieces.pop(); 49 | return(pieces); 50 | } 51 | 52 | // Returns -1 if not found 53 | function prefix_length(needle, haystack) { 54 | if (needle.length > haystack.length) 55 | return(-1); 56 | 57 | // Special case for length-0 haystack, since for loop won't run 58 | if (haystack.length === 0) { 59 | return(needle.length === 0 ? 0 : -1); 60 | } 61 | 62 | for (var i = 0; i < haystack.length; i++) { 63 | if (needle[i] != haystack[i]) 64 | return(i); 65 | } 66 | 67 | return(haystack.length); 68 | } 69 | 70 | /* Clipboard --------------------------*/ 71 | 72 | function changeTooltipMessage(element, msg) { 73 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 74 | element.setAttribute('data-original-title', msg); 75 | $(element).tooltip('show'); 76 | element.setAttribute('data-original-title', tooltipOriginalTitle); 77 | } 78 | 79 | if(ClipboardJS.isSupported()) { 80 | $(document).ready(function() { 81 | var copyButton = "<button type='button' class='btn btn-primary btn-copy-ex' title='Copy to clipboard' aria-label='Copy to clipboard' data-toggle='tooltip' data-placement='left auto' data-trigger='hover' data-clipboard-copy><i class='fa fa-copy'></i></button>"; 82 | 83 | $("div.sourceCode").addClass("hasCopyButton"); 84 | 85 | // Insert copy buttons: 86 | $(copyButton).prependTo(".hasCopyButton"); 87 | 88 | // Initialize tooltips: 89 | $('.btn-copy-ex').tooltip({container: 'body'}); 90 | 91 | // Initialize clipboard: 92 | var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { 93 | text: function(trigger) { 94 | return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); 95 | } 96 | }); 97 | 98 | clipboardBtnCopies.on('success', function(e) { 99 | changeTooltipMessage(e.trigger, 'Copied!'); 100 | e.clearSelection(); 101 | }); 102 | 103 | clipboardBtnCopies.on('error', function(e) { 104 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 105 | }); 106 | }); 107 | } 108 | })(window.jQuery || window.$) 109 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: '3.2' 2 | pkgdown: 2.1.1 3 | pkgdown_sha: ~ 4 | articles: 5 | examples/corpus: examples/corpus.html 6 | development: development.html 7 | applications/epu: applications/epu.html 8 | contributions/gopress: contributions/gopress.html 9 | examples/indexation: examples/indexation.html 10 | contributions/isa: contributions/isa.html 11 | examples/modeling: examples/modeling.html 12 | examples/sentiment: examples/sentiment.html 13 | sentometrics: sentometrics.html 14 | applications/vix: applications/vix.html 15 | last_built: 2025-04-02T11:29Z 16 | urls: 17 | reference: https://sentometricsresearch.github.io/sentometrics/reference 18 | article: https://sentometricsresearch.github.io/sentometrics/articles -------------------------------------------------------------------------------- /docs/reference/Rplot001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/reference/Rplot001.png -------------------------------------------------------------------------------- /docs/reference/figures/gsoc.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/reference/figures/gsoc.png -------------------------------------------------------------------------------- /docs/reference/figures/innoviris.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/reference/figures/innoviris.png -------------------------------------------------------------------------------- /docs/reference/figures/ivado.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/reference/figures/ivado.png -------------------------------------------------------------------------------- /docs/reference/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/reference/figures/logo.png -------------------------------------------------------------------------------- /docs/reference/figures/snsf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/reference/figures/snsf.png -------------------------------------------------------------------------------- /docs/reference/figures/swissuniversities.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/reference/figures/swissuniversities.png -------------------------------------------------------------------------------- /docs/reference/plot.sento_measures-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/reference/plot.sento_measures-1.png -------------------------------------------------------------------------------- /examples/vix.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/examples/vix.rda -------------------------------------------------------------------------------- /index.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | [![CRAN](https://www.r-pkg.org/badges/version/sentometrics)](https://cran.r-project.org/package=sentometrics) 4 | [![codecov](https://codecov.io/github/SentometricsResearch/sentometrics/branch/master/graphs/badge.svg)](https://codecov.io/github/SentometricsResearch/sentometrics) 5 | [![Downloads](https://cranlogs.r-pkg.org/badges/last-day/sentometrics?color=ff69b4)](https://www.r-pkg.org/pkg/sentometrics) 6 | [![Downloads](https://cranlogs.r-pkg.org/badges/sentometrics?color=ff69b4)](https://www.r-pkg.org/pkg/sentometrics) 7 | [![Downloads](https://cranlogs.r-pkg.org/badges/grand-total/sentometrics?color=ff69b4)](https://www.r-pkg.org/pkg/sentometrics) 8 | 9 | 10 | # sentometrics 11 | 12 | > The **`sentometrics`** package offers an **integrated 
framework for textual sentiment time series aggregation and prediction**. It accounts for the intrinsic challenge that textual sentiment can be computed in many different ways, as well as the large number of possibilities to pool sentiment into a time series index. The package integrates the fast _quantification_ of sentiment from texts, the _aggregation_ into different sentiment time series, and the _prediction_ based on these measures. All in one coherent workflow! 13 | 14 | Explore this package website to learn what you can do with **`sentometrics`** and how. 15 | 16 | ### Reference 17 | 18 | Please cite **`sentometrics`** in publications. See the **Citation** section on the right. 19 | 20 | ### Acknowledgements 21 | 22 | This software package originates from a 23 | [Google Summer of Code 2017](https://github.com/rstats-gsoc/gsoc2017/wiki/Sentometrics:-An-integrated-framework-for-text-based-multivariate-time-series-modeling-and-forecasting) project, was further developed 24 | during a follow-up [Google Summer of Code 2019](https://github.com/rstats-gsoc/gsoc2019/wiki/sentometrics) project, and benefited from financial support from [Innoviris](https://innoviris.brussels), [IVADO](https://www.ivado.ca), [swissuniversities](https://www.swissuniversities.ch), and the [Swiss National Science Foundation](http://www.snf.ch) (grants #179281 and #191730). 25 | 26 | 31 | 32 | ### Contact 33 | 34 | Reach out to [Samuel Borms](mailto:borms_sam@hotmail.com) if you have questions or suggestions, or want to become a contributor. See the **News > Development** section to find out what you can help us with. 35 | 36 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | 2 | bibentry(bibtype = "Article", 3 | title = "The {R} Package {sentometrics} to Compute, Aggregate, and Predict with Textual Sentiment", 4 | author = c(person(given = "David", 5 | family = "Ardia", 6 | email = "david.ardia@hec.ca"), 7 | person(given = "Keven", 8 | family = "Bluteau", 9 | email = "keven.bluteau@usherbrooke.ca"), 10 | person(given = "Samuel", 11 | family = "Borms", 12 | email = "borms_sam@hotmail.com"), 13 | person(given = "Kris", 14 | family = "Boudt", 15 | email = "kris.boudt@ugent.be")), 16 | journal = "Journal of Statistical Software", 17 | year = "2021", 18 | volume = "99", 19 | number = "2", 20 | pages = "1--40", 21 | doi = "10.18637/jss.v099.i02", 22 | header = "To cite sentometrics in publications use:" 23 | ) 24 | 25 | -------------------------------------------------------------------------------- /inst/extdata/test_data.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/inst/extdata/test_data.rda -------------------------------------------------------------------------------- /man/add_features.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentocorpus.R 3 | \name{add_features} 4 | \alias{add_features} 5 | \title{Add feature columns to a (sento_)corpus object} 6 | \usage{ 7 | add_features( 8 | corpus, 9 | featuresdf = NULL, 10 | keywords = NULL, 11 | do.binary = TRUE, 12 | do.regex = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{corpus}{a \code{sento_corpus} object created with \code{\link{sento_corpus}}, or
a \pkg{quanteda} 17 | \code{\link[quanteda]{corpus}} object.} 18 | 19 | \item{featuresdf}{a named \code{data.frame} of type \code{numeric} where each column is a new feature to be added to the 20 | inputted \code{corpus} object. If the number of rows in \code{featuresdf} is not equal to the number of documents 21 | in \code{corpus}, recycling will occur. The numeric values should be between 0 and 1 (inclusive).} 22 | 23 | \item{keywords}{a named \code{list}. For every element, a new feature column is added with a value of 1 for the texts 24 | in which (at least one of) the keyword(s) appear(s), and 0 if not (for \code{do.binary = TRUE}), or with the 25 | normalized number of times the keyword(s) occur(s) in the text as value (for \code{do.binary = FALSE}). If no texts match a 26 | keyword, no column is added. The \code{list} names are used as the names of the new features. For more complex searching, 27 | instead of just keywords, one can also directly use a single regex expression to define a new feature (see the details section).} 28 | 29 | \item{do.binary}{a \code{logical}, if \code{do.binary = FALSE}, the number of occurrences is normalized 30 | between 0 and 1 (see argument \code{keywords}).} 31 | 32 | \item{do.regex}{a \code{logical} vector equal in length to the number of elements in the \code{keywords} argument 33 | \code{list}, or a single value if it applies to all. It should be set to \code{TRUE} at those positions where a single 34 | regex expression is used to identify the particular feature.} 35 | } 36 | \value{ 37 | An updated \code{corpus} object. 38 | } 39 | \description{ 40 | Adds new feature columns, either user-supplied or based on keyword(s)/regex pattern search, to 41 | a provided \code{sento_corpus} or a \pkg{quanteda} \code{\link[quanteda]{corpus}} object. 42 | } 43 | \details{ 44 | If a provided feature name is already part of the corpus, it will be replaced. The \code{featuresdf} and 45 | \code{keywords} arguments can be provided at the same time, or only one of them, leaving the other at \code{NULL}. We use 46 | the \pkg{stringi} package for searching the keywords. The \code{do.regex} argument points to the corresponding elements 47 | in \code{keywords}. For \code{FALSE}, we transform the keywords into a simple regex expression, involving \code{"\\b"} for 48 | exact word boundary matching and (if multiple keywords) \code{|} as an OR operator. The elements associated with \code{TRUE} do 49 | not undergo this transformation, and are evaluated as given, provided the corresponding keywords vector consists of only one 50 | expression. For a large corpus and/or complex regex patterns, this function may require some patience. Scaling between 0 51 | and 1 is performed via min-max normalization, per column.
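As a minimal sketch of this transformation (an illustration consistent with the description above, not an
excerpt of the package internals), the keywords vector \code{c("Obama", "US president")} with
\code{do.regex = FALSE} is matched as if the single regex \code{"\\\\bObama\\\\b|\\\\bUS president\\\\b"}
had been supplied, a pattern that can be built in R as
\preformatted{paste0("\\\\b", c("Obama", "US president"), "\\\\b", collapse = "|")}
This is also why \code{corpus3} and \code{corpus4} in the examples below flag the same documents for the
\code{pres} and \code{pres2} features.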
52 | } 53 | \examples{ 54 | set.seed(505) 55 | 56 | # construct a corpus and add (a) feature(s) to it 57 | corpus <- quanteda::corpus_sample( 58 | sento_corpus(corpusdf = sentometrics::usnews), 500 59 | ) 60 | corpus1 <- add_features(corpus, 61 | featuresdf = data.frame(random = runif(quanteda::ndoc(corpus)))) 62 | corpus2 <- add_features(corpus, 63 | keywords = list(pres = "president", war = "war"), 64 | do.binary = FALSE) 65 | corpus3 <- add_features(corpus, 66 | keywords = list(pres = c("Obama", "US president"))) 67 | corpus4 <- add_features(corpus, 68 | featuresdf = data.frame(all = 1), 69 | keywords = list(pres1 = "Obama|US [p|P]resident", 70 | pres2 = "\\\\bObama\\\\b|\\\\bUS president\\\\b", 71 | war = "war"), 72 | do.regex = c(TRUE, TRUE, FALSE)) 73 | 74 | sum(quanteda::docvars(corpus3, "pres")) == 75 | sum(quanteda::docvars(corpus4, "pres2")) # TRUE 76 | 77 | # adding a complementary feature 78 | nonpres <- data.frame(nonpres = as.numeric(!quanteda::docvars(corpus3, "pres"))) 79 | corpus3 <- add_features(corpus3, featuresdf = nonpres) 80 | 81 | } 82 | \author{ 83 | Samuel Borms 84 | } 85 | -------------------------------------------------------------------------------- /man/aggregate.sentiment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentomeasures_main.R 3 | \name{aggregate.sentiment} 4 | \alias{aggregate.sentiment} 5 | \title{Aggregate textual sentiment across sentences, documents and time} 6 | \usage{ 7 | \method{aggregate}{sentiment}(x, ctr, do.full = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{sentiment} object created using \code{\link{compute_sentiment}} (from a \code{sento_corpus} 11 | object) or using \code{\link{as.sentiment}}.} 12 | 13 | \item{ctr}{output from a \code{\link{ctr_agg}} call. The \code{howWithin} and \code{nCore} elements are ignored.} 14 | 15 | \item{do.full}{if \code{do.full = TRUE} (by default), does entire aggregation up to a \code{sento_measures} 16 | object, else only goes from sentence-level to document-level. Ignored if no \code{"sentence_id"} column in 17 | \code{sentiment} input object.} 18 | 19 | \item{...}{not used.} 20 | } 21 | \value{ 22 | A document-level \code{sentiment} object or a fully aggregated \code{sento_measures} object. 23 | } 24 | \description{ 25 | Aggregates textual sentiment scores at sentence- or document-level into a panel of textual 26 | sentiment measures. Can also be used to aggregate sentence-level sentiment scores into 27 | document-level sentiment scores. This function is called within the \code{\link{sento_measures}} function. 
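In rough terms (a sketch of the relationship, abstracting from the exact internal call), for a
\code{sento_corpus} object \code{x}, a \code{sento_lexicons} object \code{lexicons} and a control
object \code{ctr} from \code{\link{ctr_agg}}, calling \code{sento_measures(x, lexicons, ctr)} boils down to
\code{aggregate(compute_sentiment(x, lexicons, how = ctr$howWithin), ctr)}.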
28 | } 29 | \examples{ 30 | set.seed(505) 31 | 32 | data("usnews", package = "sentometrics") 33 | data("list_lexicons", package = "sentometrics") 34 | data("list_valence_shifters", package = "sentometrics") 35 | 36 | # computation of sentiment 37 | corpus <- sento_corpus(corpusdf = usnews) 38 | corpusSample <- quanteda::corpus_sample(corpus, size = 500) 39 | l1 <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")], 40 | list_valence_shifters[["en"]]) 41 | l2 <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")], 42 | list_valence_shifters[["en"]][, c("x", "t")]) 43 | sent1 <- compute_sentiment(corpusSample, l1, how = "counts") 44 | sent2 <- compute_sentiment(corpusSample, l2, do.sentence = TRUE) 45 | sent3 <- compute_sentiment(as.character(corpusSample), l2, 46 | do.sentence = TRUE) 47 | ctr <- ctr_agg(howTime = c("linear"), by = "year", lag = 3) 48 | 49 | # aggregate into sentiment measures 50 | sm1 <- aggregate(sent1, ctr) 51 | sm2 <- aggregate(sent2, ctr) 52 | 53 | # two-step aggregation (first into document-level sentiment) 54 | sd2 <- aggregate(sent2, ctr, do.full = FALSE) 55 | sm3 <- aggregate(sd2, ctr) 56 | 57 | # aggregation of a sentiment data.table 58 | cols <- c("word_count", names(l2)[-length(l2)]) 59 | sd3 <- sent3[, lapply(.SD, sum), by = "id", .SDcols = cols] 60 | 61 | } 62 | \seealso{ 63 | \code{\link{compute_sentiment}}, \code{\link{ctr_agg}}, \code{\link{sento_measures}} 64 | } 65 | \author{ 66 | Samuel Borms, Keven Bluteau 67 | } 68 | -------------------------------------------------------------------------------- /man/as.data.table.sento_measures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentomeasures_methods.R 3 | \name{as.data.table.sento_measures} 4 | \alias{as.data.table.sento_measures} 5 | \title{Get the sentiment measures} 6 | \usage{ 7 | \method{as.data.table}{sento_measures}(x, keep.rownames = FALSE, format = "wide", ...) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{sento_measures} object created using \code{\link{sento_measures}}.} 11 | 12 | \item{keep.rownames}{see \code{\link[data.table]{as.data.table}}.} 13 | 14 | \item{format}{a single \code{character} vector, one of \code{c("wide", "long")}.} 15 | 16 | \item{...}{not used.} 17 | } 18 | \value{ 19 | The panel of sentiment measures under \code{sento_measures[["measures"]]}, 20 | in wide or long format. 21 | } 22 | \description{ 23 | Extracts the sentiment measures \code{data.table} in either wide (by default) 24 | or long format. 
25 | } 26 | \examples{ 27 | data("usnews", package = "sentometrics") 28 | data("list_lexicons", package = "sentometrics") 29 | data("list_valence_shifters", package = "sentometrics") 30 | 31 | sm <- sento_measures(sento_corpus(corpusdf = usnews[1:200, ]), 32 | sento_lexicons(list_lexicons["LM_en"]), 33 | ctr_agg(lag = 3)) 34 | 35 | data.table::as.data.table(sm) 36 | data.table::as.data.table(sm, format = "long") 37 | 38 | } 39 | \author{ 40 | Samuel Borms 41 | } 42 | -------------------------------------------------------------------------------- /man/as.sentiment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentiment_engines.R 3 | \name{as.sentiment} 4 | \alias{as.sentiment} 5 | \title{Convert a sentiment table to a sentiment object} 6 | \usage{ 7 | as.sentiment(s) 8 | } 9 | \arguments{ 10 | \item{s}{a \code{data.table} or \code{data.frame} that can be converted into a \code{sentiment} object. It 11 | should have at least an \code{"id"}, a \code{"date"}, a \code{"word_count"} and one sentiment scores column. 12 | If other column names are provided with a separating \code{"--"}, the first part is considered the lexicon 13 | (or more generally, the sentiment computation method), and the second part the feature. For sentiment column 14 | names without any \code{"--"}, a \code{"dummyFeature"} component is added.} 15 | } 16 | \value{ 17 | A \code{sentiment} object. 18 | } 19 | \description{ 20 | Converts a properly structured sentiment table into a \code{sentiment} object, which can be used 21 | for further aggregation with the \code{\link{aggregate.sentiment}} function. This makes it possible to start from 22 | sentiment scores not necessarily computed with \code{\link{compute_sentiment}}. 23 | } 24 | \examples{ 25 | set.seed(505) 26 | 27 | data("usnews", package = "sentometrics") 28 | data("list_lexicons", package = "sentometrics") 29 | 30 | ids <- paste0("id", 1:200) 31 | dates <- sample(seq(as.Date("2015-01-01"), as.Date("2018-01-01"), by = "day"), 200, TRUE) 32 | word_count <- sample(150:850, 200, replace = TRUE) 33 | sent <- matrix(rnorm(200 * 8), nrow = 200) 34 | s1 <- s2 <- data.table::data.table(id = ids, date = dates, word_count = word_count, sent) 35 | s3 <- data.frame(id = ids, date = dates, word_count = word_count, sent, 36 | stringsAsFactors = FALSE) 37 | s4 <- compute_sentiment(usnews$texts[201:400], 38 | sento_lexicons(list_lexicons["GI_en"]), 39 | "counts", do.sentence = TRUE) 40 | m <- "method" 41 | 42 | colnames(s1)[-c(1:3)] <- paste0(m, 1:8) 43 | sent1 <- as.sentiment(s1) 44 | 45 | colnames(s2)[-c(1:3)] <- c(paste0(m, 1:4, "--", "feat1"), paste0(m, 1:4, "--", "feat2")) 46 | sent2 <- as.sentiment(s2) 47 | 48 | colnames(s3)[-c(1:3)] <- c(paste0(m, 1:3, "--", "feat1"), paste0(m, 1:3, "--", "feat2"), 49 | paste0(m, 4:5)) 50 | sent3 <- as.sentiment(s3) 51 | 52 | s4[, "date" := rep(dates, s4[, max(sentence_id), by = id][[2]])] 53 | sent4 <- as.sentiment(s4) 54 | 55 | # further aggregation from then on is easy...
56 | sentMeas1 <- aggregate(sent1, ctr_agg(lag = 10)) 57 | sent5 <- aggregate(sent4, ctr_agg(howDocs = "proportional"), do.full = FALSE) 58 | 59 | } 60 | \author{ 61 | Samuel Borms 62 | } 63 | -------------------------------------------------------------------------------- /man/as.sento_corpus.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentocorpus.R 3 | \name{as.sento_corpus} 4 | \alias{as.sento_corpus} 5 | \title{Convert a quanteda or tm corpus object into a sento_corpus object} 6 | \usage{ 7 | as.sento_corpus(x, dates = NULL, do.clean = FALSE) 8 | } 9 | \arguments{ 10 | \item{x}{a \pkg{quanteda} \code{\link[quanteda]{corpus}} object, a \pkg{tm} 11 | \code{\link[tm]{SimpleCorpus}} or a \pkg{tm} \code{\link[tm]{VCorpus}} object. For \pkg{tm} 12 | corpora, every corpus element should consist of a single \code{"content"} \code{character} vector 13 | as the document unit.} 14 | 15 | \item{dates}{an optional sequence of dates as \code{"yyyy-mm-dd"}, of the same length as the number 16 | of documents in the input corpus, to define the \code{"date"} column. If \code{dates = NULL}, the 17 | \code{"date"} metadata element in the input corpus, if available, will be used but should be in the 18 | same \code{"yyyy-mm-dd"} format.} 19 | 20 | \item{do.clean}{see \code{\link{sento_corpus}}.} 21 | } 22 | \value{ 23 | A \code{sento_corpus} object, as returned by the \code{\link{sento_corpus}} function. 24 | } 25 | \description{ 26 | Converts most common \pkg{quanteda} and \pkg{tm} corpus objects into a 27 | \code{sento_corpus} object. Appropriate available metadata is integrated as features; 28 | for a \pkg{quanteda} corpus, this can come from \code{docvars(x)}, for a \pkg{tm} corpus, 29 | only \code{meta(x, type = "indexed")} metadata is considered. 
30 | } 31 | \examples{ 32 | data("usnews", package = "sentometrics") 33 | txt <- system.file("texts", "txt", package = "tm") 34 | reuters <- system.file("texts", "crude", package = "tm") 35 | 36 | # reshuffle usnews data.frame for use in quanteda and tm 37 | dates <- usnews$date 38 | usnews$wrong <- "notNumeric" 39 | colnames(usnews)[c(1, 3)] <- c("doc_id", "text") 40 | 41 | # conversion from a quanteda corpus 42 | qcorp <- quanteda::corpus(usnews, 43 | text_field = "text", docid_field = "doc_id") 44 | corp1 <- as.sento_corpus(qcorp) 45 | corp2 <- as.sento_corpus(qcorp, sample(dates)) # overwrites "date" column 46 | 47 | # conversion from a tm SimpleCorpus corpus (DataframeSource) 48 | tmSCdf <- tm::SimpleCorpus(tm::DataframeSource(usnews)) 49 | corp3 <- as.sento_corpus(tmSCdf) 50 | 51 | # conversion from a tm SimpleCorpus corpus (DirSource) 52 | tmSCdir <- tm::SimpleCorpus(tm::DirSource(txt)) 53 | corp4 <- as.sento_corpus(tmSCdir, dates[1:length(tmSCdir)]) 54 | 55 | # conversion from a tm VCorpus corpus (DataframeSource) 56 | tmVCdf <- tm::VCorpus(tm::DataframeSource(usnews)) 57 | corp5 <- as.sento_corpus(tmVCdf) 58 | 59 | # conversion from a tm VCorpus corpus (DirSource) 60 | tmVCdir <- tm::VCorpus(tm::DirSource(reuters), 61 | list(reader = tm::readReut21578XMLasPlain)) 62 | corp6 <- as.sento_corpus(tmVCdir, dates[1:length(tmVCdir)]) 63 | 64 | } 65 | \seealso{ 66 | \code{\link[quanteda]{corpus}}, \code{\link[tm]{SimpleCorpus}}, \code{\link[tm]{VCorpus}}, 67 | \code{\link{sento_corpus}} 68 | } 69 | \author{ 70 | Samuel Borms 71 | } 72 | -------------------------------------------------------------------------------- /man/attributions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/attribution.R 3 | \name{attributions} 4 | \alias{attributions} 5 | \title{Retrieve top-down model sentiment attributions} 6 | \usage{ 7 | attributions( 8 | model, 9 | sento_measures, 10 | do.lags = TRUE, 11 | do.normalize = FALSE, 12 | refDates = NULL, 13 | factor = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{model}{a \code{sento_model} or a \code{sento_modelIter} object created with \code{\link{sento_model}}.} 18 | 19 | \item{sento_measures}{the \code{sento_measures} object, as created with \code{\link{sento_measures}}, used to estimate 20 | the model from the first argument (make sure this is the case!).} 21 | 22 | \item{do.lags}{a \code{logical}, \code{TRUE} also computes the attribution to each time lag. For large time lags, 23 | this is time-consuming.} 24 | 25 | \item{do.normalize}{a \code{logical}, \code{TRUE} divides each element of every attribution vector at a given date by its 26 | L2-norm at that date, normalizing the values between -1 and 1. The document attributions are not normalized.} 27 | 28 | \item{refDates}{the dates (as \code{"yyyy-mm-dd"}) at which attribution is to be performed. These should be between the latest 29 | date available in the input \code{sento_measures} object and the first estimation sample date (that is, \code{model$dates[1]} 30 | if \code{model} is a \code{sento_model} object). All dates should also be in \code{get_dates(sento_measures)}. If 31 | \code{NULL} (default), attribution is calculated for all in-sample dates. 
Ignored if \code{model} is a \code{sento_modelIter} 32 | object, for which attribution is calculated for all out-of-sample prediction dates.} 33 | 34 | \item{factor}{the factor level as a single \code{character} vector to calculate attribution 35 | for in case of (a) multinomial model(s). Ignored for linear and binomial models.} 36 | } 37 | \value{ 38 | A \code{list} of class \code{attributions}, with \code{"documents"}, \code{"lags"}, \code{"lexicons"}, 39 | \code{"features"} and \code{"time"} as attribution dimensions. The last four dimensions are 40 | \code{data.table}s having a \code{"date"} column and the other columns the different components of the dimension, with 41 | the attributions as values. Document-level attribution is further decomposed into a \code{data.table} per date, with 42 | \code{"id"}, \code{"date"} and \code{"attrib"} columns. If \code{do.lags = FALSE}, the \code{"lags"} element is set 43 | to \code{NULL}. 44 | } 45 | \description{ 46 | Computes the attributions to predictions for a (given) number of dates at all possible sentiment dimensions, 47 | based on the coefficients associated with each sentiment measure, as estimated in the provided model object. 48 | } 49 | \details{ 50 | See \code{\link{sento_model}} for an elaborate modeling example including the calculation and plotting of 51 | attributions. The attribution for logistic models is represented in terms of log odds. For binomial models, it is 52 | calculated with respect to the last factor level or factor column. A \code{NULL} value for document-level attribution 53 | on a given date means no documents are directly implicated in the associated prediction. 54 | } 55 | \seealso{ 56 | \code{\link{sento_model}} 57 | } 58 | \author{ 59 | Samuel Borms, Keven Bluteau 60 | } 61 | -------------------------------------------------------------------------------- /man/corpus_summarize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentocorpus.R 3 | \name{corpus_summarize} 4 | \alias{corpus_summarize} 5 | \title{Summarize the sento_corpus object} 6 | \usage{ 7 | corpus_summarize(x, by = "day", features = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{sento_corpus} object created with \code{\link{sento_corpus}}.} 11 | 12 | \item{by}{a single \code{character} vector to specify the frequency time interval over which the statistics 13 | need to be calculated.} 14 | 15 | \item{features}{a \code{character} vector that can be used to select a subset of the features to analyse.} 16 | } 17 | \value{ 18 | A \code{list} containing: 19 | \item{stats}{a \code{data.table} with statistics about the number of documents, the total, average, minimum and maximum 20 | number of tokens, and the number of texts per feature for each date.} 21 | \item{plots}{a \code{list} with three plots representing the above statistics.} 22 | } 23 | \description{ 24 | Summarizes the \code{sento_corpus} object and returns insights about the evolution of 25 | documents, features and tokens over time. 26 | } 27 | \details{ 28 | This function summarizes the \code{sento_corpus} object by generating statistics about 29 | documents, features and tokens over time. The insights can be narrowed down to a chosen set of metadata 30 | features. The same tokenization as in the sentiment calculation in \code{\link{compute_sentiment}} is used.
31 | } 32 | \examples{ 33 | data("usnews", package = "sentometrics") 34 | 35 | corpus <- sento_corpus(usnews) 36 | 37 | # summary of corpus by day 38 | summary1 <- corpus_summarize(corpus) 39 | 40 | # summary of corpus by month for both journals 41 | summary2 <- corpus_summarize(corpus, by = "month", 42 | features = c("wsj", "wapo")) 43 | 44 | } 45 | \author{ 46 | Jeroen Van Pelt, Samuel Borms, Andres Algaba 47 | } 48 | -------------------------------------------------------------------------------- /man/data-defunct.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/deprecated.R 3 | \docType{data} 4 | \name{data-defunct} 5 | \alias{data-defunct} 6 | \alias{lexicons} 7 | \alias{valence} 8 | \title{Datasets with defunct names} 9 | \description{ 10 | These are datasets that have been renamed and removed. 11 | } 12 | \details{ 13 | The dataset \code{lexicons} is defunct, use \code{list_lexicons} instead. 14 | 15 | The dataset \code{valence} is defunct, use \code{list_valence_shifters} instead. 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/diff.sento_measures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentomeasures_methods.R 3 | \name{diff.sento_measures} 4 | \alias{diff.sento_measures} 5 | \title{Differencing of sentiment measures} 6 | \usage{ 7 | \method{diff}{sento_measures}(x, lag = 1, differences = 1, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{sento_measures} object created using \code{\link{sento_measures}}.} 11 | 12 | \item{lag}{a \code{numeric}, see documentation for the generic \code{\link{diff}}.} 13 | 14 | \item{differences}{a \code{numeric}, see documentation for the generic \code{\link{diff}}.} 15 | 16 | \item{...}{not used.} 17 | } 18 | \value{ 19 | A modified \code{sento_measures} object, with the measures replaced by the differenced measures as well as updated 20 | statistics. 21 | } 22 | \description{ 23 | Differences the sentiment measures from a \code{sento_measures} object. 24 | } 25 | \examples{ 26 | data("usnews", package = "sentometrics") 27 | data("list_lexicons", package = "sentometrics") 28 | data("list_valence_shifters", package = "sentometrics") 29 | 30 | # construct a sento_measures object to start with 31 | corpus <- sento_corpus(corpusdf = usnews) 32 | corpusSample <- quanteda::corpus_sample(corpus, size = 500) 33 | l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")], list_valence_shifters[["en"]]) 34 | ctr <- ctr_agg(howTime = c("equal_weight", "linear"), by = "year", lag = 3) 35 | sento_measures <- sento_measures(corpusSample, l, ctr) 36 | 37 | # first-order difference sentiment measures with a lag of two 38 | diffed <- diff(sento_measures, lag = 2, differences = 1) 39 | 40 | } 41 | \author{ 42 | Samuel Borms 43 | } 44 | -------------------------------------------------------------------------------- /man/epu.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentometrics.R 3 | \docType{data} 4 | \name{epu} 5 | \alias{epu} 6 | \title{Monthly U.S. Economic Policy Uncertainty index} 7 | \format{ 8 | A \code{data.frame} with 403 rows and 4 columns. 
9 | }
10 | \source{
11 | \href{http://www.policyuncertainty.com/us_monthly.html}{Measuring Economic Policy Uncertainty}. Retrieved
12 | August 24, 2018.
13 | }
14 | \usage{
15 | data("epu")
16 | }
17 | \description{
18 | Monthly news-based U.S. Economic Policy Uncertainty (EPU) index (Baker, Bloom and Davis, 2016). It runs from January 1985
19 | to July 2018, and includes a binomial and a multinomial example series. The following columns are present:
20 |
21 | \itemize{
22 | \item date. Date as \code{"yyyy-mm-01"}.
23 | \item index. A \code{numeric} monthly index value.
24 | \item above. A \code{factor} with value \code{"above"} if the index is greater than the mean of the entire series, else
25 | \code{"below"}.
26 | \item aboveMulti. A \code{factor} with the mutually exclusive values \code{"above+"} (index above the 75\% quantile),
27 | \code{"above"} (between the 50\% and 75\% quantiles), \code{"below"} (between the 25\% and 50\% quantiles) and
28 | \code{"below-"} (below the 25\% quantile).
29 | }
30 | }
31 | \examples{
32 | data("epu", package = "sentometrics")
33 | head(epu)
34 |
35 | }
36 | \references{
37 | Baker, Bloom and Davis (2016). \strong{Measuring Economic Policy Uncertainty}.
38 | \emph{The Quarterly Journal of Economics 131, 1593-1636}, \doi{10.1093/qje/qjw024}.
39 | }
40 | \keyword{datasets}
41 |
-------------------------------------------------------------------------------- /man/figures/gsoc.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/man/figures/gsoc.png
-------------------------------------------------------------------------------- /man/figures/innoviris.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/man/figures/innoviris.png
-------------------------------------------------------------------------------- /man/figures/ivado.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/man/figures/ivado.png
-------------------------------------------------------------------------------- /man/figures/logo.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/man/figures/logo.png
-------------------------------------------------------------------------------- /man/figures/snsf.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/man/figures/snsf.png
-------------------------------------------------------------------------------- /man/figures/swissuniversities.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/man/figures/swissuniversities.png
-------------------------------------------------------------------------------- /man/get_dates.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in
R/sentomeasures_methods.R
3 | \name{get_dates}
4 | \alias{get_dates}
5 | \title{Get the dates of the sentiment measures/time series}
6 | \usage{
7 | get_dates(sento_measures)
8 | }
9 | \arguments{
10 | \item{sento_measures}{a \code{sento_measures} object created using \code{\link{sento_measures}}.}
11 | }
12 | \value{
13 | The \code{"date"} column in \code{sento_measures[["measures"]]} as a \code{character} vector.
14 | }
15 | \description{
16 | Returns the dates of the sentiment time series.
17 | }
18 | \author{
19 | Samuel Borms
20 | }
21 |
-------------------------------------------------------------------------------- /man/get_dimensions.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentomeasures_methods.R
3 | \name{get_dimensions}
4 | \alias{get_dimensions}
5 | \title{Get the dimensions of the sentiment measures}
6 | \usage{
7 | get_dimensions(sento_measures)
8 | }
9 | \arguments{
10 | \item{sento_measures}{a \code{sento_measures} object created using \code{\link{sento_measures}}.}
11 | }
12 | \value{
13 | The \code{"features"}, \code{"lexicons"} and \code{"time"} elements in \code{sento_measures}.
14 | }
15 | \description{
16 | Returns the components across all three dimensions of the sentiment measures.
17 | }
18 | \author{
19 | Samuel Borms
20 | }
21 |
-------------------------------------------------------------------------------- /man/get_hows.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{get_hows}
4 | \alias{get_hows}
5 | \title{Options supported to perform aggregation into sentiment measures}
6 | \usage{
7 | get_hows()
8 | }
9 | \value{
10 | A \code{list} with the supported aggregation hows for arguments \code{howWithin} (\code{"words"}), \code{howDocs}
11 | (\code{"docs"}) and \code{howTime} (\code{"time"}), to be supplied to \code{\link{ctr_agg}}.
12 | }
13 | \description{
14 | Outputs the supported aggregation arguments. Call for information purposes only. Used within
15 | \code{\link{ctr_agg}} to check if supplied aggregation hows are supported.
16 | }
17 | \details{
18 | See the package's \href{https://www.ssrn.com/abstract=3067734}{vignette} for a detailed explanation of all
19 | aggregation options.
20 | }
21 | \seealso{
22 | \code{\link{ctr_agg}}
23 | }
24 |
-------------------------------------------------------------------------------- /man/get_loss_data.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentomodel.R
3 | \name{get_loss_data}
4 | \alias{get_loss_data}
5 | \title{Retrieve loss data from a selection of models}
6 | \usage{
7 | get_loss_data(models, loss = c("DA", "error", "errorSq", "AD", "accuracy"))
8 | }
9 | \arguments{
10 | \item{models}{a named \code{list} of \code{sento_modelIter} objects. All models should be of the same family, being
11 | either \code{"gaussian"}, \code{"binomial"} or \code{"multinomial"}, and have performance data of the same dimensions.}
12 |
13 | \item{loss}{a single \code{character} vector, either \code{"DA"} (directional \emph{in}accuracy), \code{"error"}
14 | (predicted minus realized response variable), \code{"errorSq"} (squared errors), \code{"AD"} (absolute errors) or
15 | \code{"accuracy"} (\emph{in}accurate class predictions). This argument defines on what basis the model confidence set
This argument defines on what basis the model confidence set 16 | is calculated. The first four options are available for \code{"gaussian"} models, the last option applies only to 17 | \code{"binomial"} and \code{"multinomial"} models.} 18 | } 19 | \value{ 20 | A \code{matrix} of loss data. 21 | } 22 | \description{ 23 | Structures specific performance data for a set of different \code{sento_modelIter} objects as loss data. 24 | Can then be used, for instance, as an input to create a model confidence set (Hansen, Lunde and Nason, 2011) with 25 | the \pkg{MCS} package. 26 | } 27 | \examples{ 28 | \dontrun{ 29 | data("usnews", package = "sentometrics") 30 | data("list_lexicons", package = "sentometrics") 31 | data("list_valence_shifters", package = "sentometrics") 32 | data("epu", package = "sentometrics") 33 | 34 | set.seed(505) 35 | 36 | # construct two sento_measures objects 37 | corpusAll <- sento_corpus(corpusdf = usnews) 38 | corpus <- quanteda::corpus_subset(corpusAll, date >= "1997-01-01" & date < "2014-10-01") 39 | l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")], list_valence_shifters[["en"]]) 40 | 41 | ctrA <- ctr_agg(howWithin = "proportionalPol", howDocs = "proportional", 42 | howTime = c("equal_weight", "linear"), by = "month", lag = 3) 43 | sentMeas <- sento_measures(corpus, l, ctrA) 44 | 45 | # prepare y and other x variables 46 | y <- epu[epu$date \%in\% get_dates(sentMeas), "index"] 47 | length(y) == nobs(sentMeas) # TRUE 48 | x <- data.frame(runif(length(y)), rnorm(length(y))) # two other (random) x variables 49 | colnames(x) <- c("x1", "x2") 50 | 51 | # estimate different type of regressions 52 | ctrM <- ctr_model(model = "gaussian", type = "AIC", do.iter = TRUE, 53 | h = 0, nSample = 120, start = 50) 54 | out1 <- sento_model(sentMeas, y, x = x, ctr = ctrM) 55 | out2 <- sento_model(sentMeas, y, x = NULL, ctr = ctrM) 56 | out3 <- sento_model(subset(sentMeas, select = "linear"), y, x = x, ctr = ctrM) 57 | out4 <- sento_model(subset(sentMeas, select = "linear"), y, x = NULL, ctr = ctrM) 58 | 59 | lossData <- get_loss_data(models = list(m1 = out1, m2 = out2, m3 = out3, m4 = out4), 60 | loss = "errorSq") 61 | 62 | mcs <- MCS::MCSprocedure(lossData)} 63 | 64 | } 65 | \references{ 66 | Hansen, Lunde and Nason (2011). \strong{The model confidence set}. \emph{Econometrica 79, 453-497}, 67 | \doi{10.3982/ECTA5771}. 68 | } 69 | \seealso{ 70 | \code{\link{sento_model}}, \code{\link[MCS]{MCSprocedure}} 71 | } 72 | \author{ 73 | Samuel Borms 74 | } 75 | -------------------------------------------------------------------------------- /man/list_lexicons.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentometrics.R 3 | \docType{data} 4 | \name{list_lexicons} 5 | \alias{list_lexicons} 6 | \title{Built-in lexicons} 7 | \format{ 8 | A \code{list} with all built-in lexicons, appropriately named as \code{"NAME_language(_tr)"}. 9 | } 10 | \source{ 11 | \href{https://link.springer.com/article/10.1007/s10579-016-9364-5}{FEEL lexicon}. Retrieved November 1, 2017. 12 | 13 | \href{https://inquirer.sites.fas.harvard.edu}{GI lexicon}. Retrieved November 1, 2017. 14 | 15 | \href{https://journals.sagepub.com/doi/abs/10.1177/0021943608319388}{HENRY lexicon}. Retrieved 16 | November 1, 2017. 17 | 18 | \href{https://sraf.nd.edu/textual-analysis/}{LM lexicon}. Retrieved 19 | November 1, 2017. 
20 | }
21 | \usage{
22 | data("list_lexicons")
23 | }
24 | \description{
25 | A \code{list} containing all built-in lexicons, each as a \code{data.table} with two columns: an \code{x} column with the words,
26 | and a \code{y} column with the polarities. The \code{list} element names consist of the lexicon name and the language
27 | (based on the two-letter ISO code convention as in \code{\link[stopwords]{stopwords}}), with \code{"_tr"} as
28 | suffix if the lexicon is translated. The translation was done via Microsoft Translator through Microsoft
29 | Word. Only the entries that conform to the original language entry after retranslation, and those that have actually been
30 | translated, are kept. The latter condition is assumed fulfilled when the translation differs from the original entry.
31 | All words are unigrams and in lowercase. The built-in lexicons are the following:
32 |
33 | \itemize{
34 | \item FEEL_en_tr
35 | \item FEEL_fr (Abdaoui, \enc{Azé}{Aze}, Bringay and Poncelet, 2017)
36 | \item FEEL_nl_tr
37 | \item GI_en (General Inquirer, i.e. Harvard IV-4 combined with Lasswell)
38 | \item GI_fr_tr
39 | \item GI_nl_tr
40 | \item HENRY_en (Henry, 2008)
41 | \item HENRY_fr_tr
42 | \item HENRY_nl_tr
43 | \item LM_en (Loughran and McDonald, 2011)
44 | \item LM_fr_tr
45 | \item LM_nl_tr
46 | }
47 |
48 | Other useful lexicons can be found in the \pkg{lexicon} package, more specifically the datasets preceded by
49 | \code{hash_sentiment_}.
50 | }
51 | \examples{
52 | data("list_lexicons", package = "sentometrics")
53 | list_lexicons[c("FEEL_en_tr", "LM_en")]
54 |
55 | }
56 | \references{
57 | Abdaoui, \enc{Azé}{Aze}, Bringay and Poncelet (2017). \strong{FEEL: French Expanded Emotion Lexicon}.
58 | \emph{Language Resources & Evaluation 51, 833-855}, \doi{10.1007/s10579-016-9364-5}.
59 |
60 | Henry (2008). \strong{Are investors influenced by how earnings press releases are written?}.
61 | \emph{Journal of Business Communication 45, 363-407}, \doi{10.1177/0021943608319388}.
62 |
63 | Loughran and McDonald (2011). \strong{When is a liability not a liability? Textual analysis, dictionaries, and 10-Ks}.
64 | \emph{Journal of Finance 66, 35-65}, \doi{10.1111/j.1540-6261.2010.01625.x}.
65 | }
66 | \keyword{datasets}
67 |
-------------------------------------------------------------------------------- /man/list_valence_shifters.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentometrics.R
3 | \docType{data}
4 | \name{list_valence_shifters}
5 | \alias{list_valence_shifters}
6 | \title{Built-in valence word lists}
7 | \format{
8 | A \code{list} with all built-in valence word lists, appropriately named.
9 | }
10 | \source{
11 | \code{\link[lexicon]{hash_valence_shifters}} (English valence shifters). Retrieved August 24, 2018.
12 | }
13 | \usage{
14 | data("list_valence_shifters")
15 | }
16 | \description{
17 | A \code{list} containing all built-in valence word lists, as \code{data.table}s with three columns: an \code{x} column with
18 | the words, a \code{y} column with the values associated to each word, and a \code{t} column with the type of valence
19 | shifter (\code{1} = negators, \code{2} = amplifiers, \code{3} = deamplifiers,
20 | \code{4} = adversative conjunctions). The \code{list} element names indicate the language
21 | (based on the two-letter ISO code convention as in \code{\link[stopwords]{stopwords}}) of the valence word list.
22 | All non-English word lists are translated via Microsoft Translator through Microsoft Word. Only the entries whose
23 | translation differs from the original entry are kept. All words are unigrams and in lowercase. The built-in valence word
24 | lists are available in the following languages:
25 |
26 | \itemize{
27 | \item English (\code{"en"})
28 | \item French (\code{"fr"})
29 | \item Dutch (\code{"nl"})
30 | }
31 | }
32 | \examples{
33 | data("list_valence_shifters", package = "sentometrics")
34 | list_valence_shifters["en"]
35 |
36 | }
37 | \keyword{datasets}
38 |
-------------------------------------------------------------------------------- /man/measures_fill.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentomeasures_measures_xyz.R
3 | \name{measures_fill}
4 | \alias{measures_fill}
5 | \title{Add and fill missing dates to sentiment measures}
6 | \usage{
7 | measures_fill(
8 |   sento_measures,
9 |   fill = "zero",
10 |   dateBefore = NULL,
11 |   dateAfter = NULL
12 | )
13 | }
14 | \arguments{
15 | \item{sento_measures}{a \code{sento_measures} object created using \code{\link{sento_measures}}.}
16 |
17 | \item{fill}{an element of \code{c("zero", "latest")}; the first assumes missing dates represent zero sentiment,
18 | the second assumes missing dates represent constant sentiment.}
19 |
20 | \item{dateBefore}{a date as \code{"yyyy-mm-dd"}, to stretch the sentiment time series back to this date. Should
21 | be earlier than \code{get_dates(sento_measures)[1]} to take effect. The values for the added dates are set to those at
22 | \code{get_dates(sento_measures)[1]}. If \code{NULL}, then ignored.}
23 |
24 | \item{dateAfter}{a date as \code{"yyyy-mm-dd"}, to stretch the sentiment time series up to this date. Should be
25 | later than \code{tail(get_dates(sento_measures), 1)} to take effect. If \code{NULL}, then ignored.}
26 | }
27 | \value{
28 | A modified \code{sento_measures} object.
29 | }
30 | \description{
31 | Adds missing dates between the earliest and latest date of a \code{sento_measures} object, or between two more extreme
32 | boundary dates, such that the time series are continuous date-wise. Fills in any missing date with either 0 or the
33 | most recent non-missing value.
34 | }
35 | \details{
36 | The \code{dateBefore} and \code{dateAfter} dates are converted according to the \code{sento_measures[["by"]]}
37 | frequency.
38 | }
39 | \examples{
40 | # construct a sento_measures object to start with
41 | corpus <- sento_corpus(corpusdf = sentometrics::usnews)
42 | corpusSample <- quanteda::corpus_sample(corpus, size = 500)
43 | l <- sento_lexicons(sentometrics::list_lexicons[c("LM_en", "HENRY_en")],
44 |                     sentometrics::list_valence_shifters[["en"]])
45 | ctr <- ctr_agg(howTime = c("equal_weight", "linear"), by = "day", lag = 7, fill = "none")
46 | sento_measures <- sento_measures(corpusSample, l, ctr)
47 |
48 | # fill measures
49 | f1 <- measures_fill(sento_measures)
50 | f2 <- measures_fill(sento_measures, fill = "latest")
51 | f3 <- measures_fill(sento_measures, fill = "zero",
52 |                     dateBefore = get_dates(sento_measures)[1] - 10,
53 |                     dateAfter = tail(get_dates(sento_measures), 1) + 15)
54 |
55 | }
56 | \author{
57 | Samuel Borms
58 | }
59 |
-------------------------------------------------------------------------------- /man/measures_update.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentomeasures_measures_xyz.R
3 | \name{measures_update}
4 | \alias{measures_update}
5 | \title{Update sentiment measures}
6 | \usage{
7 | measures_update(sento_measures, sento_corpus, lexicons)
8 | }
9 | \arguments{
10 | \item{sento_measures}{a \code{sento_measures} object created with \code{\link{sento_measures}}.}
11 |
12 | \item{sento_corpus}{a \code{sento_corpus} object created with \code{\link{sento_corpus}}.}
13 |
14 | \item{lexicons}{a \code{sento_lexicons} object created with \code{\link{sento_lexicons}}.}
15 | }
16 | \value{
17 | An updated \code{sento_measures} object.
18 | }
19 | \description{
20 | Updates a \code{sento_measures} object based on a newly provided \code{sento_corpus} object.
21 | Sentiment for the unseen corpus texts is calculated and aggregated, applying the control variables
22 | from the input \code{sento_measures} object.
23 | }
24 | \examples{
25 | data("usnews", package = "sentometrics")
26 |
27 | corpus1 <- sento_corpus(usnews[1:500, ])
28 | corpus2 <- sento_corpus(usnews[400:2000, ])
29 |
30 | ctr <- ctr_agg(howTime = "linear", by = "year", lag = 3)
31 | l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")],
32 |                     list_valence_shifters[["en"]])
33 | sento_measures <- sento_measures(corpus1, l, ctr)
34 | sento_measuresNew <- measures_update(sento_measures, corpus2, l)
35 |
36 | }
37 | \seealso{
38 | \code{\link{sento_measures}}, \code{\link{compute_sentiment}}
39 | }
40 | \author{
41 | Jeroen Van Pelt, Samuel Borms, Andres Algaba
42 | }
43 |
-------------------------------------------------------------------------------- /man/merge.sentiment.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentiment_engines.R
3 | \name{merge.sentiment}
4 | \alias{merge.sentiment}
5 | \title{Merge sentiment objects horizontally and/or vertically}
6 | \usage{
7 | \method{merge}{sentiment}(...)
8 | }
9 | \arguments{
10 | \item{...}{\code{sentiment} objects to merge.}
11 | }
12 | \value{
13 | The new, combined, \code{sentiment} object, ordered by \code{"date"} and \code{"id"}.
14 | }
15 | \description{
16 | Combines multiple \code{sentiment} objects with possibly different column names
17 | into a new \code{sentiment} object. Any resulting \code{NA} values are converted to zero.
18 | }
19 | \examples{
20 | data("usnews", package = "sentometrics")
21 | data("list_lexicons", package = "sentometrics")
22 | data("list_valence_shifters", package = "sentometrics")
23 |
24 | l1 <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")])
25 | l2 <- sento_lexicons(list_lexicons[c("FEEL_en_tr")])
26 | l3 <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en", "FEEL_en_tr")])
27 |
28 | corp1 <- sento_corpus(corpusdf = usnews[1:200, ])
29 | corp2 <- sento_corpus(corpusdf = usnews[201:450, ])
30 | corp3 <- sento_corpus(corpusdf = usnews[401:700, ])
31 |
32 | s1 <- compute_sentiment(corp1, l1, "proportionalPol")
33 | s2 <- compute_sentiment(corp2, l1, "counts")
34 | s3 <- compute_sentiment(corp3, l1, "counts")
35 | s4 <- compute_sentiment(corp2, l1, "counts", do.sentence = TRUE)
36 | s5 <- compute_sentiment(corp3, l2, "proportional", do.sentence = TRUE)
37 | s6 <- compute_sentiment(corp3, l1, "counts", do.sentence = TRUE)
38 | s7 <- compute_sentiment(corp3, l3, "UShaped", do.sentence = TRUE)
39 |
40 | # straightforward row-wise merge
41 | m1 <- merge(s1, s2, s3)
42 | nrow(m1) == 700 # TRUE
43 |
44 | # another straightforward row-wise merge
45 | m2 <- merge(s4, s6)
46 |
47 | # merge of sentence and non-sentence calculations
48 | m3 <- merge(s3, s6)
49 |
50 | # different methods add columns
51 | m4 <- merge(s4, s5)
52 | nrow(m4) == nrow(m2) # TRUE
53 |
54 | # different methods and weighting add rows and columns
55 | ## rows are added only when the different weighting
56 | ## approach for a specific method gives different sentiment values
57 | m5 <- merge(s4, s7)
58 | nrow(m5) > nrow(m4) # TRUE
59 |
60 | }
61 | \author{
62 | Samuel Borms
63 | }
64 |
-------------------------------------------------------------------------------- /man/nmeasures.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentomeasures_methods.R
3 | \name{nmeasures}
4 | \alias{nmeasures}
5 | \title{Get number of sentiment measures}
6 | \usage{
7 | nmeasures(sento_measures)
8 | }
9 | \arguments{
10 | \item{sento_measures}{a \code{sento_measures} object created using \code{\link{sento_measures}}.}
11 | }
12 | \value{
13 | The number of sentiment measures in the input \code{sento_measures} object.
14 | }
15 | \description{
16 | Returns the number of sentiment measures.
17 | }
18 | \author{
19 | Samuel Borms
20 | }
21 |
-------------------------------------------------------------------------------- /man/nobs.sento_measures.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentomeasures_methods.R
3 | \name{nobs.sento_measures}
4 | \alias{nobs.sento_measures}
5 | \title{Get number of observations in the sentiment measures}
6 | \usage{
7 | \method{nobs}{sento_measures}(object, ...)
8 | }
9 | \arguments{
10 | \item{object}{a \code{sento_measures} object created using \code{\link{sento_measures}}.}
11 |
12 | \item{...}{not used.}
13 | }
14 | \value{
15 | The number of rows (observations/data points) in \code{object[["measures"]]}.
16 | }
17 | \description{
18 | Returns the number of data points available in the sentiment measures.
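As a minimal sketch of both this function and \code{\link{nmeasures}} (the objects below are illustrative, built along the lines of the examples of \code{\link{sento_measures}}):
\preformatted{
corpus <- sento_corpus(corpusdf = sentometrics::usnews)
l <- sento_lexicons(sentometrics::list_lexicons["LM_en"])
ctr <- ctr_agg(howTime = "equal_weight", by = "year", lag = 3)
sm <- sento_measures(corpus, l, ctr)
nobs(sm)      # number of observations (dates)
nmeasures(sm) # number of sentiment measures
}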
19 | }
20 | \author{
21 | Samuel Borms
22 | }
23 |
-------------------------------------------------------------------------------- /man/peakdates.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentomeasures_main.R
3 | \name{peakdates}
4 | \alias{peakdates}
5 | \title{Extract dates related to sentiment time series peaks}
6 | \usage{
7 | peakdates(sento_measures, n = 10, type = "both", do.average = FALSE)
8 | }
9 | \arguments{
10 | \item{sento_measures}{a \code{sento_measures} object created using \code{\link{sento_measures}}.}
11 |
12 | \item{n}{a positive \code{numeric} value to indicate the number of dates associated with sentiment peaks to extract.
13 | If \code{n < 1}, it is interpreted as a quantile (for example, 0.07 would mean the 7\% most extreme dates).}
14 |
15 | \item{type}{a \code{character} value, either \code{"pos"}, \code{"neg"} or \code{"both"}, respectively to look
16 | for the \code{n} dates related to the most positive, most negative or most extreme (in absolute terms) sentiment
17 | occurrences.}
18 |
19 | \item{do.average}{a \code{logical} to indicate whether peaks should be selected based on the average sentiment
20 | value per date.}
21 | }
22 | \value{
23 | A vector of type \code{"Date"} corresponding to the \code{n} extracted sentiment peak dates.
24 | }
25 | \description{
26 | This function extracts the dates for which aggregated time series sentiment is most
27 | extreme (lowest, highest or both in absolute terms). The extracted dates are unique, even when,
28 | for example, all most extreme sentiment values (for different sentiment measures) occur on only
29 | one date.
30 | }
31 | \examples{
32 | set.seed(505)
33 |
34 | data("usnews", package = "sentometrics")
35 | data("list_lexicons", package = "sentometrics")
36 | data("list_valence_shifters", package = "sentometrics")
37 |
38 | # construct a sento_measures object to start with
39 | corpus <- sento_corpus(corpusdf = usnews)
40 | corpusSample <- quanteda::corpus_sample(corpus, size = 500)
41 | l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")], list_valence_shifters[["en"]])
42 | ctr <- ctr_agg(howTime = c("equal_weight", "linear"), by = "month", lag = 3)
43 | sento_measures <- sento_measures(corpusSample, l, ctr)
44 |
45 | # extract the peaks
46 | peaksAbs <- peakdates(sento_measures, n = 5)
47 | peaksAbsQuantile <- peakdates(sento_measures, n = 0.50)
48 | peaksPos <- peakdates(sento_measures, n = 5, type = "pos")
49 | peaksNeg <- peakdates(sento_measures, n = 5, type = "neg")
50 |
51 | }
52 | \author{
53 | Samuel Borms
54 | }
55 |
-------------------------------------------------------------------------------- /man/peakdocs.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentiment_engines.R
3 | \name{peakdocs}
4 | \alias{peakdocs}
5 | \title{Extract documents related to sentiment peaks}
6 | \usage{
7 | peakdocs(sentiment, n = 10, type = "both", do.average = FALSE)
8 | }
9 | \arguments{
10 | \item{sentiment}{a \code{sentiment} object created using \code{\link{compute_sentiment}} or
11 | \code{\link{as.sentiment}}.}
12 |
13 | \item{n}{a positive \code{numeric} value to indicate the number of documents associated with sentiment
14 | peaks to extract.
If \code{n < 1}, it is interpreted as a quantile (for example, 0.07 would mean the 15 | 7\% most extreme documents).} 16 | 17 | \item{type}{a \code{character} value, either \code{"pos"}, \code{"neg"} or \code{"both"}, respectively to look 18 | for the \code{n} documents related to the most positive, most negative or most extreme (in absolute terms) sentiment 19 | occurrences.} 20 | 21 | \item{do.average}{a \code{logical} to indicate whether peaks should be selected based on the average sentiment 22 | value per document.} 23 | } 24 | \value{ 25 | A vector of type \code{"character"} corresponding to the \code{n} extracted document identifiers. 26 | } 27 | \description{ 28 | This function extracts the documents with most extreme sentiment (lowest, highest or both 29 | in absolute terms). The extracted documents are unique, even when, for example, all most extreme 30 | sentiment values (across sentiment calculation methods) occur only for one document. 31 | } 32 | \examples{ 33 | set.seed(505) 34 | 35 | data("usnews", package = "sentometrics") 36 | data("list_lexicons", package = "sentometrics") 37 | data("list_valence_shifters", package = "sentometrics") 38 | 39 | l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")]) 40 | 41 | corpus <- sento_corpus(corpusdf = usnews) 42 | corpusSample <- quanteda::corpus_sample(corpus, size = 200) 43 | sent <- compute_sentiment(corpusSample, l, how = "proportionalPol") 44 | 45 | # extract the peaks 46 | peaksAbs <- peakdocs(sent, n = 5) 47 | peaksAbsQuantile <- peakdocs(sent, n = 0.50) 48 | peaksPos <- peakdocs(sent, n = 5, type = "pos") 49 | peaksNeg <- peakdocs(sent, n = 5, type = "neg") 50 | 51 | } 52 | \author{ 53 | Samuel Borms 54 | } 55 | -------------------------------------------------------------------------------- /man/plot.attributions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/attribution.R 3 | \name{plot.attributions} 4 | \alias{plot.attributions} 5 | \title{Plot prediction attributions at specified level} 6 | \usage{ 7 | \method{plot}{attributions}(x, group = "features", ...) 8 | } 9 | \arguments{ 10 | \item{x}{an \code{attributions} object created with \code{\link{attributions}}.} 11 | 12 | \item{group}{a value from \code{c("lags", "lexicons", "features", "time")}.} 13 | 14 | \item{...}{not used.} 15 | } 16 | \value{ 17 | Returns a simple \code{\link[ggplot2]{ggplot}} object, which can be added onto (or to alter its default elements) by using 18 | the \code{+} operator. By default, a legend is positioned at the top if the number of components of the 19 | dimension is at maximum twelve. 20 | } 21 | \description{ 22 | Shows a plot of the attributions along the dimension provided, stacked per date. 23 | } 24 | \details{ 25 | See \code{\link{sento_model}} for an elaborate modeling example including the calculation and plotting of 26 | attributions. This function does not handle the plotting of the attribution of individual documents, since there are 27 | often a lot of documents involved and they appear only once at one date (even though a document may contribute to 28 | predictions at several dates, depending on the number of lags in the time aggregation). 
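A minimal sketch of the intended flow (the objects \code{sentMeas} and \code{y} are placeholders, prepared as in the \code{\link{get_loss_data}} example):
\preformatted{
ctrM <- ctr_model(model = "gaussian", type = "AIC", h = 0)
out <- sento_model(sentMeas, y, ctr = ctrM)
attrib <- attributions(out, sentMeas)
plot(attrib, group = "lexicons") # attributions stacked per date
}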
29 | } 30 | \author{ 31 | Samuel Borms, Keven Bluteau 32 | } 33 | -------------------------------------------------------------------------------- /man/plot.sento_measures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentomeasures_methods.R 3 | \name{plot.sento_measures} 4 | \alias{plot.sento_measures} 5 | \title{Plot sentiment measures} 6 | \usage{ 7 | \method{plot}{sento_measures}(x, group = "all", ...) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{sento_measures} object created using \code{\link{sento_measures}}.} 11 | 12 | \item{group}{a value from \code{c("lexicons", "features", "time", "all")}. The first three choices display the average of 13 | all measures from the same group, in a different color. The choice \code{"all"} displays every single sentiment measure 14 | in a separate color, but this may look visually overwhelming very fast, and can be quite slow.} 15 | 16 | \item{...}{not used.} 17 | } 18 | \value{ 19 | Returns a simple \code{\link[ggplot2]{ggplot}} object, which can be added onto (or to alter its default elements) by using 20 | the \code{+} operator (see example). By default, a legend is positioned at the top if there are at maximum twelve line 21 | graphs plotted and \code{group} is different from \code{"all"}. 22 | } 23 | \description{ 24 | Plotting method that shows all sentiment measures from the provided \code{sento_measures} 25 | object in one plot, or the average along one of the lexicons, features and time weighting dimensions. 26 | } 27 | \examples{ 28 | # construct a sento_measures object to start with 29 | corpus <- sento_corpus(corpusdf = sentometrics::usnews) 30 | corpusSample <- quanteda::corpus_sample(corpus, size = 500) 31 | l <- sento_lexicons(sentometrics::list_lexicons[c("LM_en")], 32 | sentometrics::list_valence_shifters[["en"]]) 33 | ctr <- ctr_agg(howTime = c("equal_weight", "linear"), by = "month", lag = 3) 34 | sm <- sento_measures(corpusSample, l, ctr) 35 | 36 | # plot sentiment measures 37 | plot(sm, "features") 38 | 39 | \dontrun{ 40 | # adjust appearance of plot 41 | library("ggplot2") 42 | p <- plot(sm) 43 | p <- p + 44 | scale_x_date(name = "year", date_labels = "\%Y") + 45 | scale_y_continuous(name = "newName") 46 | p} 47 | 48 | } 49 | \author{ 50 | Samuel Borms 51 | } 52 | -------------------------------------------------------------------------------- /man/plot.sento_modelIter.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentomodel.R 3 | \name{plot.sento_modelIter} 4 | \alias{plot.sento_modelIter} 5 | \title{Plot iterative predictions versus realized values} 6 | \usage{ 7 | \method{plot}{sento_modelIter}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{sento_modelIter} object created using \code{\link{sento_model}}.} 11 | 12 | \item{...}{not used.} 13 | } 14 | \value{ 15 | Returns a simple \code{\link[ggplot2]{ggplot}} object, which can be added onto (or to alter its default elements) by using 16 | the \code{+} operator. 17 | } 18 | \description{ 19 | Displays a plot of all predictions made through the iterative model computation as incorporated in the 20 | input \code{sento_modelIter} object, as well as the corresponding true values. 21 | } 22 | \details{ 23 | See \code{\link{sento_model}} for an elaborate modeling example including the plotting of out-of-sample 24 | performance. 
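In brief, and assuming a \code{sento_measures} object \code{sentMeas} and a response \code{y} as constructed in the \code{\link{get_loss_data}} example:
\preformatted{
ctrM <- ctr_model(model = "gaussian", type = "AIC", do.iter = TRUE,
                  h = 0, nSample = 120, start = 50)
outIter <- sento_model(sentMeas, y, ctr = ctrM)
plot(outIter) # out-of-sample predictions vs. realized values
}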
25 | } 26 | \author{ 27 | Samuel Borms 28 | } 29 | -------------------------------------------------------------------------------- /man/predict.sento_model.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentomodel.R 3 | \name{predict.sento_model} 4 | \alias{predict.sento_model} 5 | \title{Make predictions from a sento_model object} 6 | \usage{ 7 | \method{predict}{sento_model}(object, newx, type = "response", offset = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{object}{a \code{sento_model} object created with \code{\link{sento_model}}.} 11 | 12 | \item{newx}{a data \code{matrix} used for the prediction(s), row-by-row; see 13 | \code{\link[glmnet]{predict.glmnet}}. The number of columns should be equal to \code{sum(sento_model$nVar)}, being the 14 | number of original sentiment measures and other variables. The variables discarded in the regression process are 15 | dealt with within this function, based on \code{sento_model$discarded}.} 16 | 17 | \item{type}{type of prediction required, a value from \code{c("link", "response", "class")}, see documentation for 18 | \code{\link[glmnet]{predict.glmnet}}.} 19 | 20 | \item{offset}{not used.} 21 | 22 | \item{...}{not used.} 23 | } 24 | \value{ 25 | A prediction output depending on the \code{type} argument. 26 | } 27 | \description{ 28 | Prediction method for \code{sento_model} class, with usage along the lines of 29 | \code{\link[glmnet]{predict.glmnet}}, but simplified in terms of parameters. 30 | } 31 | \seealso{ 32 | \code{\link[glmnet]{predict.glmnet}}, \code{\link{sento_model}} 33 | } 34 | \author{ 35 | Samuel Borms 36 | } 37 | -------------------------------------------------------------------------------- /man/scale.sento_measures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentomeasures_methods.R 3 | \name{scale.sento_measures} 4 | \alias{scale.sento_measures} 5 | \title{Scaling and centering of sentiment measures} 6 | \usage{ 7 | \method{scale}{sento_measures}(x, center = TRUE, scale = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{sento_measures} object created using \code{\link{sento_measures}}.} 11 | 12 | \item{center}{a \code{logical} or a \code{numeric} vector, see documentation for the generic \code{\link{scale}}. 13 | Alternatively, one can provide a \code{matrix} of dimensions \code{nobs(sento_measures)} times \code{1} or 14 | \code{nmeasures(sento_measures)} with values to subtract from each individual observation.} 15 | 16 | \item{scale}{a \code{logical} or a \code{numeric} vector, see documentation for the generic \code{\link{scale}}. 17 | Alternatively, one can provide a \code{matrix} of dimensions \code{nobs(sento_measures)} times \code{1} or 18 | \code{nmeasures(sento_measures)} with values to divide each individual observation by.} 19 | } 20 | \value{ 21 | A modified \code{sento_measures} object, with the measures replaced by the scaled measures as well as updated 22 | statistics. 23 | } 24 | \description{ 25 | Scales and centers the sentiment measures from a \code{sento_measures} object, column-per-column. By default, 26 | the measures are normalized. \code{NA}s are removed first. 
27 | }
28 | \details{
29 | If one of the arguments \code{center} or \code{scale} is a \code{matrix}, this operation is applied first,
30 | and any further centering or scaling is computed on the resulting data.
31 | }
32 | \examples{
33 | data("usnews", package = "sentometrics")
34 | data("list_lexicons", package = "sentometrics")
35 | data("list_valence_shifters", package = "sentometrics")
36 |
37 | set.seed(505)
38 |
39 | # construct a sento_measures object to start with
40 | corpus <- sento_corpus(corpusdf = usnews)
41 | corpusSample <- quanteda::corpus_sample(corpus, size = 500)
42 | l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")])
43 | ctr <- ctr_agg(howTime = c("equal_weight", "linear"), by = "year", lag = 3)
44 | sento_measures <- sento_measures(corpusSample, l, ctr)
45 |
46 | # scale sentiment measures to zero mean and unit standard deviation
47 | sc1 <- scale(sento_measures)
48 |
49 | n <- nobs(sento_measures)
50 | m <- nmeasures(sento_measures)
51 |
52 | # subtract a matrix
53 | sc2 <- scale(sento_measures, center = matrix(runif(n * m), n, m), scale = FALSE)
54 |
55 | # divide every row observation based on a one-column matrix, then center
56 | sc3 <- scale(sento_measures, center = TRUE, scale = matrix(runif(n)))
57 |
58 | }
59 | \author{
60 | Samuel Borms
61 | }
62 |
-------------------------------------------------------------------------------- /man/sento_corpus.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentocorpus.R
3 | \name{sento_corpus}
4 | \alias{sento_corpus}
5 | \title{Create a sento_corpus object}
6 | \usage{
7 | sento_corpus(corpusdf, do.clean = FALSE)
8 | }
9 | \arguments{
10 | \item{corpusdf}{a \code{data.frame} (or a \code{data.table}, or a \code{tbl}) with as named columns: a document \code{"id"}
11 | column (coercible to \code{character} mode), a \code{"date"} column (as \code{"yyyy-mm-dd"}), a \code{"texts"} column
12 | (in \code{character} mode), an optional \code{"language"} column (in \code{character} mode), and a series of
13 | feature columns of type \code{numeric}, with values between 0 and 1 to specify the degree of connectedness of
14 | a feature to a document. Features could be, for instance, topics (e.g., legal or economic) or article sources (e.g., online or
15 | print). When no feature column is provided, a feature named \code{"dummyFeature"}
16 | is added. All spaces in the names of the features are replaced by \code{'_'}. Feature columns with values not
17 | between 0 and 1 are rescaled column-wise.}
18 |
19 | \item{do.clean}{a \code{logical}, if \code{TRUE} all texts undergo a cleaning routine to eliminate common textual garbage.
20 | This includes a brute force replacement of HTML tags and non-alphanumeric characters by an empty string. Use with care
21 | if the text is meant to have non-alphanumeric characters! Preferably, cleaning is done outside of this function call.}
22 | }
23 | \value{
24 | A \code{sento_corpus} object, derived from a \pkg{quanteda} \code{\link[quanteda]{corpus}}
25 | object. The corpus is ordered by date.
26 | }
27 | \description{
28 | Formalizes a collection of texts into a \code{sento_corpus} object derived from the \pkg{quanteda}
29 | \code{\link[quanteda]{corpus}} object. The \pkg{quanteda} package provides a robust text mining infrastructure
30 | (see their \href{http://quanteda.io/index.html}{website}), including a handy corpus manipulation toolset.
This function
31 | performs a set of checks on the input data and prepares the corpus for further analysis by structurally
32 | integrating a date dimension and numeric metadata features.
33 | }
34 | \details{
35 | A \code{sento_corpus} object is a specialized instance of a \pkg{quanteda} \code{\link[quanteda]{corpus}}. Any
36 | \pkg{quanteda} function applicable to its \code{\link[quanteda]{corpus}} object can also be applied to a \code{sento_corpus}
37 | object. However, changing a given \code{sento_corpus} object too drastically using some of \pkg{quanteda}'s functions might
38 | alter the very structure the corpus is meant to have (as defined in the \code{corpusdf} argument), which is required for it
39 | to be used as an input in other functions of the \pkg{sentometrics} package. There are functions, including
40 | \code{\link[quanteda]{corpus_sample}} or \code{\link[quanteda]{corpus_subset}}, that do not change the actual corpus
41 | structure and may come in handy.
42 |
43 | To add additional features, use \code{\link{add_features}}. Binary features are useful as
44 | a mechanism to select the texts which have to be integrated in the respective feature-based sentiment measure(s), but
45 | this applies only when \code{do.ignoreZeros = TRUE}. Because of this (implicit) selection that can be performed, having
46 | complementary features (e.g., \code{"economy"} and \code{"noneconomy"}) makes sense.
47 |
48 | It is also possible to add one non-numerical feature, that is, \code{"language"}, to designate the language
49 | of the corpus texts. When this feature is provided, a \code{list} of lexicons for different
50 | languages is expected in the \code{compute_sentiment} function.
51 | }
52 | \examples{
53 | data("usnews", package = "sentometrics")
54 |
55 | # corpus construction
56 | corp <- sento_corpus(corpusdf = usnews)
57 |
58 | # take a random subset making use of quanteda
59 | corpusSmall <- quanteda::corpus_sample(corp, size = 500)
60 |
61 | # deleting a feature
62 | quanteda::docvars(corp, field = "wapo") <- NULL
63 |
64 | # deleting all features results in the addition of a dummy feature
65 | quanteda::docvars(corp, field = c("economy", "noneconomy", "wsj")) <- NULL
66 |
67 | \dontrun{
68 | # to add or replace features, use the add_features() function...
69 | quanteda::docvars(corp, field = c("wsj", "new")) <- 1}
70 |
71 | # corpus creation when no features are present
72 | corpusDummy <- sento_corpus(corpusdf = usnews[, 1:3])
73 |
74 | # corpus creation with a qualitative language feature
75 | usnews[["language"]] <- "en"
76 | usnews[["language"]][c(200:400)] <- "nl"
77 | corpusLang <- sento_corpus(corpusdf = usnews)
78 |
79 | }
80 | \seealso{
81 | \code{\link[quanteda]{corpus}}, \code{\link{add_features}}
82 | }
83 | \author{
84 | Samuel Borms
85 | }
86 |
-------------------------------------------------------------------------------- /man/sento_lexicons.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentolexicons.R
3 | \name{sento_lexicons}
4 | \alias{sento_lexicons}
5 | \title{Set up lexicons (and valence word list) for use in sentiment analysis}
6 | \usage{
7 | sento_lexicons(lexiconsIn, valenceIn = NULL, do.split = FALSE)
8 | }
9 | \arguments{
10 | \item{lexiconsIn}{a named \code{list} of (raw) lexicons, each element as a \code{data.table} or a \code{data.frame} with
11 | respectively a \code{character} column (the words) and a \code{numeric} column (the polarity scores).
This argument can include
12 | the built-in lexicons accessible via \code{sentometrics::list_lexicons}.}
13 |
14 | \item{valenceIn}{a single valence word list as a \code{data.table} or a \code{data.frame} with an \code{"x"} column
15 | and either a \code{"y"} or a \code{"t"} column. The first column has the words, \code{"y"} has the values for bigram
16 | shifting, and \code{"t"} has the types of the valence shifter for a clustered approach to sentiment calculation
17 | (supported types: \code{1} = negators, \code{2} = amplifiers, \code{3} = deamplifiers, \code{4} = adversative conjunctions).
18 | Type \code{4} is only used in a clusters-based sentence-level sentiment calculation.
19 | If three columns are provided, only the first two will be considered. This argument can be one of the
20 | built-in valence word lists accessible via \code{sentometrics::list_valence_shifters}. A word that appears in both a
21 | lexicon and the valence word list is prioritized as a lexical entry during sentiment calculation. If
22 | \code{NULL}, valence shifting is not applied in the sentiment analysis.}
23 |
24 | \item{do.split}{a \code{logical} that, if \code{TRUE}, splits every lexicon into a separate positive polarity and negative
25 | polarity lexicon.}
26 | }
27 | \value{
28 | A \code{list} of class \code{sento_lexicons} with each lexicon as a separate element according to its name, as a
29 | \code{data.table}, and optionally an element named \code{valence} that comprises the valence words. Every \code{"x"} column
30 | contains the words; every \code{"y"} column contains the scores. The \code{"t"} column for valence shifters
31 | contains the different types.
32 | }
33 | \description{
34 | Structures provided lexicon(s) and optionally valence words. One can for example combine (part of) the
35 | built-in lexicons from \code{data("list_lexicons")} with other lexicons, and add one of the built-in valence word lists
36 | from \code{data("list_valence_shifters")}. This function makes the output coherent, by converting all words to
37 | lowercase and checking for duplicates. All entries consisting of more than one word are discarded, as required for
38 | bag-of-words sentiment analysis.
39 | }
40 | \examples{
41 | data("list_lexicons", package = "sentometrics")
42 | data("list_valence_shifters", package = "sentometrics")
43 |
44 | # lexicons straight from built-in word lists
45 | l1 <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")])
46 |
47 | # including a self-made lexicon, with and without valence shifters
48 | lexIn <- c(list(myLexicon = data.table::data.table(w = c("nice", "boring"), s = c(2, -1))),
49 |            list_lexicons[c("GI_en")])
50 | valIn <- list_valence_shifters[["en"]]
51 | l2 <- sento_lexicons(lexIn)
52 | l3 <- sento_lexicons(lexIn, valIn)
53 | l4 <- sento_lexicons(lexIn, valIn[, c("x", "y")], do.split = TRUE)
54 | l5 <- sento_lexicons(lexIn, valIn[, c("x", "t")], do.split = TRUE)
55 | l6 <- l5[c("GI_en_POS", "valence")] # preserves sento_lexicons class
56 |
57 | \dontrun{
58 | # include lexicons from lexicon package
59 | lexIn2 <- list(hul = lexicon::hash_sentiment_huliu, joc = lexicon::hash_sentiment_jockers)
60 | l7 <- sento_lexicons(c(lexIn, lexIn2), valIn)}
61 |
62 | \dontrun{
63 | # faulty extraction, no replacement allowed
64 | l5["valence"]
65 | l2[0]
66 | l3[22]
67 | l4[1] <- l2[1]
68 | l4[[1]] <- l2[[1]]
69 | l4$GI_en_NEG <- l2$myLexicon}
70 |
71 | }
72 | \author{
73 | Samuel Borms
74 | }
75 |
-------------------------------------------------------------------------------- /man/sento_measures.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentomeasures_main.R
3 | \name{sento_measures}
4 | \alias{sento_measures}
5 | \title{One-way road towards a sento_measures object}
6 | \usage{
7 | sento_measures(sento_corpus, lexicons, ctr)
8 | }
9 | \arguments{
10 | \item{sento_corpus}{a \code{sento_corpus} object created with \code{\link{sento_corpus}}.}
11 |
12 | \item{lexicons}{a \code{sento_lexicons} object created with \code{\link{sento_lexicons}}.}
13 |
14 | \item{ctr}{output from a \code{\link{ctr_agg}} call.}
15 | }
16 | \value{
17 | A \code{sento_measures} object, which is a \code{list} containing:
18 | \item{measures}{a \code{data.table} with a \code{"date"} column and all textual sentiment measures as remaining columns.}
19 | \item{features}{a \code{character} vector of the different features.}
20 | \item{lexicons}{a \code{character} vector of the different lexicons used.}
21 | \item{time}{a \code{character} vector of the different time weighting schemes used.}
22 | \item{stats}{a \code{data.frame} with some elementary statistics (mean, standard deviation, maximum, minimum, and
23 | average correlation with the other measures) for each individual sentiment measure. In all computations, NAs are
24 | removed first.}
25 | \item{sentiment}{the document-level sentiment scores \code{data.table} with \code{"date"},
26 | \code{"word_count"} and lexicon-feature sentiment scores columns. The \code{"date"} column has the
27 | dates converted at the frequency for across-document aggregation. All zeros are replaced by \code{NA}
28 | if \code{ctr$docs$weightingParam$do.ignoreZeros = TRUE}.}
29 | \item{attribWeights}{a \code{list} of document and time weights used in the \code{\link{attributions}} function.
30 | It serves no further direct purpose.}
31 | \item{ctr}{a \code{list} encapsulating the control parameters.}
32 | }
33 | \description{
34 | Wrapper function which assembles calls to \code{\link{compute_sentiment}} and \code{\link{aggregate}}.
35 | It serves as the most direct way towards a panel of textual sentiment measures as a \code{sento_measures} object.
36 | }
37 | \details{
38 | As a general rule, none of the names of the features, lexicons or time weighting schemes may contain
39 | a `-' symbol.
40 | }
41 | \examples{
42 | data("usnews", package = "sentometrics")
43 | data("list_lexicons", package = "sentometrics")
44 | data("list_valence_shifters", package = "sentometrics")
45 |
46 | # construct a sento_measures object to start with
47 | corpus <- sento_corpus(corpusdf = usnews)
48 | corpusSample <- quanteda::corpus_sample(corpus, size = 500)
49 | l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")], list_valence_shifters[["en"]])
50 | ctr <- ctr_agg(howWithin = "counts",
51 |                howDocs = "proportional",
52 |                howTime = c("equal_weight", "linear", "almon"),
53 |                by = "month",
54 |                lag = 3,
55 |                ordersAlm = 1:3,
56 |                do.inverseAlm = TRUE)
57 | sento_measures <- sento_measures(corpusSample, l, ctr)
58 | summary(sento_measures)
59 |
60 | }
61 | \seealso{
62 | \code{\link{compute_sentiment}}, \code{\link{aggregate}}, \code{\link{measures_update}}
63 | }
64 | \author{
65 | Samuel Borms, Keven Bluteau
66 | }
67 |
-------------------------------------------------------------------------------- /man/sentometrics-defunct.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/deprecated.R
3 | \name{sentometrics-defunct}
4 | \alias{sentometrics-defunct}
5 | \alias{ctr_merge}
6 | \alias{perform_MCS}
7 | \alias{fill_measures}
8 | \alias{merge_measures}
9 | \alias{to_global}
10 | \alias{subset_measures}
11 | \alias{select_measures}
12 | \alias{setup_lexicons}
13 | \alias{retrieve_attributions}
14 | \alias{perform_agg}
15 | \alias{plot_attributions}
16 | \alias{almons}
17 | \alias{exponentials}
18 | \alias{to_sentocorpus}
19 | \alias{to_sentiment}
20 | \alias{get_measures}
21 | \alias{measures_subset}
22 | \alias{measures_select}
23 | \alias{measures_delete}
24 | \alias{sentiment_bind}
25 | \alias{measures_merge}
26 | \alias{measures_global}
27 | \alias{sento_app}
28 | \title{Defunct functions}
29 | \usage{
30 | ctr_merge(...)
31 |
32 | perform_MCS(...)
33 |
34 | fill_measures(...)
35 |
36 | merge_measures(...)
37 |
38 | to_global(...)
39 |
40 | subset_measures(...)
41 |
42 | select_measures(...)
43 |
44 | setup_lexicons(...)
45 |
46 | retrieve_attributions(...)
47 |
48 | perform_agg(...)
49 |
50 | plot_attributions(...)
51 |
52 | almons(...)
53 |
54 | exponentials(...)
55 |
56 | to_sentocorpus(...)
57 |
58 | to_sentiment(...)
59 |
60 | get_measures(...)
61 |
62 | measures_subset(...)
63 |
64 | measures_select(...)
65 |
66 | measures_delete(...)
67 |
68 | sentiment_bind(...)
69 |
70 | measures_merge(...)
71 |
72 | measures_global(...)
73 |
74 | sento_app(...)
75 | }
76 | \arguments{
77 | \item{...}{allowed input arguments.}
78 | }
79 | \description{
80 | Functions made defunct due to changed naming or because their functionality was discarded. See the NEWS file for more
81 | information about when and why functions have been made defunct.
82 | } 83 | \keyword{internal} 84 | -------------------------------------------------------------------------------- /man/sentometrics-deprecated.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/deprecated.R 3 | \name{sentometrics-deprecated} 4 | \alias{sentometrics-deprecated} 5 | \title{Deprecated functions} 6 | \description{ 7 | Functions deprecated due to changed naming or because functionality is discarded. The general (but not 8 | blindly followed) rule is that deprecated functions are made defunct every 1 major or every 2 minor 9 | package updates. See the NEWS file for more information about since when or why functions have been 10 | deprecated. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/sentometrics-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentometrics.R 3 | \docType{package} 4 | \name{sentometrics-package} 5 | \alias{sentometrics} 6 | \alias{sentometrics-package} 7 | \title{sentometrics: An Integrated Framework for Textual Sentiment Time Series Aggregation and Prediction} 8 | \description{ 9 | The \pkg{sentometrics} package is an integrated framework for textual sentiment time series 10 | aggregation and prediction. It accounts for the intrinsic challenge that, for a given text, sentiment can 11 | be computed in many different ways, as well as the large number of possibilities to pool sentiment across 12 | texts and time. This additional layer of manipulation does not exist in standard text mining and time series 13 | analysis packages. The package therefore integrates the fast \emph{quantification} of sentiment from texts, 14 | the \emph{aggregation} into different sentiment time series and the optimized \emph{prediction} based on 15 | these measures. 16 | } 17 | \note{ 18 | Please cite the package in publications. Use \code{citation("sentometrics")}. 19 | } 20 | \section{Main functions}{ 21 | 22 | \itemize{ 23 | \item Corpus (features) generation: \code{\link{sento_corpus}}, \code{\link{add_features}}, 24 | \code{\link{as.sento_corpus}} 25 | \item Sentiment computation and aggregation into sentiment measures: \code{\link{ctr_agg}}, 26 | \code{\link{sento_lexicons}}, \code{\link{compute_sentiment}}, \code{\link{aggregate.sentiment}}, 27 | \code{\link{as.sentiment}}, \code{\link{sento_measures}}, \code{\link{peakdocs}}, 28 | \code{\link{peakdates}}, \code{\link{aggregate.sento_measures}} 29 | \item Sparse modeling: \code{\link{ctr_model}}, \code{\link{sento_model}} 30 | \item Prediction and post-modeling analysis: \code{\link{predict.sento_model}}, 31 | \code{\link{attributions}} 32 | } 33 | } 34 | 35 | \references{ 36 | Ardia, Bluteau, Borms and Boudt (2021). \strong{The R Package sentometrics to Compute, Aggregate, and 37 | Predict with Textual Sentiment}. \emph{Journal of Statistical Software 99(2), 1-40}, 38 | \doi{10.18637/jss.v099.i02}. 39 | 40 | Ardia, Bluteau and Boudt (2019). \strong{Questioning the news about economic growth: Sparse forecasting using 41 | thousands of news-based sentiment values}. \emph{International Journal of Forecasting 35, 1370-1386}, 42 | \doi{10.1016/j.ijforecast.2018.10.010}. 
43 | } 44 | \seealso{ 45 | Useful links: 46 | \itemize{ 47 | \item \url{https://sentometrics-research.com/sentometrics/} 48 | \item Report bugs at \url{https://github.com/SentometricsResearch/sentometrics/issues} 49 | } 50 | 51 | } 52 | \author{ 53 | \strong{Maintainer}: Samuel Borms \email{borms_sam@hotmail.com} (\href{https://orcid.org/0000-0001-9533-1870}{ORCID}) 54 | 55 | Authors: 56 | \itemize{ 57 | \item David Ardia \email{david.ardia@hec.ca} (\href{https://orcid.org/0000-0003-2823-782X}{ORCID}) 58 | \item Keven Bluteau \email{keven.bluteau@usherbrooke.ca} (\href{https://orcid.org/0000-0003-2990-4807}{ORCID}) 59 | \item Kris Boudt \email{kris.boudt@vub.be} (\href{https://orcid.org/0000-0002-1000-5142}{ORCID}) 60 | } 61 | 62 | Other contributors: 63 | \itemize{ 64 | \item Jeroen Van Pelt \email{jeroenvanpelt@hotmail.com} [contributor] 65 | \item Andres Algaba \email{andres.algaba@vub.be} [contributor] 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- /man/subset.sento_measures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentomeasures_methods.R 3 | \name{subset.sento_measures} 4 | \alias{subset.sento_measures} 5 | \title{Subset sentiment measures} 6 | \usage{ 7 | \method{subset}{sento_measures}(x, subset = NULL, select = NULL, delete = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{sento_measures} object created using \code{\link{sento_measures}}.} 11 | 12 | \item{subset}{a logical (non-\code{character}) expression indicating the rows to keep. If a 13 | \code{numeric} input is given, it is used for row index subsetting.} 14 | 15 | \item{select}{a \code{character} vector of the lexicon, feature and time weighting scheme names, to indicate which 16 | measures need to be selected, or a \code{list} of \code{character} vectors, possibly with separately specified 17 | combinations (consisting of one unique lexicon, one unique feature, and one unique time weighting scheme at maximum).} 18 | 19 | \item{delete}{see the \code{select} argument, but to delete measures.} 20 | 21 | \item{...}{not used.} 22 | } 23 | \value{ 24 | A modified \code{sento_measures} object, with only the remaining rows and sentiment measures, 25 | including updated information and statistics, but with the original sentiment scores \code{data.table} left untouched. 26 | } 27 | \description{ 28 | Subsets rows of the sentiment measures based on their column values.
29 | } 30 | \examples{ 31 | data("usnews", package = "sentometrics") 32 | data("list_lexicons", package = "sentometrics") 33 | data("list_valence_shifters", package = "sentometrics") 34 | 35 | # construct a sento_measures object to start with 36 | corpus <- sento_corpus(corpusdf = usnews) 37 | corpusSample <- quanteda::corpus_sample(corpus, size = 500) 38 | l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")]) 39 | ctr <- ctr_agg(howTime = c("equal_weight", "linear"), by = "year", lag = 3) 40 | sm <- sento_measures(corpusSample, l, ctr) 41 | 42 | # three specified indices in required list format 43 | three <- as.list( 44 | stringi::stri_split(c("LM_en--economy--linear", 45 | "HENRY_en--wsj--equal_weight", 46 | "HENRY_en--wapo--equal_weight"), 47 | regex = "--") 48 | ) 49 | 50 | # different subsets 51 | sub1 <- subset(sm, HENRY_en--economy--equal_weight >= 0.01) 52 | sub2 <- subset(sm, date \%in\% get_dates(sm)[3:12]) 53 | sub3 <- subset(sm, 3:12) 54 | sub4 <- subset(sm, 1:100) # warning 55 | 56 | # different selections 57 | sel1 <- subset(sm, select = "equal_weight") 58 | sel2 <- subset(sm, select = c("equal_weight", "linear")) 59 | sel3 <- subset(sm, select = c("linear", "LM_en")) 60 | sel4 <- subset(sm, select = list(c("linear", "wsj"), c("linear", "economy"))) 61 | sel5 <- subset(sm, select = three) 62 | 63 | # different deletions 64 | del1 <- subset(sm, delete = "equal_weight") 65 | del2 <- subset(sm, delete = c("linear", "LM_en")) 66 | del3 <- subset(sm, delete = list(c("linear", "wsj"), c("linear", "economy"))) 67 | del4 <- subset(sm, delete = c("equal_weight", "linear")) # warning 68 | del5 <- subset(sm, delete = three) 69 | 70 | } 71 | \author{ 72 | Samuel Borms 73 | } 74 | -------------------------------------------------------------------------------- /man/usnews.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentometrics.R 3 | \docType{data} 4 | \name{usnews} 5 | \alias{usnews} 6 | \title{Texts (not) relevant to the U.S. economy} 7 | \format{ 8 | A \code{data.frame}, formatted as required to be an input for \code{\link{sento_corpus}}. 9 | } 10 | \source{ 11 | \strong{Economic News Article Tone and Relevance} dataset. Retrieved 12 | November 1, 2017. 13 | } 14 | \usage{ 15 | data("usnews") 16 | } 17 | \description{ 18 | A collection of texts annotated by humans in terms of relevance to the U.S. economy or not. The texts come from two major 19 | newspapers in the U.S. (The Wall Street Journal and The Washington Post) and cover 4145 documents between 1995 and 2014. It 20 | contains the following information: 21 | 22 | \itemize{ 23 | \item id. A \code{character} identifier. 24 | \item date. Date as \code{"yyyy-mm-dd"}. 25 | \item texts. Texts in \code{character} format. 26 | \item wsj. Equals 1 if the article comes from The Wall Street Journal. 27 | \item wapo. Equals 1 if the article comes from The Washington Post (complementary to `wsj'). 28 | \item economy. Equals 1 if the article is relevant to the U.S. economy. 29 | \item noneconomy. Equals 1 if the article is not relevant to the U.S. economy (complementary to `economy').
30 | } 31 | } 32 | \examples{ 33 | data("usnews", package = "sentometrics") 34 | usnews[3192, "texts"] 35 | usnews[1:5, c("id", "date", "texts")] 36 | 37 | } 38 | \keyword{datasets} 39 | -------------------------------------------------------------------------------- /man/weights_almon.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{weights_almon} 4 | \alias{weights_almon} 5 | \title{Compute Almon polynomials} 6 | \usage{ 7 | weights_almon(n, orders = 1:3, do.inverse = TRUE, do.normalize = TRUE) 8 | } 9 | \arguments{ 10 | \item{n}{a single \code{numeric} to indicate the lag length (cf., \emph{n}).} 11 | 12 | \item{orders}{a \code{numeric} vector as the sequence of the Almon orders (cf., \emph{r}). The maximum value 13 | corresponds to \emph{R}.} 14 | 15 | \item{do.inverse}{\code{TRUE} if the inverse Almon polynomials should be calculated as well.} 16 | 17 | \item{do.normalize}{a \code{logical}, if \code{TRUE} weights are normalized to unity.} 18 | } 19 | \value{ 20 | A \code{data.frame} of all Almon polynomial weighting curves, with \code{length(orders)} columns (times two if 21 | \code{do.inverse = TRUE}). 22 | } 23 | \description{ 24 | Computes Almon polynomial weighting curves. Handy to self-select specific time aggregation weighting schemes 25 | for input in \code{\link{ctr_agg}} using the \code{weights} argument. 26 | } 27 | \details{ 28 | The Almon polynomial formula implemented is: 29 | \eqn{(1 - (1 - i/n)^{r})(1 - i/n)^{R - r}}{(1 - (1 - i/n)^r) * (1 - i/n)^(R - r)}, where \eqn{i} is the lag index ordered from 30 | 1 to \eqn{n}. The inverse is computed by changing \eqn{i/n} to \eqn{1 - i/n}. 31 | } 32 | \seealso{ 33 | \code{\link{ctr_agg}} 34 | } 35 | -------------------------------------------------------------------------------- /man/weights_beta.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{weights_beta} 4 | \alias{weights_beta} 5 | \title{Compute Beta weighting curves} 6 | \usage{ 7 | weights_beta(n, a = 1:4, b = 1:4, do.normalize = TRUE) 8 | } 9 | \arguments{ 10 | \item{n}{a single \code{numeric} to indicate the lag length (cf., \emph{n}).} 11 | 12 | \item{a}{a \code{numeric} as the first parameter (cf., \emph{a}).} 13 | 14 | \item{b}{a \code{numeric} as the second parameter (cf., \emph{b}).} 15 | 16 | \item{do.normalize}{a \code{logical}, if \code{TRUE} weights are normalized to unity.} 17 | } 18 | \value{ 19 | A \code{data.frame} of beta weighting curves per combination of \code{a} and \code{b}. If \code{n = 1}, 20 | all weights are set to 1. 21 | } 22 | \description{ 23 | Computes Beta weighting curves as in Ghysels, Sinko and Valkanov (2007). Handy to self-select specific 24 | time aggregation weighting schemes for input in \code{\link{ctr_agg}} using the \code{weights} argument. 25 | } 26 | \details{ 27 | The Beta weighting abides by the following formula: 28 | \eqn{f(i/n; a, b) / \sum_{i} f(i/n; a, b)}{f(i/n; a, b) / \sum f(i/n; a, b)}, where \eqn{i} is the lag index ordered 29 | from 1 to \eqn{n}, \eqn{a} and \eqn{b} are two decay parameters, and 30 | \eqn{f(x; a, b) = (x^{a - 1}(1 - x)^{b - 1}\Gamma(a + b)) / (\Gamma(a)\Gamma(b))}{f(x; a, b) 31 | = (x^(a - 1) * (1 - x)^(b - 1) * T(a + b)) / (T(a) * T(b))}, where \eqn{\Gamma(.)}{T(.)} is 32 | the \code{\link{gamma}} function.
33 | } 34 | \references{ 35 | Ghysels, Sinko and Valkanov (2007). \strong{MIDAS regressions: Further results and new directions}. 36 | \emph{Econometric Reviews 26, 53-90}, \doi{10.1080/07474930600972467}. 37 | } 38 | \seealso{ 39 | \code{\link{ctr_agg}} 40 | } 41 | -------------------------------------------------------------------------------- /man/weights_exponential.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{weights_exponential} 4 | \alias{weights_exponential} 5 | \title{Compute exponential weighting curves} 6 | \usage{ 7 | weights_exponential( 8 | n, 9 | alphas = seq(0.1, 0.5, by = 0.1), 10 | do.inverse = FALSE, 11 | do.normalize = TRUE 12 | ) 13 | } 14 | \arguments{ 15 | \item{n}{a single \code{numeric} to indicate the lag length.} 16 | 17 | \item{alphas}{a \code{numeric} vector of decay factors, between 0 and 1, but multiplied by 10 in 18 | the implementation.} 19 | 20 | \item{do.inverse}{\code{TRUE} if the inverse exponential curves should be calculated as well.} 21 | 22 | \item{do.normalize}{a \code{logical}, if \code{TRUE} weights are normalized to unity.} 23 | } 24 | \value{ 25 | A \code{data.frame} of exponential weighting curves per value of \code{alphas}. 26 | } 27 | \description{ 28 | Computes exponential weighting curves. Handy to self-select specific time aggregation weighting schemes 29 | for input in \code{\link{ctr_agg}} using the \code{weights} argument. 30 | } 31 | \seealso{ 32 | \code{\link{ctr_agg}} 33 | } 34 | -------------------------------------------------------------------------------- /pkgdown/_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://sentometricsresearch.github.io/sentometrics 2 | 3 | destination: docs 4 | 5 | template: 6 | params: 7 | bootswatch: flatly # https://bootswatch.com 8 | docsearch: 9 | api_key: 29d61aa2be101325ab9a82514b58064b 10 | index_name: sentometrics 11 | 12 | toc: 13 | depth: 3 14 | 15 | navbar: 16 | structure: 17 | left: [home, intro, articles, contributions, news, reference] 18 | right: [docsearch, github] 19 | components: 20 | articles: 21 | text: Examples 22 | menu: 23 | - text: Tutorials 24 | - text: Corpus manipulation 25 | href: articles/examples/corpus.html 26 | - text: Sentiment computation 27 | href: articles/examples/sentiment.html 28 | - text: Index aggregation 29 | href: articles/examples/indexation.html 30 | - text: Modeling 31 | href: articles/examples/modeling.html 32 | - text: ------- 33 | - text: Applications 34 | - text: Creating EPU indices 35 | href: articles/applications/epu.html 36 | - text: Predicting the VIX index 37 | href: articles/applications/vix.html 38 | contributions: 39 | text: Contributions 40 | menu: 41 | - text: Analyzing Gopress data 42 | href: articles/contributions/gopress.html 43 | - text: Intratextual sentiment analysis 44 | href: articles/contributions/isa.html 45 | news: 46 | text: News 47 | menu: 48 | - text: Development 49 | href: articles/development.html 50 | - text: Releases 51 | href: news/index.html 52 | reference: ~ 53 | 54 | authors: 55 | Samuel Borms: 56 | href: https://www.linkedin.com/in/sam-borms 57 | David Ardia: 58 | href: https://ardiad.github.io 59 | Keven Bluteau: 60 | href: https://www.kevenbluteau.com 61 | Kris Boudt: 62 | href: https://linkedin.com/in/krisboudt 63 | Jeroen Van Pelt: 64 | href: https://linkedin.com/in/vanpeltjeroen 65 | Andres Algaba: 66 | href: 
https://linkedin.com/in/andresalgaba 67 | 68 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 2 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(shell ${R_HOME}/bin/Rscript -e "RcppParallel::RcppParallelLibs()") 3 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DRCPP_PARALLEL_USE_TBB=1 2 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "RcppParallel::RcppParallelLibs()") 3 | -------------------------------------------------------------------------------- /src/SentimentScorerBigrams.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef SENTIMENT_BIGRAMS 3 | #define SENTIMENT_BIGRAMS 4 | 5 | struct SentimentScorerBigrams : public RcppParallel::Worker { 6 | 7 | // thread-safe input 8 | const std::vector< std::vector< std::string > > texts; 9 | const std::unordered_map< std::string, std::vector< double > > lexiconMap; 10 | const std::unordered_map< std::string, double > valenceMap; 11 | const std::string how; 12 | const int nL; 13 | const int N; 14 | std::unordered_map< int, std::unordered_map< std::string, double > > frequencyMap; 15 | std::unordered_map< std::string, double > inverseFrequencyMap; 16 | const bool isFreqWeighting; 17 | 18 | // output 19 | RcppParallel::RMatrix< double > sentScores; 20 | 21 | SentimentScorerBigrams(const std::vector< std::vector< std::string > > texts, 22 | const std::unordered_map< std::string, std::vector< double > > lexiconMap, 23 | const std::unordered_map< std::string, double > valenceMap, 24 | const std::string how, 25 | int nL, 26 | int N, 27 | std::unordered_map< int, std::unordered_map< std::string, double > > frequencyMap, 28 | std::unordered_map< std::string, double > inverseFrequencyMap, 29 | const bool isFreqWeighting, 30 | Rcpp::NumericMatrix sentScores) 31 | : texts(texts), lexiconMap(lexiconMap), valenceMap(valenceMap), how(how), nL(nL), N(N), frequencyMap(frequencyMap), 32 | inverseFrequencyMap(inverseFrequencyMap), isFreqWeighting(isFreqWeighting), sentScores(sentScores) {} 33 | 34 | void operator()(std::size_t begin, std::size_t end) { 35 | 36 | for (std::size_t i = begin; i < end; i++) { 37 | 38 | std::vector< std::string > tokens = texts[i]; 39 | std::vector< double > scores(nL, 0.0); 40 | std::vector< double > nPolarized(nL, 0.0); 41 | double normalizer = 0.0; 42 | int nTokens = tokens.size(); 43 | int nPuncts = 0; 44 | std::vector< double > tokenShifters(nTokens, 1.0); 45 | std::vector< double > tokenWeights(nTokens, 0.0); 46 | std::vector< std::vector< double > > tokenScores(nTokens, std::vector< double >(nL, 0.0)); 47 | std::unordered_map< std::string, double > freqMap; 48 | double maxTokenFrequency = 1.0; 49 | if (isFreqWeighting) { 50 | update_frequency_map(freqMap, frequencyMap, i); 51 | } 52 | 53 | for (int j = 0; j < nTokens; j++) { 54 | std::string token = tokens[j]; 55 | double tokenFrequency = 1.0, tokenInverseFrequency = 1.0; 56 | if (isFreqWeighting) { 57 | update_token_frequency(tokenFrequency, freqMap, token); 58 | update_token_inverse_frequency(tokenInverseFrequency, inverseFrequencyMap, token, how); 59 | } 60 | if (lexiconMap.find(token) != lexiconMap.end()) { 61 | tokenScores[j] = lexiconMap.at(token); 62 | 63 | if (how != "proportional" && how != "counts" && how != "proportionalSquareRoot") { 64 | update_token_weights(tokenWeights, normalizer, nPolarized, j, nTokens, how, 65 | nL, tokenScores, tokenFrequency, tokenInverseFrequency, maxTokenFrequency, N); 66 | } 67 | 68 | int k = std::max(0, j - 1); 69 | if
(valenceMap.find(tokens[k]) != valenceMap.end()) { // bigram valence shifting 70 | tokenShifters[j] = valenceMap.at(tokens[k]); 71 | } 72 | } 73 | } 74 | update_token_scores(scores, tokenScores, normalizer, nPolarized, tokenShifters, 75 | tokenWeights, nL, nTokens, how, nPuncts); 76 | 77 | sentScores(i, 0) = nTokens; 78 | for (int m = 0; m < nL; m++) { 79 | sentScores(i, m + 1) = scores[m]; 80 | } 81 | 82 | } 83 | } 84 | 85 | }; 86 | 87 | #endif 88 | 89 | -------------------------------------------------------------------------------- /src/SentimentScorerClusters.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef SENTIMENT_CLUSTERS 3 | #define SENTIMENT_CLUSTERS 4 | 5 | struct SentimentScorerClusters : public RcppParallel::Worker { 6 | 7 | // thread-safe input 8 | const std::vector< std::vector< std::string > > texts; 9 | const std::unordered_map< std::string, std::vector< double > > lexiconMap; 10 | const std::unordered_map< std::string, double > valenceMap; 11 | const std::string how; 12 | const int nL; 13 | const int N; 14 | std::unordered_map< int, std::unordered_map< std::string, double> > frequencyMap; 15 | std::unordered_map< std::string, double > inverseFrequencyMap; 16 | const bool isFreqWeighting; 17 | 18 | // output 19 | RcppParallel::RMatrix< double > sentScores; 20 | 21 | SentimentScorerClusters(const std::vector< std::vector< std::string > > texts, 22 | const std::unordered_map< std::string, std::vector< double > > lexiconMap, 23 | const std::unordered_map< std::string, double > valenceMap, 24 | const std::string how, 25 | int nL, 26 | int N, 27 | std::unordered_map< int, std::unordered_map< std::string, double > > frequencyMap, 28 | std::unordered_map< std::string, double > inverseFrequencyMap, 29 | const bool isFreqWeighting, 30 | Rcpp::NumericMatrix sentScores) 31 | : texts(texts), lexiconMap(lexiconMap), valenceMap(valenceMap), how(how), nL(nL), N(N), frequencyMap(frequencyMap), 32 | inverseFrequencyMap(inverseFrequencyMap), isFreqWeighting(isFreqWeighting), sentScores(sentScores) {} 33 | 34 | void operator()(std::size_t begin, std::size_t end) { 35 | 36 | for (std::size_t i = begin; i < end; i++) { 37 | 38 | std::vector< std::string > tokens = texts[i]; 39 | std::vector< double > scores(nL, 0.0); 40 | std::vector< double > nPolarized(nL, 0.0); 41 | double normalizer = 0.0, maxTokenFrequency = 1.0; 42 | int nTokens = tokens.size(), lB = 0, nB = 4, nA = 2; 43 | int nPuncts = 0; 44 | std::vector< std::vector< double > > tokenScores(nTokens, std::vector< double >(nL, 0.0)); 45 | 46 | std::vector< double > tokenWeights(nTokens, 0.0); 47 | std::vector< double > tokenShifters(nTokens, 1.0); 48 | std::unordered_map< std::string, double > freqMap; 49 | 50 | if (isFreqWeighting) { 51 | update_frequency_map(freqMap, frequencyMap, i); 52 | } 53 | 54 | for (int j = 0; j < nTokens; j++) { 55 | std::string token = tokens[j]; 56 | double tokenFrequency = 1.0, tokenInverseFrequency = 1.0; 57 | if (isFreqWeighting) { 58 | update_token_frequency(tokenFrequency, freqMap, token); 59 | update_token_inverse_frequency(tokenInverseFrequency, inverseFrequencyMap, token, how); 60 | } 61 | if (lexiconMap.find(token) != lexiconMap.end()) { // hit 62 | tokenScores[j] = lexiconMap.at(token); 63 | std::vector< int > shifters(3); // counters for negators, amplifiers and deamplifiers around the hit 64 | 65 | if (how != "proportional" && how != "counts" && how != "proportionalSquareRoot") { 66 | update_token_weights(tokenWeights, normalizer, nPolarized, j, nTokens, how, 67 | nL, tokenScores, tokenFrequency, tokenInverseFrequency, maxTokenFrequency, N); 68 | } 69 | 70 | int st =
std::max(lB, j - nB); 71 | int en = std::min(nTokens, j + nA + 1); 72 | 73 | for (int k = st; k < en; k++) { 74 | if (k == j) continue; 75 | std::string token_k = tokens[k]; 76 | if (lexiconMap.find(token_k) != lexiconMap.end()) { 77 | tokenScores[k] = lexiconMap.at(token_k); 78 | if (how != "proportional" && how != "counts" && how != "proportionalSquareRoot") { 79 | update_token_weights(tokenWeights, normalizer, nPolarized, k, nTokens, how, 80 | nL, tokenScores, tokenFrequency, tokenInverseFrequency, maxTokenFrequency, N); 81 | } 82 | } else if (valenceMap.find(token_k) != valenceMap.end()) { 83 | double valType = valenceMap.at(token_k); 84 | update_primary_shifters(shifters, valType); 85 | } 86 | } 87 | tokenShifters[j] = compute_cluster_impact(shifters); 88 | 89 | lB = en + 1; // reset index such that polarity clusters are not overlapping 90 | j = en; // updated to j + 1 immediately after 91 | } 92 | } 93 | update_token_scores(scores, tokenScores, normalizer, nPolarized, tokenShifters, 94 | tokenWeights, nL, nTokens, how, nPuncts); 95 | 96 | sentScores(i, 0) = nTokens; 97 | for (int m = 0; m < nL; m++) { 98 | sentScores(i, m + 1) = scores[m]; 99 | } 100 | 101 | } 102 | } 103 | 104 | }; 105 | 106 | #endif 107 | 108 | -------------------------------------------------------------------------------- /src/SentimentScorerOnegrams.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef SENTIMENT_ONEGRAMS 3 | #define SENTIMENT_ONEGRAMS 4 | 5 | struct SentimentScorerOnegrams : public RcppParallel::Worker { 6 | 7 | // thread-safe input 8 | const std::vector< std::vector< std::string > > texts; 9 | const std::unordered_map< std::string, std::vector< double > > lexiconMap; 10 | const std::string how; 11 | const int nL; 12 | const int N; 13 | std::unordered_map< int, std::unordered_map< std::string, double > > frequencyMap; 14 | std::unordered_map< std::string, double > inverseFrequencyMap; 15 | const bool isFreqWeighting; 16 | 17 | // output 18 | RcppParallel::RMatrix< double > sentScores; 19 | 20 | SentimentScorerOnegrams(const std::vector< std::vector< std::string > > texts, 21 | const std::unordered_map< std::string, std::vector< double > > lexiconMap, 22 | const std::string how, 23 | int nL, 24 | int N, 25 | std::unordered_map< int, std::unordered_map< std::string, double > > frequencyMap, 26 | std::unordered_map< std::string, double > inverseFrequencyMap, 27 | const bool isFreqWeighting, 28 | Rcpp::NumericMatrix sentScores) 29 | : texts(texts), lexiconMap(lexiconMap), how(how), nL(nL), N(N), frequencyMap(frequencyMap), 30 | inverseFrequencyMap(inverseFrequencyMap), isFreqWeighting(isFreqWeighting), sentScores(sentScores) {} 31 | 32 | void operator()(std::size_t begin, std::size_t end) { 33 | 34 | for (std::size_t i = begin; i < end; i++) { 35 | 36 | std::vector< std::string > tokens = texts[i]; 37 | std::vector< double > scores(nL, 0.0); // scores for one text, one entry per lexicon 38 | std::vector< double > nPolarized(nL, 0.0); 39 | double normalizer = 0.0; 40 | int nTokens = tokens.size(); 41 | int nPuncts = 0; 42 | std::vector< std::vector< double > > tokenScores(nTokens, std::vector< double >(nL, 0.0)); 43 | std::vector< double > tokenWeights(nTokens, 0.0); 44 | std::vector< double > tokenShifters(nTokens, 1.0); 45 | std::unordered_map< std::string, double > freqMap; 46 | double maxTokenFrequency = 1.0; 47 | if (isFreqWeighting) { 48 | update_frequency_map(freqMap, frequencyMap, i); 49 | } 50 | 51 | for (int j = 0; j < nTokens; j++) { 52 | std::string token
= tokens[j]; 53 | double tokenFrequency = 1.0; 54 | double tokenInverseFrequency = 1.0; 55 | if (isFreqWeighting) { 56 | update_token_frequency(tokenFrequency, freqMap, token); 57 | update_token_inverse_frequency(tokenInverseFrequency, inverseFrequencyMap, token, how); 58 | } 59 | 60 | if (lexiconMap.find(token) != lexiconMap.end()) { 61 | tokenScores[j] = lexiconMap.at(token); // get value of token for each lexicon 62 | } 63 | if (how != "proportional" && how != "counts" && how != "proportionalSquareRoot") { 64 | update_token_weights(tokenWeights, normalizer, nPolarized, j, nTokens, how, 65 | nL, tokenScores, tokenFrequency, tokenInverseFrequency, maxTokenFrequency, N); 66 | } 67 | } 68 | update_token_scores(scores, tokenScores, normalizer, nPolarized, tokenShifters, 69 | tokenWeights, nL, nTokens, how, nPuncts); 70 | 71 | sentScores(i, 0) = nTokens; 72 | for (int m = 0; m < nL; m++) { 73 | sentScores(i, m + 1) = scores[m]; 74 | } 75 | 76 | } 77 | } 78 | 79 | }; 80 | 81 | #endif 82 | 83 | -------------------------------------------------------------------------------- /src/compute_df.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include <RcppArmadillo.h> 3 | // [[Rcpp::depends(RcppArmadillo)]] 4 | 5 | using namespace arma; 6 | using namespace Rcpp; 7 | 8 | // elastic net degrees of freedom estimator (Tibshirani and Taylor, 2012) 9 | 10 | // [[Rcpp::export]] 11 | Rcpp::NumericVector compute_df(double alpha, 12 | Rcpp::NumericVector lambda, 13 | Rcpp::List xA) { 14 | int nLambda = lambda.size(); 15 | Rcpp::NumericVector dfA(nLambda); 16 | for (int i = 0; i < nLambda; i++) { 17 | arma::mat matr = xA[i]; 18 | double nA = matr.n_cols; 19 | if (nA == 0) { 20 | dfA[i] = 1L; 21 | } else if (alpha == 0) { // ridge df 22 | arma::vec s; 23 | bool pass = arma::svd(s, matr); 24 | if (pass == true) { 25 | arma::vec ss = arma::pow(s, 2); 26 | double estimate = arma::sum(ss / (ss + lambda[i])); 27 | dfA[i] = estimate; 28 | } else { 29 | dfA[i] = NumericVector::get_na(); 30 | } 31 | } else if (alpha == 1) { // lasso df 32 | dfA[i] = nA; 33 | } else { // elastic net df 34 | arma::mat inverted; 35 | arma::mat toInvert = matr.t() * matr + (1 - alpha) * lambda[i] * arma::eye(nA, nA); 36 | bool pass = arma::inv(inverted, toInvert); 37 | if (pass == true) { 38 | double estimate = arma::sum(arma::diagvec(matr * inverted * matr.t())); 39 | dfA[i] = estimate; 40 | } else { 41 | dfA[i] = NumericVector::get_na(); 42 | } 43 | } 44 | } 45 | return dfA; 46 | } 47 | 48 | -------------------------------------------------------------------------------- /src/compute_sentiment_onegrams.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include <Rcpp.h> 3 | #include <RcppParallel.h> 4 | #include "utils.h" 5 | #include "SentimentScorerOnegrams.h" 6 | 7 | // [[Rcpp::depends(RcppParallel)]] 8 | using namespace std; 9 | using namespace Rcpp; 10 | using namespace RcppParallel; 11 | 12 | // [[Rcpp::export]] 13 | Rcpp::NumericMatrix compute_sentiment_onegrams(std::vector< std::vector< std::string > > texts, 14 | Rcpp::List lexicons, 15 | std::string how) { 16 | 17 | int N = texts.size(); // already tokenized texts 18 | int nL = lexicons.size(); 19 | bool isFreqWeighting = is_frequency_weighting(how); 20 | Rcpp::CharacterVector colNames = prepare_column_names(lexicons.names(), nL); 21 | 22 | std::unordered_map< std::string, std::vector< double > > lexiconMap = make_lexicon_map(lexicons, nL); 23 | std::unordered_map< int, std::unordered_map< std::string, double > > frequencyMap; 24 |
std::unordered_map< std::string, double > inverseFrequencyMap; 25 | 26 | if (isFreqWeighting) { 27 | make_frequency_maps(frequencyMap, inverseFrequencyMap, texts); 28 | } 29 | Rcpp::NumericMatrix sentScores(N, nL + 1); // output matrix of word count and sentiment scores 30 | SentimentScorerOnegrams sentimentScorer(texts, lexiconMap, how, nL, N, frequencyMap, inverseFrequencyMap, isFreqWeighting, sentScores); 31 | parallelFor(0, N, sentimentScorer); 32 | colnames(sentScores) = colNames; 33 | 34 | return(sentScores); 35 | } 36 | 37 | -------------------------------------------------------------------------------- /src/compute_sentiment_sentences.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include <Rcpp.h> 3 | #include <RcppParallel.h> 4 | #include "utils.h" 5 | #include "SentimentScorerSentences.h" 6 | 7 | // [[Rcpp::depends(RcppParallel)]] 8 | 9 | using namespace std; 10 | using namespace Rcpp; 11 | using namespace RcppParallel; 12 | 13 | // [[Rcpp::export]] 14 | Rcpp::NumericMatrix compute_sentiment_sentences(std::vector< std::vector< std::string > > texts, 15 | Rcpp::List lexicons, 16 | std::string how, 17 | int valenceType) { 18 | 19 | int N = texts.size(); // already tokenized texts 20 | int nL = lexicons.size(); 21 | if (valenceType != 0) { 22 | nL = lexicons.size() - 1; // the last one has the valence shifters 23 | } 24 | bool isFreqWeighting = is_frequency_weighting(how); 25 | 26 | Rcpp::CharacterVector colNames = prepare_column_names(lexicons.names(), nL); 27 | 28 | std::unordered_map< std::string, std::vector< double > > lexiconMap = make_lexicon_map(lexicons, nL); 29 | std::unordered_map< int, std::unordered_map< std::string, double > > frequencyMap; 30 | std::unordered_map< std::string, double > inverseFrequencyMap; 31 | if (isFreqWeighting) { 32 | make_frequency_maps(frequencyMap, inverseFrequencyMap, texts); 33 | } 34 | std::unordered_map< std::string, double > valenceMap; 35 | if (valenceType != 0) { 36 | Rcpp::List valenceList = lexicons[nL]; 37 | valenceMap = make_valence_map(valenceList); 38 | } 39 | 40 | Rcpp::NumericMatrix sentScores(N, nL + 1); 41 | 42 | SentimentScorerSentences sentimentScorer(texts, lexiconMap, valenceMap, how, nL, N, frequencyMap, inverseFrequencyMap, isFreqWeighting, valenceType, sentScores); 43 | parallelFor(0, N, sentimentScorer); 44 | 45 | colnames(sentScores) = colNames; 46 | 47 | return(sentScores); 48 | 49 | } 50 | 51 | -------------------------------------------------------------------------------- /src/compute_sentiment_valence.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include <Rcpp.h> 3 | #include <RcppParallel.h> 4 | #include "utils.h" 5 | #include "SentimentScorerBigrams.h" 6 | #include "SentimentScorerClusters.h" 7 | // [[Rcpp::depends(RcppParallel)]] 8 | 9 | using namespace std; 10 | using namespace Rcpp; 11 | using namespace RcppParallel; 12 | 13 | // [[Rcpp::export]] 14 | Rcpp::NumericMatrix compute_sentiment_valence(std::vector< std::vector< std::string > > texts, 15 | Rcpp::List lexicons, 16 | std::string how) { 17 | 18 | int N = texts.size(); // already tokenized texts 19 | int nL = lexicons.size() - 1; // the last one has the valence shifters 20 | bool isFreqWeighting = is_frequency_weighting(how); 21 | Rcpp::CharacterVector colNames = prepare_column_names(lexicons.names(), nL); 22 | 23 | std::unordered_map< std::string, std::vector< double > > lexiconMap = make_lexicon_map(lexicons, nL); 24 | 25 | Rcpp::List valenceList = lexicons[nL]; 26 | Rcpp::CharacterVector valenceCols = valenceList.names(); 27 | std::unordered_map< std::string,
double > valenceMap = make_valence_map(valenceList); 28 | std::unordered_map< int, std::unordered_map< std::string, double > > frequencyMap; 29 | std::unordered_map< std::string, double > inverseFrequencyMap; 30 | if (isFreqWeighting) { 31 | make_frequency_maps(frequencyMap, inverseFrequencyMap, texts); 32 | } 33 | 34 | Rcpp::NumericMatrix sentScores(N, nL + 1); // output matrix of word count and sentiment scores 35 | if (valenceCols[1] == "y") { 36 | SentimentScorerBigrams sentimentScorer(texts, lexiconMap, valenceMap, how, nL, N, frequencyMap, inverseFrequencyMap, isFreqWeighting, sentScores); 37 | parallelFor(0, N, sentimentScorer); 38 | } else if (valenceCols[1] == "t") { 39 | SentimentScorerClusters sentimentScorer(texts, lexiconMap, valenceMap, how, nL, N, frequencyMap, inverseFrequencyMap, isFreqWeighting, sentScores); 40 | parallelFor(0, N, sentimentScorer); 41 | } 42 | 43 | colnames(sentScores) = colNames; 44 | 45 | return(sentScores); 46 | } 47 | 48 | -------------------------------------------------------------------------------- /src/fill_NAs.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include <Rcpp.h> 3 | 4 | using namespace Rcpp; 5 | 6 | // [[Rcpp::export]] 7 | Rcpp::NumericMatrix fill_NAs(Rcpp::NumericMatrix x) { 8 | int n = x.nrow(); 9 | int m = x.ncol(); 10 | for (int i = 0; i < m; i++) { 11 | int k = 0; // current index of fill value 12 | Rcpp::NumericVector col = x(_, i); 13 | for (int j = 0; j < n; j++) { 14 | if (NumericVector::is_na(col[j])) { 15 | col[j] = col[k]; // add in fill value 16 | } else { 17 | k = j; // update index 18 | } 19 | } 20 | x(_, i) = col; 21 | } 22 | return(x); 23 | } 24 | 25 | -------------------------------------------------------------------------------- /src/get_dtf_vectors.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include <Rcpp.h> 3 | #include "utils.h" 4 | using namespace Rcpp; 5 | 6 | // [[Rcpp::export]] 7 | List get_dtf_vectors(std::vector< std::vector< std::string > > texts) { 8 | std::unordered_map< int, std::unordered_map< std::string, double > > tokenMap; 9 | std::unordered_map< std::string, double > docMap; 10 | make_frequency_maps(tokenMap, docMap, texts); 11 | return List::create(Named("DF") = docMap, Named("TF") = tokenMap); 12 | } 13 | 14 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | 2 | library("testthat") 3 | library("sentometrics") 4 | 5 | test_check("sentometrics") 6 | 7 | -------------------------------------------------------------------------------- /tests/testthat/test_aggregation.R: -------------------------------------------------------------------------------- 1 | 2 | context("Aggregation") 3 | 4 | library("data.table") 5 | library("quanteda") 6 | 7 | set.seed(123) 8 | 9 | # corpus, lexicon and aggregation control creation 10 | data("usnews") 11 | corpus <- quanteda::corpus_sample(sento_corpus(corpusdf = usnews), size = 1000) 12 | 13 | data("list_lexicons") 14 | lex <- sento_lexicons(list_lexicons[c("GI_en", "LM_en")]) 15 | lexClust <- sento_lexicons(list_lexicons[c("GI_en", "LM_en", "HENRY_en")], 16 | list_valence_shifters[["en"]][, c("x", "t")]) 17 | 18 | ### tests from here ### 19 | 20 | ctr1 <- ctr_agg(howWithin = "proportionalPol", howDocs = "equal_weight", howTime = "almon", by = "month", 21 | lag = 5, ordersAlm = 1:3, do.inverseAlm = TRUE) 22 | sentMeas1 <- sento_measures(corpus, lex, ctr1) 23 | 24 | ctr2 <- ctr_agg(howWithin = "counts",
howDocs = "proportional", howTime = c("equal_weight", "linear", "own"), 25 | by = "year", lag = 2, weights = data.frame(q1 = c(0.25, 0.75), q3 = c(0.75, 0.25)), 26 | do.ignoreZeros = FALSE, do.sentence = TRUE) 27 | sentMeas2 <- sento_measures(corpus, lex, ctr2) 28 | 29 | ctr3 <- ctr_agg(howWithin = "counts", howDocs = "inverseProportional", howTime = c("equal_weight", "own"), 30 | by = "year", lag = 3, weights = data.frame(GI_en = c(0.3, 0.6, 0.1))) 31 | 32 | ctr4 <- ctr_agg(howWithin = "UShaped", howDocs = "inverseProportional", howTime = "exponential", 33 | do.inverseExp = TRUE, alphas = c(0.1, 0.2, 0.3), by = "day", lag = 180) 34 | 35 | ctr5 <- ctr_agg(howWithin = "counts", howDocs = "exponential", alphaExpDocs = 0.2, 36 | howTime = "linear", by = "year", lag = 3) 37 | 38 | ctr6 <- ctr_agg(howWithin = "TFIDF", howDocs = "inverseExponential", alphaExpDocs = 0.1, 39 | howTime = "equal_weight", by = "week", lag = 7) 40 | 41 | # sento_measures 42 | test_that("Number of columns coincide with provided dimensions", { 43 | expect_equal(nmeasures(sentMeas1), length(sentMeas1$features) * length(sentMeas1$lexicons) * length(sentMeas1$time)) 44 | expect_equal(nmeasures(sentMeas2), length(sentMeas2$features) * length(sentMeas2$lexicons) * length(sentMeas2$time)) 45 | }) 46 | 47 | # ctr_agg 48 | test_that("Aggregation control function breaks when wrong inputs supplied", { 49 | expect_error(ctr_agg(howWithin = c("oops", "again"), howDocs = c("mistake", "forYou"), howTime = "bad", 50 | lag = 42, by = "infinitely", fill = "theMartiniPolice", nCore = c("yes", "man"))) 51 | expect_error(ctr_agg(howTime = c("almon", "beta", "exponential"), lag = 0, 52 | ordersAlm = -1:2, aBeta = -2, bBeta = -3, alphasExp = c(-1, -3))) 53 | expect_message(ctr_agg(howTime = "linear", lag = 4, weights = data.frame(a = c(1/2, 1/2)))) 54 | expect_error(ctr_agg(howTime = "own", lag = 12, weights = data.frame("dot--hacker" = rep(1/12, 12), check.names = FALSE))) 55 | expect_message(ctr_agg(howTime = c("linear", "beta"), lag = 1)) 56 | }) 57 | 58 | # aggregate.sentiment 59 | s1 <- compute_sentiment(corpus, lex, how = "proportional") 60 | s2 <- compute_sentiment(as.character(corpus), lex, how = "counts") 61 | s3 <- compute_sentiment(corpus, lexClust, how = "proportionalSquareRoot", do.sentence = TRUE) 62 | sentimentAgg <- aggregate(s3, ctr_agg(lag = 7), do.full = FALSE) 63 | test_that("Test input and output of sentiment aggregation functionality", { 64 | expect_true(inherits(s1, "sentiment")) 65 | expect_true(inherits(s2, "data.table")) 66 | expect_true(inherits(s3, "sentiment")) 67 | expect_true(inherits(aggregate(s1, ctr1), "sento_measures")) 68 | expect_true(inherits(aggregate(s3, ctr1), "sento_measures")) # sentence-level with dates (full) 69 | expect_true(inherits(aggregate(s3, ctr1, do.full = FALSE), "sentiment")) 70 | expect_error(aggregate(s2, ctr2)) # doc-level but no dates 71 | expect_error(sento_measures(corpus, lex, ctr3)) # because overlapping names specified 72 | expect_true(inherits(sento_measures(corpus, lex, ctr4), "sento_measures")) 73 | expect_true(inherits(sento_measures(corpus, lex, ctr5), "sento_measures")) 74 | expect_true(inherits(sento_measures(corpus, lex, ctr6), "sento_measures")) 75 | # expect_true(all.equal(sentimentAgg[["word_count"]], s1[["word_count"]])) 76 | }) 77 | 78 | # peakdocs 79 | test_that("Output for peak documents extraction in line with input", { 80 | expect_length(peakdocs(s1, n = 7, type = "both"), 7) 81 | expect_length(peakdocs(s1, n = 11, type = "pos"), 11) 82 | 
expect_length(peakdocs(s1, n = 1, type = "neg"), 1) 83 | expect_length(peakdocs(s1, n = 25, type = "both", do.average = TRUE), 25) 84 | }) 85 | 86 | # peakdates 87 | test_that("Output for peak dates extraction in line with input", { 88 | expect_length(peakdates(sentMeas1, n = 15, type = "both"), 15) 89 | expect_length(peakdates(sentMeas1, n = 21, type = "pos"), 21) 90 | expect_length(peakdates(sentMeas1, n = 4, type = "neg"), 4) 91 | expect_length(peakdates(sentMeas1, n = 10, type = "both", do.average = TRUE), 10) 92 | }) 93 | 94 | -------------------------------------------------------------------------------- /tests/testthat/test_attribution.R: -------------------------------------------------------------------------------- 1 | 2 | context("Attribution") 3 | 4 | cat("\n") 5 | 6 | library("data.table") 7 | library("quanteda") 8 | 9 | set.seed(123) 10 | 11 | # corpus, lexicon and aggregation control creation 12 | data("usnews") 13 | corpus <- quanteda::corpus_sample( 14 | quanteda::corpus_subset(sento_corpus(corpusdf = usnews), date >= "1997-01-01" & date <= "2000-12-01"), 15 | 500 16 | ) 17 | 18 | data("list_lexicons") 19 | lex <- sento_lexicons(list_lexicons[c("GI_en", "LM_en")]) 20 | ctrA <- ctr_agg(howWithin = "counts", howDocs = "proportional", howTime = "almon", by = "day", 21 | lag = 24, ordersAlm = 1:3, do.inverseAlm = TRUE, do.ignoreZeros = FALSE, fill = "latest") 22 | 23 | sento_measures <- sento_measures(corpus, lex, ctrA) 24 | 25 | # preparation of estimation data 26 | N <- nobs(sento_measures) 27 | y <- rnorm(N) # random y variable 28 | x <- data.frame(runif(N), rnorm(N)) # two additional random x variables 29 | colnames(x) <- c("x1", "x2") 30 | 31 | # model run 32 | ctrM <- ctr_model(model = "gaussian", type = "Cp", do.iter = TRUE, h = 3, lambdas = NULL, 33 | nSample = N - 12, do.shrinkage.x = TRUE, alphas = 0) 34 | out <- sento_model(sento_measures, y, x = x, ctr = ctrM) 35 | 36 | ### tests from here ### 37 | 38 | attributions <- attributions(out, sento_measures, do.normalize = FALSE) 39 | 40 | l <- rowSums(attributions$lexicons[, -1], na.rm = TRUE) 41 | f <- rowSums(attributions$features[, -1], na.rm = TRUE) 42 | t <- rowSums(attributions$time[, -1], na.rm = TRUE) 43 | la <- rowSums(attributions$lags[, -1], na.rm = TRUE) 44 | # d <- as.vector(sapply(attributions$documents, function(x) return(sum(x$attrib, na.rm = TRUE)))) 45 | 46 | TOL <- 1e-04 47 | 48 | # attributions 49 | test_that("Attributions across all dimensions should be the same across rows", { 50 | expect_equal(l, f) 51 | expect_equal(l, t) 52 | expect_equal(l, la, tolerance = TOL) 53 | # expect_equal(l, d) # does not hold because fill = "latest" 54 | expect_equal(f, t) 55 | expect_equal(f, la, tolerance = TOL) 56 | # expect_equal(f, d) 57 | expect_equal(t, la, tolerance = TOL) 58 | # expect_equal(t, d) 59 | # expect_equal(la, d) 60 | }) 61 | 62 | # plot.attributions 63 | p <- plot(attributions, group = sample(c("features", "lexicons", "time", "lags"), 1)) 64 | test_that("Plot is a ggplot object", { 65 | expect_true(inherits(p, "ggplot")) 66 | }) 67 | 68 | -------------------------------------------------------------------------------- /tests/testthat/test_methods_sentomeasures.R: -------------------------------------------------------------------------------- 1 | 2 | context("Methods sentomeasures") 3 | 4 | library("data.table") 5 | library("quanteda") 6 | 7 | set.seed(123) 8 | 9 | # corpus, lexicon and aggregation control creation 10 | data("usnews") 11 | corpus <- quanteda::corpus_sample(sento_corpus(corpusdf = 
usnews), size = 600) 12 | 13 | data("list_lexicons") 14 | lex <- sento_lexicons(list_lexicons[c("HENRY_en", "LM_en")]) 15 | ctr <- ctr_agg(howWithin = "counts", howDocs = "proportional", howTime = c("linear", "exponential"), by = "day", 16 | lag = 60, alphasExp = c(0.1, 0.6)) 17 | 18 | sentMeas <- sento_measures(corpus, lex, ctr) 19 | 20 | ### tests from here ### 21 | 22 | # diff 23 | N <- nobs(sentMeas) 24 | M <- nmeasures(sentMeas) 25 | test_that("Differencing is properly done", { 26 | expect_equal(nobs(diff(sentMeas, lag = 1)), N - 1) 27 | expect_equal(nobs(diff(sentMeas, lag = 2, differences = 3)), N - 2 * 3) 28 | }) 29 | 30 | # scale 31 | s1 <- scale(sentMeas) 32 | s2 <- suppressWarnings(scale(sentMeas, center = -as.matrix(as.data.table(sentMeas)[, -1]), scale = FALSE)) 33 | s3 <- scale(sentMeas, center = as.numeric(sentMeas$stats["mean", ]), scale = as.numeric(sentMeas$stats["sd", ])) 34 | s4 <- scale(sentMeas, 35 | center = -matrix(as.numeric(sentMeas$stats["mean", ]), nrow = N, ncol = M, byrow = TRUE), 36 | scale = matrix(as.numeric(sentMeas$stats["sd", ]), nrow = N, ncol = M, byrow = TRUE)) 37 | test_that("Scaling is properly done", { 38 | expect_equal(rowMeans(s1$stats["mean", ], na.rm = TRUE), c(mean = 0)) 39 | expect_equal(rowMeans(s1$stats["sd", ], na.rm = TRUE), c(sd = 1)) 40 | expect_equal(rowMeans(s2$stats["mean", ], na.rm = TRUE), c(mean = 0)) 41 | expect_equal(rowMeans(s2$stats["sd", ], na.rm = TRUE), c(sd = 0)) 42 | expect_equal(s1$stats["mean", ], s3$stats["mean", ]) 43 | expect_equal(s1$stats["sd", ], s3$stats["sd", ]) 44 | expect_equal(s1$stats["mean", ], s4$stats["mean", ]) 45 | expect_equal(s1$stats["sd", ], s4$stats["sd", ]) 46 | }) 47 | 48 | # summary.sentomeasures, print.sentomeasures 49 | cat("\n") 50 | test_that("No output returned when object summarized or printed", { 51 | expect_null(summary(sentMeas)) 52 | expect_null(print(sentMeas)) 53 | }) 54 | 55 | # plot.sentomeasures 56 | p <- plot(sentMeas, group = sample(c("features", "lexicons", "time"), 1)) 57 | test_that("Plot is a ggplot object", { 58 | expect_true(inherits(p, "ggplot")) 59 | }) 60 | 61 | # as.data.table, measures_to_long 62 | measuresLong <- as.data.table(sentMeas, format = "long") 63 | test_that("Proper long formatting of sentiment measures", { 64 | expect_true(nrow(measuresLong) == nobs(sentMeas) * nmeasures(sentMeas)) 65 | expect_true(all(sentMeas$lexicons %in% unique(measuresLong[["lexicons"]]))) 66 | expect_true(all(sentMeas$features %in% unique(measuresLong[["features"]]))) 67 | expect_true(all(sentMeas$time %in% unique(measuresLong[["time"]]))) 68 | expect_true(all(as.data.table(sentMeas)[["date"]] %in% unique(measuresLong[["date"]]))) 69 | }) 70 | 71 | # as.data.frame 72 | test_that("Proper data.frame conversion", { 73 | expect_true(class(as.data.frame(sentMeas)) == "data.frame") 74 | }) 75 | 76 | -------------------------------------------------------------------------------- /vignettes/contributions/gopress_figures/read_later.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/vignettes/contributions/gopress_figures/read_later.jpg -------------------------------------------------------------------------------- /vignettes/contributions/gopress_figures/save_as.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/vignettes/contributions/gopress_figures/save_as.jpg -------------------------------------------------------------------------------- /vignettes/development.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Future development" 3 | output: rmarkdown::html_vignette 4 | --- 5 | 6 | Here is an overview of the most important anticipated developments, as well as known bugs and minor unfinished business. If you want to help out on some of these things, contact [Samuel Borms](mailto:borms_sam@hotmail.com), or simply open an issue and file a pull request on GitHub. 7 | 8 | ### Extensions 9 | 10 | - Implement a `sento_train()` function to, for instance, generate a lexicon from a corpus. 11 | 12 | - Add topic modeling functionality to the `add_features()` function (or as part of the `sento_train()` function). 13 | 14 | - Expand the number of available models in the `sento_model()` function (e.g. constrained regression and PCA). 15 | 16 | - Implement an optimization approach in the `aggregate.sento_measures(..., do.global = TRUE)` function to extract optimized weights across dimensions (and possibly make it available through the `sento_model()` function); this includes allowing weights to be set in the `aggregate.sento_measures()` function instead of averaging by default. 17 | 18 | - Implement fast textual sentiment computation for lexicons with ngrams. 19 | 20 | - Implement a `scale.sentiment()` function. 21 | 22 | - Add a `head.sento_measures()` and a `tail.sento_measures()` function. 23 | 24 | - Implement a structure to support high-frequency intraday aggregation. 25 | 26 | - Make more lexicons available (e.g. in German and Spanish). 27 | 28 | - Give more control to the user to play with **`glmnet`** parameters in the `sento_model()` function. 29 | 30 | - Write a helper function to aggregate an `attributions` object into clusters. 31 | 32 | - Resolve the inconsistency with `data.frame` input columns (`"text(s)"` & `"(doc_)id"`) in the **`sentometrics`**, **`quanteda`** and **`tm`** corpus creators. 33 | 34 | - Prepare a functional CRAN version of the **`sentometrics.app`** package. 35 | 36 | - Find additional computational speed gains (especially after recent additions that introduced some overhead). 37 | 38 | - Add a `"binary"` option to `get_hows()[["words"]]` that turns the sentiment computation into an indicator-like calculation (value of 1 if a text has at least one lexicon word). 39 | 40 | ### Tweaks and bugs 41 | 42 | - Optimize parallelization of iterative model runs (e.g. avoid unnecessary copying of objects across cores). 43 | 44 | - Add a `delete_features()` function as an intuitive counterpart to `add_features()`. 45 | 46 | - Solve the issue that column names of the sentiment measures output do not handle special characters well but still get through. 47 | 48 | - Handle `data.frame` and `matrix` input in the `sento_model(..., y, ...)` function more consistently. 49 | 50 | - Add references to the external **`textdata`** package in examples (e.g. for extra lexicons). 51 | 52 | - Be more flexible for the features in a `sento_corpus` object by also allowing values outside 0 and 1. 53 | 54 | - Make sure subsetting does not preserve the `sentiment` class when it is not supposed to. 55 | 56 | - Remove all but one (instead of all) duplicate entries in the `sento_lexicons()` function.
57 | 58 | - Make sure you can also add the `"language"` identifier to a corpus with `add_features()`. 59 | 60 | -------------------------------------------------------------------------------- /vignettes/examples/corpus.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Corpus manipulation" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteEngine{knitr::rmarkdown} 6 | %\VignetteEncoding{UTF-8} 7 | --- 8 | 9 | ```{r, include=FALSE} 10 | knitr::opts_chunk$set(warning = FALSE, message = FALSE, fig.width = 7, fig.height = 4, fig.align = "center") 11 | ``` 12 | 13 | This tutorial provides insights into how to create, enrich, transform, and analyze a `sento_corpus` object. A `sento_corpus` object is special because it always has a date column and numeric metadata features. 14 | 15 | **Preparation** 16 | &nbsp; 17 | 18 | ```{r} 19 | library("sentometrics") 20 | library("quanteda") 21 | 22 | data("usnews") 23 | data("list_lexicons") 24 | data("list_valence_shifters") 25 | ``` 26 | 27 | ### Summarize a corpus through some statistics and plots 28 | 29 | The `corpus_summarize()` function allows you to quickly investigate what your corpus looks like in terms of the number of documents, the number of tokens, and the metadata features. The summary can be computed at a daily, weekly, monthly, or yearly frequency, and for all the corpus features or only a selection of them. 30 | 31 | ```{r} 32 | corpus <- sento_corpus(usnews) 33 | 34 | summ <- corpus_summarize(corpus, by = "month", features = c("wsj", "wapo")) 35 | stats <- summ[["stats"]] 36 | plots <- summ[["plots"]] 37 | ``` 38 | 39 | The summary consists of a statistics component... 40 | 41 | ```{r} 42 | stats 43 | ``` 44 | 45 | ... and a component with pregenerated graphs of the statistics. 46 | 47 | ```{r} 48 | plots$doc_plot # monthly evolution of the number of documents 49 | plots$feature_plot # monthly evolution of the presence of the two journal features 50 | plots$token_plot # monthly evolution of the token statistics 51 | ``` 52 | 53 | ### Apply **`quanteda`** corpus functions to a `sento_corpus` object 54 | 55 | It is also possible to apply the many corpus manipulation functions of the **`quanteda`** package to a `sento_corpus` object. In fact, the `sento_corpus` object is built on **`quanteda`**'s `corpus` object. 56 | 57 | ```{r} 58 | corpus <- sento_corpus(usnews) 59 | 60 | res <- corpus_reshape(corpus, to = "sentences") 61 | sam <- corpus_sample(corpus, 100) 62 | seg <- corpus_segment(corpus, pattern = "stock", use_docvars = TRUE) 63 | sub <- corpus_subset(corpus, wsj == 1) 64 | tri <- corpus_trim(corpus, "documents", min_ntoken = 300) 65 | trs <- corpus_trim(corpus, "sentences", min_ntoken = 40) 66 | ``` 67 | 68 | ### Enrich a `sento_corpus` object with features 69 | 70 | Using the `add_features()` function, additional features can be added to your corpus directly, or generated through keywords or regex pattern matching, as the two variants below show.
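Features can first of all be supplied directly as a `data.frame` of numeric values between 0 and 1, through the `featuresdf` argument. A minimal sketch of this direct route, reusing the journal indicator columns already present in `usnews`:

```{r}
# add the wsj and wapo metadata columns as corpus features directly
corpusDirect <- sento_corpus(usnews[, 1:3])
corpusDirect <- add_features(corpusDirect, featuresdf = usnews[, c("wsj", "wapo")])

# the automatically created dummyFeature column can be dropped, as done below
head(docvars(corpusDirect))
```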
71 | 72 | ```{r} 73 | corpus <- sento_corpus(usnews[, 1:3]) 74 | 75 | kw <- list( 76 | E = c("economy", "economic"), 77 | P = c("polic.|Polic.|politi.|Politi."), # a regex pattern 78 | U = c("uncertainty", "uncertain") 79 | ) 80 | 81 | corpus <- add_features(corpus, keywords = kw, do.binary = TRUE, do.regex = c(FALSE, TRUE, FALSE)) 82 | docvars(corpus, "dummyFeature") <- NULL 83 | 84 | head(docvars(corpus), 20) 85 | ``` 86 | 87 | -------------------------------------------------------------------------------- /vignettes/sentometrics.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Getting started with sentometrics" 3 | output: rmarkdown::html_vignette 4 | --- 5 | 6 | ```{r, include=FALSE} 7 | knitr::opts_chunk$set(warning = FALSE, message = FALSE, fig.width = 6, fig.height = 4, fig.align = "center") 8 | ``` 9 | 10 | You collected a large number of texts and think it is a good idea to summarize your corpus into several textual sentiment time series, which you suspect could help predict some variable you are interested in. However, you do not really know how to proceed next... Fortunately, you come across the **`sentometrics`** package, which does exactly what you need! Great! 11 | 12 | ## Installation 13 | 14 | To install the package from CRAN, simply do: 15 | 16 | ```{r, eval=FALSE} 17 | install.packages("sentometrics") 18 | ``` 19 | 20 | To install the latest development version of **`sentometrics`** (which may contain bugs!), execute: 21 | 22 | ```{r, eval=FALSE} 23 | devtools::install_github("sborms/sentometrics") 24 | ``` 25 | 26 | ## Examples 27 | 28 | Check out the **Examples** section. It includes tutorials with a bunch of examples, from simple to a little less simple, and some larger-scale applications. Sentiment computation, aggregation, diagnostic tools, visualization, regression -- it's all in there. 29 | 30 | ## Readings 31 | 32 | Check out the **Research** section, especially our [vignette](https://ssrn.com/abstract=3067734) which explains the ins and outs of the software package along with accompanying code examples. The complete documentation can be found on the [sentometrics CRAN](https://CRAN.R-project.org/package=sentometrics) page. 33 | 34 | ## Shiny app 35 | 36 | You might also want to have a look at the [**`sentometrics.app`**](https://github.com/sborms/sentometrics.app) package. Its `sentometrics.app::sento_app()` function embeds a Shiny application that displays many of **`sentometrics`**' functionalities. Enjoy! 37 | 38 | ## Media 39 | 40 | Earlier versions of the package were presented as a lightning talk at the eRum 2018 (Budapest) and useR! 2019 (Toulouse) conferences, and recorded! 41 | 42 |
43 | 44 | --------------------------------------------------------------------------------