├── .Rbuildignore
├── .github
│   ├── .gitignore
│   └── workflows
│       └── R-CMD-check.yaml
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
│   ├── RcppExports.R
│   ├── attribution.R
│   ├── deprecated.R
│   ├── sentiment_engines.R
│   ├── sentocorpus.R
│   ├── sentolexicons.R
│   ├── sentomeasures_main.R
│   ├── sentomeasures_measures_xyz.R
│   ├── sentomeasures_methods.R
│   ├── sentometrics.R
│   ├── sentomodel.R
│   ├── utils.R
│   └── zzz.R
├── README.md
├── Sentometrics.Rproj
├── appendix
│   ├── output_timings.txt
│   ├── run_timings.R
│   └── vignette_supplementary_appendix.pdf
├── cran-comments.md
├── data-raw
│   ├── FEEL_eng_tr.rda
│   ├── FEEL_fr.rda
│   ├── FEEL_nl_tr.rda
│   ├── GI_eng.rda
│   ├── GI_fr_tr.rda
│   ├── GI_nl_tr.rda
│   ├── HENRY_eng.rda
│   ├── HENRY_fr_tr.rda
│   ├── HENRY_nl_tr.rda
│   ├── LM_eng.rda
│   ├── LM_fr_tr.rda
│   ├── LM_nl_tr.rda
│   ├── US_EPU_1985-2018.csv
│   ├── US_economic_news_1951-2014.csv
│   ├── _sources.txt
│   ├── datasets.R
│   ├── lexicons-raw
│   │   ├── FEEL.csv
│   │   ├── FEEL_eng.csv
│   │   ├── FEEL_nl.csv
│   │   ├── FEEL_raw.csv
│   │   ├── GI.csv
│   │   ├── GI_fr.csv
│   │   ├── GI_nl.csv
│   │   ├── GI_raw.csv
│   │   ├── HENRY.csv
│   │   ├── HENRY_fr.csv
│   │   ├── HENRY_nl.csv
│   │   ├── LM.csv
│   │   ├── LM_fr.csv
│   │   ├── LM_nl.csv
│   │   └── LM_raw.csv
│   ├── valence-raw
│   │   └── valShifters.rda
│   ├── valence_eng.rda
│   ├── valence_fr.rda
│   └── valence_nl.rda
├── data
│   ├── epu.rda
│   ├── list_lexicons.rda
│   ├── list_valence_shifters.rda
│   └── usnews.rda
├── docs
│   ├── 404.html
│   ├── apple-touch-icon-120x120.png
│   ├── apple-touch-icon-152x152.png
│   ├── apple-touch-icon-180x180.png
│   ├── apple-touch-icon-60x60.png
│   ├── apple-touch-icon-76x76.png
│   ├── apple-touch-icon.png
│   ├── articles
│   │   ├── applications
│   │   │   ├── epu.html
│   │   │   ├── epu_files
│   │   │   │   ├── figure-html
│   │   │   │   │   ├── unnamed-chunk-10-1.png
│   │   │   │   │   ├── unnamed-chunk-15-1.png
│   │   │   │   │   └── unnamed-chunk-9-1.png
│   │   │   │   ├── header-attrs-2.10
│   │   │   │   │   └── header-attrs.js
│   │   │   │   └── header-attrs-2.9
│   │   │   │       └── header-attrs.js
│   │   │   ├── vix.html
│   │   │   └── vix_files
│   │   │       ├── figure-html
│   │   │       │   ├── unnamed-chunk-14-1.png
│   │   │       │   ├── unnamed-chunk-16-1.png
│   │   │       │   ├── unnamed-chunk-16-2.png
│   │   │       │   └── unnamed-chunk-9-1.png
│   │   │       ├── header-attrs-2.10
│   │   │       │   └── header-attrs.js
│   │   │       └── header-attrs-2.9
│   │   │           └── header-attrs.js
│   │   ├── contributions
│   │   │   ├── gopress.html
│   │   │   ├── gopress_figures
│   │   │   │   ├── read_later.jpg
│   │   │   │   └── save_as.jpg
│   │   │   ├── gopress_files
│   │   │   │   ├── figure-html
│   │   │   │   │   ├── sento 3-1.png
│   │   │   │   │   ├── sento topic 3-1.png
│   │   │   │   │   ├── unnamed-chunk-3-1.png
│   │   │   │   │   └── unnamed-chunk-4-1.png
│   │   │   │   ├── header-attrs-2.10
│   │   │   │   │   └── header-attrs.js
│   │   │   │   └── header-attrs-2.9
│   │   │   │       └── header-attrs.js
│   │   │   ├── isa.html
│   │   │   └── isa_files
│   │   │       ├── figure-html
│   │   │       │   ├── unnamed-chunk-10-1.png
│   │   │       │   ├── unnamed-chunk-19-1.png
│   │   │       │   └── unnamed-chunk-27-1.png
│   │   │       ├── header-attrs-2.10
│   │   │       │   └── header-attrs.js
│   │   │       └── header-attrs-2.9
│   │   │           └── header-attrs.js
│   │   ├── development.html
│   │   ├── development_files
│   │   │   ├── header-attrs-2.10
│   │   │   │   └── header-attrs.js
│   │   │   └── header-attrs-2.9
│   │   │       └── header-attrs.js
│   │   ├── examples
│   │   │   ├── corpus.html
│   │   │   ├── corpus_files
│   │   │   │   ├── figure-html
│   │   │   │   │   ├── unnamed-chunk-5-1.png
│   │   │   │   │   ├── unnamed-chunk-5-2.png
│   │   │   │   │   └── unnamed-chunk-5-3.png
│   │   │   │   ├── header-attrs-2.10
│   │   │   │   │   └── header-attrs.js
│   │   │   │   └── header-attrs-2.9
│   │   │   │       └── header-attrs.js
│   │   │   ├── indexation.html
│   │   │   ├── indexation_files
│   │   │   │   ├── figure-html
│   │   │   │   │   ├── unnamed-chunk-11-1.png
│   │   │   │   │   ├── unnamed-chunk-4-1.png
│   │   │   │   │   ├── unnamed-chunk-4-2.png
│   │   │   │   │   ├── unnamed-chunk-4-3.png
│   │   │   │   │   └── unnamed-chunk-4-4.png
│   │   │   │   ├── header-attrs-2.10
│   │   │   │   │   └── header-attrs.js
│   │   │   │   └── header-attrs-2.9
│   │   │   │       └── header-attrs.js
│   │   │   ├── modeling.html
│   │   │   ├── modeling_files
│   │   │   │   ├── figure-html
│   │   │   │   │   ├── unnamed-chunk-11-1.png
│   │   │   │   │   ├── unnamed-chunk-11-2.png
│   │   │   │   │   ├── unnamed-chunk-11-3.png
│   │   │   │   │   └── unnamed-chunk-9-1.png
│   │   │   │   ├── header-attrs-2.10
│   │   │   │   │   └── header-attrs.js
│   │   │   │   └── header-attrs-2.9
│   │   │   │       └── header-attrs.js
│   │   │   ├── sentiment.html
│   │   │   └── sentiment_files
│   │   │       ├── figure-html
│   │   │       │   └── unnamed-chunk-10-1.png
│   │   │       ├── header-attrs-2.10
│   │   │       │   └── header-attrs.js
│   │   │       └── header-attrs-2.9
│   │   │           └── header-attrs.js
│   │   ├── index.html
│   │   ├── sentometrics.html
│   │   └── sentometrics_files
│   │       ├── header-attrs-2.10
│   │       │   └── header-attrs.js
│   │       └── header-attrs-2.9
│   │           └── header-attrs.js
│   ├── authors.html
│   ├── bootstrap-toc.css
│   ├── bootstrap-toc.js
│   ├── docsearch.css
│   ├── docsearch.js
│   ├── docsearch.json
│   ├── favicon-16x16.png
│   ├── favicon-32x32.png
│   ├── favicon.ico
│   ├── index.html
│   ├── link.svg
│   ├── logo.png
│   ├── news
│   │   └── index.html
│   ├── pkgdown.css
│   ├── pkgdown.js
│   ├── pkgdown.yml
│   ├── reference
│   │   ├── Rplot001.png
│   │   ├── add_features.html
│   │   ├── aggregate.sentiment.html
│   │   ├── aggregate.sento_measures.html
│   │   ├── as.data.table.sento_measures.html
│   │   ├── as.sentiment.html
│   │   ├── as.sento_corpus.html
│   │   ├── attributions.html
│   │   ├── compute_sentiment.html
│   │   ├── corpus_summarize.html
│   │   ├── ctr_agg.html
│   │   ├── ctr_model.html
│   │   ├── data-defunct.html
│   │   ├── diff.sento_measures.html
│   │   ├── epu.html
│   │   ├── figures
│   │   │   ├── gsoc.png
│   │   │   ├── innoviris.png
│   │   │   ├── ivado.png
│   │   │   ├── logo.png
│   │   │   ├── snsf.png
│   │   │   └── swissuniversities.png
│   │   ├── get_dates.html
│   │   ├── get_dimensions.html
│   │   ├── get_hows.html
│   │   ├── get_loss_data.html
│   │   ├── index.html
│   │   ├── list_lexicons.html
│   │   ├── list_valence_shifters.html
│   │   ├── measures_fill.html
│   │   ├── measures_update.html
│   │   ├── merge.sentiment.html
│   │   ├── nmeasures.html
│   │   ├── nobs.sento_measures.html
│   │   ├── peakdates.html
│   │   ├── peakdocs.html
│   │   ├── plot.attributions.html
│   │   ├── plot.sento_measures-1.png
│   │   ├── plot.sento_measures.html
│   │   ├── plot.sento_modelIter.html
│   │   ├── predict.sento_model.html
│   │   ├── scale.sento_measures.html
│   │   ├── sento_corpus.html
│   │   ├── sento_lexicons.html
│   │   ├── sento_measures.html
│   │   ├── sento_model.html
│   │   ├── sentometrics-defunct.html
│   │   ├── sentometrics-deprecated.html
│   │   ├── sentometrics-package.html
│   │   ├── subset.sento_measures.html
│   │   ├── usnews.html
│   │   ├── weights_almon.html
│   │   ├── weights_beta.html
│   │   └── weights_exponential.html
│   └── sitemap.xml
├── examples
│   ├── run_vignette.R
│   └── vix.rda
├── index.md
├── inst
│   ├── CITATION
│   └── extdata
│       └── test_data.rda
├── man
│   ├── add_features.Rd
│   ├── aggregate.sentiment.Rd
│   ├── aggregate.sento_measures.Rd
│   ├── as.data.table.sento_measures.Rd
│   ├── as.sentiment.Rd
│   ├── as.sento_corpus.Rd
│   ├── attributions.Rd
│   ├── compute_sentiment.Rd
│   ├── corpus_summarize.Rd
│   ├── ctr_agg.Rd
│   ├── ctr_model.Rd
│   ├── data-defunct.Rd
│   ├── diff.sento_measures.Rd
│   ├── epu.Rd
│   ├── figures
│   │   ├── gsoc.png
│   │   ├── innoviris.png
│   │   ├── ivado.png
│   │   ├── logo.png
│   │   ├── snsf.png
│   │   └── swissuniversities.png
│   ├── get_dates.Rd
│   ├── get_dimensions.Rd
│   ├── get_hows.Rd
│   ├── get_loss_data.Rd
│   ├── list_lexicons.Rd
│   ├── list_valence_shifters.Rd
│   ├── measures_fill.Rd
│   ├── measures_update.Rd
│   ├── merge.sentiment.Rd
│   ├── nmeasures.Rd
│   ├── nobs.sento_measures.Rd
│   ├── peakdates.Rd
│   ├── peakdocs.Rd
│   ├── plot.attributions.Rd
│   ├── plot.sento_measures.Rd
│   ├── plot.sento_modelIter.Rd
│   ├── predict.sento_model.Rd
│   ├── scale.sento_measures.Rd
│   ├── sento_corpus.Rd
│   ├── sento_lexicons.Rd
│   ├── sento_measures.Rd
│   ├── sento_model.Rd
│   ├── sentometrics-defunct.Rd
│   ├── sentometrics-deprecated.Rd
│   ├── sentometrics-package.Rd
│   ├── subset.sento_measures.Rd
│   ├── usnews.Rd
│   ├── weights_almon.Rd
│   ├── weights_beta.Rd
│   └── weights_exponential.Rd
├── pkgdown
│   ├── _pkgdown.yml
│   └── favicon
│       ├── apple-touch-icon-120x120.png
│       ├── apple-touch-icon-152x152.png
│       ├── apple-touch-icon-180x180.png
│       ├── apple-touch-icon-60x60.png
│       ├── apple-touch-icon-76x76.png
│       ├── apple-touch-icon.png
│       ├── favicon-16x16.png
│       ├── favicon-32x32.png
│       └── favicon.ico
├── src
│   ├── Makevars
│   ├── Makevars.win
│   ├── RcppExports.cpp
│   ├── SentimentScorerBigrams.h
│   ├── SentimentScorerClusters.h
│   ├── SentimentScorerOnegrams.h
│   ├── SentimentScorerSentences.h
│   ├── compute_df.cpp
│   ├── compute_sentiment_onegrams.cpp
│   ├── compute_sentiment_sentences.cpp
│   ├── compute_sentiment_valence.cpp
│   ├── fill_NAs.cpp
│   ├── get_dtf_vectors.cpp
│   └── utils.h
├── tests
│   ├── testthat.R
│   └── testthat
│       ├── test_aggregation.R
│       ├── test_attribution.R
│       ├── test_corpus_building.R
│       ├── test_measures_manipulation.R
│       ├── test_methods_sentomeasures.R
│       ├── test_modeling.R
│       └── test_sentiment_computation.R
└── vignettes
    ├── applications
    │   ├── epu.Rmd
    │   └── vix.Rmd
    ├── contributions
    │   ├── gopress.Rmd
    │   ├── gopress_figures
    │   │   ├── read_later.jpg
    │   │   └── save_as.jpg
    │   └── isa.Rmd
    ├── development.Rmd
    ├── examples
    │   ├── corpus.Rmd
    │   ├── indexation.Rmd
    │   ├── modeling.Rmd
    │   └── sentiment.Rmd
    └── sentometrics.Rmd

/.Rbuildignore:
--------------------------------------------------------------------------------
^.*\.Rproj$
^\.Rproj\.user$
^data-raw$
^docs$
^examples$
^THANKS$
^sentometrics-manual-.*\.pdf$
^cran-comments\.md$
^CRAN-RELEASE$
^appendix$
^_pkgdown\.yml$
^pkgdown$
^vignettes$
^index\.md$
^_TODO$
^\.github$
^CRAN-SUBMISSION$
--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
*.html
--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
on:
  push:
    branches:
      - main
      - master
  pull_request:
    branches:
      - main
      - master

name: R-CMD-check

jobs:
  R-CMD-check:
    runs-on: ${{ matrix.config.os }}

    name: ${{ matrix.config.os }} (${{ matrix.config.r }})

    strategy:
      fail-fast: false
      matrix:
        config:
          # - {os: windows-latest, r: 'release'} # parallelization keeps complaining that nmeasures() is not recognized
          - {os: macOS-latest, r: 'release'}
          - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
          - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", http-user-agent: "R/4.1.0 (ubuntu-20.04) R (4.1.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions"}

    env:
      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
      RSPM: ${{ matrix.config.rspm }}
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

    steps:
      - uses: actions/checkout@v2

      - uses: r-lib/actions/setup-r@v1
        with:
          r-version: ${{ matrix.config.r }}

      - uses: r-lib/actions/setup-pandoc@v1

      - name: Query dependencies
        run: |
          install.packages('remotes')
          saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
          writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
        shell: Rscript {0}

      - name: Restore R package cache
        uses: actions/cache@v2
        with:
          path: ${{ env.R_LIBS_USER }}
          key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
          restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-

      - name: Install system dependencies
        if: runner.os == 'Linux'
        run: |
          while read -r cmd
          do
            eval sudo $cmd
          done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')

      - name: Install dependencies
        run: |
          remotes::install_deps(dependencies = TRUE)
          remotes::install_cran("rcmdcheck")
        shell: Rscript {0}

      - name: Check
        env:
          _R_CHECK_CRAN_INCOMING_REMOTE_: false
        run: |
          options(crayon.enabled = TRUE)
          rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
        shell: Rscript {0}

      - name: Upload check results
        if: failure()
        uses: actions/upload-artifact@main
        with:
          name: ${{ runner.os }}-r${{ matrix.config.r }}-results
          path: check
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.Rproj.user
.Rhistory
.RData
.Ruserdata
src/*.o
src/*.so
src/*.dll
src-i386/
src-x64/
gopress_downloads/
_TODO
CRAN-SUBMISSION
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
Package: sentometrics
Type: Package
Title: An Integrated Framework for Textual Sentiment Time Series Aggregation and Prediction
Version: 1.0.1
Authors@R: c(person("Samuel", "Borms", email = "borms_sam@hotmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-9533-1870")),
    person("David", "Ardia", email = "david.ardia@hec.ca", role = c("aut"), comment = c(ORCID = "0000-0003-2823-782X")),
    person("Keven", "Bluteau", email = "keven.bluteau@usherbrooke.ca", role = c("aut"), comment = c(ORCID = "0000-0003-2990-4807")),
    person("Kris", "Boudt", email = "kris.boudt@vub.be", role = c("aut"), comment = c(ORCID = "0000-0002-1000-5142")),
    person("Jeroen", "Van Pelt", email = "jeroenvanpelt@hotmail.com", role = c("ctb")),
    person("Andres", "Algaba", email = "andres.algaba@vub.be", role = c("ctb")))
Maintainer: Samuel Borms <borms_sam@hotmail.com>
Description: Optimized prediction based on textual sentiment, accounting for the intrinsic
    challenge that sentiment can be computed and pooled across texts and time in various
    ways. See Ardia et al. (2021) <doi:10.18637/jss.v099.i02>.
Depends: R (>= 3.3.0)
License: GPL (>= 2)
BugReports: https://github.com/SentometricsResearch/sentometrics/issues
URL: https://sentometrics-research.com/sentometrics/
Encoding: UTF-8
LazyData: true
Suggests: covr,
    doParallel,
    e1071,
    lexicon,
    MCS,
    NLP,
    parallel,
    randomForest,
    stopwords,
    testthat,
    tm
Imports: caret,
    compiler,
    data.table,
    foreach,
    ggplot2,
    glmnet,
    ISOweek,
    quanteda,
    Rcpp (>= 0.12.13),
    RcppRoll,
    RcppParallel,
    stats,
    stringi,
    utils
LinkingTo: Rcpp, RcppArmadillo, RcppParallel
RoxygenNote: 7.3.2
SystemRequirements: GNU make
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
# Generated by roxygen2: do not edit by hand

S3method("$<-",sento_lexicons)
S3method("[",sento_lexicons)
S3method("[<-",sento_lexicons)
S3method("[[<-",sento_lexicons)
S3method("docvars<-",sento_corpus)
S3method("names<-",sento_lexicons)
S3method(aggregate,sentiment)
S3method(aggregate,sento_measures)
S3method(as.data.frame,sento_corpus)
S3method(as.data.frame,sento_measures)
S3method(as.data.table,sento_corpus)
S3method(as.data.table,sento_measures)
S3method(as.sentiment,data.frame)
S3method(as.sentiment,data.table)
S3method(as.sento_corpus,SimpleCorpus)
S3method(as.sento_corpus,VCorpus)
S3method(as.sento_corpus,corpus)
S3method(attributions,sento_model)
S3method(attributions,sento_modelIter)
S3method(compute_sentiment,SimpleCorpus)
S3method(compute_sentiment,VCorpus)
S3method(compute_sentiment,character)
S3method(compute_sentiment,corpus)
S3method(compute_sentiment,sento_corpus)
S3method(diff,sento_measures)
S3method(merge,sentiment)
S3method(nmeasures,sento_measures)
S3method(nobs,sento_measures)
S3method(plot,attributions)
S3method(plot,sento_measures)
S3method(plot,sento_modelIter)
S3method(predict,sento_model)
S3method(print,sento_corpus)
S3method(print,sento_measures)
S3method(print,sento_model)
S3method(print,sento_modelIter)
S3method(scale,sento_measures)
S3method(subset,sento_measures)
S3method(summary,sento_measures)
S3method(summary,sento_model)
S3method(summary,sento_modelIter)
export(add_features)
export(as.sentiment)
export(as.sento_corpus)
export(attributions)
export(compute_sentiment)
export(corpus_summarize)
export(ctr_agg)
export(ctr_model)
export(get_dates)
export(get_dimensions)
export(get_hows)
export(get_loss_data)
export(measures_fill)
export(measures_update)
export(nmeasures)
export(peakdates)
export(peakdocs)
export(sento_corpus)
export(sento_lexicons)
export(sento_measures)
export(sento_model)
export(weights_almon)
export(weights_beta)
export(weights_exponential)
import(data.table)
import(ggplot2)
importFrom(Rcpp,evalCpp)
importFrom(RcppParallel,RcppParallelLibs)
importFrom(compiler,cmpfun)
importFrom(foreach,"%dopar%")
importFrom(quanteda,"docvars<-")
importFrom(stats,aggregate)
importFrom(stats,nobs)
useDynLib(sentometrics,.registration = TRUE)
--------------------------------------------------------------------------------
/R/RcppExports.R:
--------------------------------------------------------------------------------
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

compute_df <- function(alpha, lambda, xA) {
    .Call(`_sentometrics_compute_df`, alpha, lambda, xA)
}

compute_sentiment_onegrams <- function(texts, lexicons, how) {
    .Call(`_sentometrics_compute_sentiment_onegrams`, texts, lexicons, how)
}

compute_sentiment_sentences <- function(texts, lexicons, how, valenceType) {
    .Call(`_sentometrics_compute_sentiment_sentences`, texts, lexicons, how, valenceType)
}

compute_sentiment_valence <- function(texts, lexicons, how) {
    .Call(`_sentometrics_compute_sentiment_valence`, texts, lexicons, how)
}

fill_NAs <- function(x) {
    .Call(`_sentometrics_fill_NAs`, x)
}

get_dtf_vectors <- function(texts) {
    .Call(`_sentometrics_get_dtf_vectors`, texts)
}
--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------

.onLoad <- function(libname = find.package("sentometrics"), pkgname = "sentometrics") {
  # CRAN note avoidance
  if (getRversion() >= "2.15.1")
    utils::globalVariables(
      c("value", "variable", "word_count", "w",
        "attrib", "feature", "id", "i", "wLex",
        "wFeat", "wTime", "x", "identifier",
        ".", "documents", "language", "nTokens",
        "lag", "sentence_id", "n", "pos", "neg")
    )
  invisible()
}

.onUnload <- function(libpath) {
  library.dynam.unload("sentometrics", libpath)
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

## _sentometrics_: An Integrated Framework for Textual Sentiment Time Series Aggregation and Prediction

[![CRAN](https://www.r-pkg.org/badges/version/sentometrics)](https://cran.r-project.org/package=sentometrics)
[![Downloads](https://cranlogs.r-pkg.org/badges/last-day/sentometrics?color=ff69b4)](https://www.r-pkg.org/pkg/sentometrics)
[![Downloads](https://cranlogs.r-pkg.org/badges/sentometrics?color=ff69b4)](https://www.r-pkg.org/pkg/sentometrics)
[![Downloads](https://cranlogs.r-pkg.org/badges/grand-total/sentometrics?color=ff69b4)](https://www.r-pkg.org/pkg/sentometrics)

### Introduction

The **`sentometrics`** package is an **integrated framework for textual sentiment time series aggregation and prediction**.
It accounts for the intrinsic challenge that textual sentiment can be computed in many different ways, as well as the large number of possibilities to pool sentiment into a time series index. The package integrates the fast _quantification_ of sentiment from texts, the _aggregation_ into different sentiment time series, and the _prediction_ based on these measures. All in one coherent workflow!

See the [package website](https://sentometrics-research.com/sentometrics/) and the [vignette](https://doi.org/10.18637/jss.v099.i02) published in the Journal of Statistical Software for plenty of examples and details. We also refer to our [survey](https://doi.org/10.1111/joes.12370), organized as an overview of the steps required in a typical econometric analysis of sentiment from alternative (such as textual) data, and to the companion [web page](https://sborms.github.io/econometrics-meets-sentiment/).

### Installation

To install the package from CRAN, simply do:

```R
install.packages("sentometrics")
```

To install the latest development version of **`sentometrics`** (which may contain bugs!), execute:

```R
devtools::install_github("SentometricsResearch/sentometrics")
```

### Shiny application

For a visual interface to the package's core functionalities as a Shiny application, install the [**`sentometrics.app`**](https://github.com/DataWanderers/sentometrics.app) package and run the `sento_app()` function.

### Reference

Please cite **`sentometrics`** in publications. Use `citation("sentometrics")`.

### Acknowledgements

This software package originates from a
[Google Summer of Code 2017](https://github.com/rstats-gsoc/gsoc2017/wiki/Sentometrics:-An-integrated-framework-for-text-based-multivariate-time-series-modeling-and-forecasting) project, was further developed
during a follow-up [Google Summer of Code 2019](https://github.com/rstats-gsoc/gsoc2019/wiki/sentometrics) project, and benefited from financial support by [Innoviris](https://www.innoviris.brussels/), [IVADO](https://ivado.ca/), [swissuniversities](https://www.swissuniversities.ch), and the [Swiss National Science Foundation](http://www.snf.ch) (grants #179281 and #191730).
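### Minimal example

As a pointer to what the quantification-aggregation workflow above looks like in code, here is a minimal sketch going from the packaged `usnews` corpus to monthly sentiment time series. It only uses datasets and functions exported by the package (see the NAMESPACE); the particular lexicon and aggregation choices are illustrative assumptions, not recommendations, and the JSS vignette remains the authoritative reference.

```R
library("sentometrics")

data("usnews", package = "sentometrics")                # example corpus of US news articles
data("list_lexicons", package = "sentometrics")         # built-in word lists
data("list_valence_shifters", package = "sentometrics") # built-in valence shifters

# quantification: build a sento_corpus and a sento_lexicons object
corpus <- sento_corpus(corpusdf = usnews)
lexicons <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")],
                           list_valence_shifters[["en"]])

# document-level sentiment scores, per lexicon and feature
sentiment <- compute_sentiment(corpus, lexicons, how = "counts")

# aggregation: pool the document scores into monthly sentiment measures
ctr <- ctr_agg(howWithin = "counts", howDocs = "proportional",
               howTime = "equal_weight", by = "month", lag = 3)
measures <- sento_measures(corpus, lexicons = lexicons, ctr = ctr)

plot(measures)
```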
--------------------------------------------------------------------------------
/Sentometrics.Rproj:
--------------------------------------------------------------------------------
Version: 1.0
ProjectId: aad29760-aff4-4264-a2a0-cfc47f37bf4d

RestoreWorkspace: No
SaveWorkspace: No
AlwaysSaveHistory: No

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX

AutoAppendNewline: Yes
StripTrailingWhitespace: Yes

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
--------------------------------------------------------------------------------
/appendix/output_timings.txt:
--------------------------------------------------------------------------------
Run timings for texts size of 1000
Run timings for texts size of 5000
Run timings for texts size of 10000
Run timings for texts size of 25000
Run timings for texts size of 50000
Run timings for texts size of 75000
Run timings for texts size of 1e+05

Run timings for texts size of 1000
Run timings for texts size of 5000
Run timings for texts size of 10000
Run timings for texts size of 25000
Run timings for texts size of 50000
Run timings for texts size of 75000
Run timings for texts size of 1e+05

Run timings for texts size of 1000
Run timings for texts size of 5000
Run timings for texts size of 10000
Run timings for texts size of 25000
Run timings for texts size of 50000
Run timings for texts size of 75000
Run timings for texts size of 1e+05

PANEL A
    texts sento_unigrams sento_bigrams sento_clusters  meanr SentimentAnalysis syuzhet quanteda tidytext
1:   1000         0.2447        0.1976         0.2237 0.0777             1.180  0.5468   0.5985   0.1605
2:   5000         0.8670        0.8678         0.9144 0.3420             5.257  1.9872   1.7366   0.5995
3:  10000         1.7251        1.6773         1.7209 0.6688            11.225  3.8307   3.0742   1.1110
4:  25000         4.4119        4.2144         4.4019 1.7121            26.875  9.0715   7.1894   2.8258
5:  50000         9.1801        8.5456         9.4217 3.7477            53.084 18.3654  14.1207   5.8835
6:  75000        13.6154       13.4873        13.4365 5.0550            78.437 27.1280  20.3666   8.4837
7: 100000        18.6920       18.2231        18.6149 6.5685           109.576 35.2517  26.9816  11.0646

PANEL B
    texts sento_unigrams_many sento_unigrams_many_features sento_bigrams_many sento_clusters_many
1:   1000              0.2608                       0.2394             0.2661              0.2649
2:   5000              1.0047                       0.8692             1.0122              1.0133
3:  10000              1.9642                       1.6790             1.9799              1.9657
4:  25000              4.8174                       4.2360             4.9001              4.9665
5:  50000              9.9563                       8.7101            10.1299             10.0230
6:  75000             16.6963                      19.1378            16.6728             23.0397
7: 100000             32.3982                      23.6591            23.8046             36.4094
    sento_clusters_many_parallel tidytext_unigrams_many tidytext_bigrams_many
1:                        0.2183                 0.2088                0.6596
2:                        0.7902                 0.6694                2.7952
3:                        1.5441                 1.2747                5.6750
4:                        3.8085                 3.0665               13.9461
5:                        7.8541                 6.0343               27.9955
6:                       15.4257                13.9951               58.0154
7:                       30.8611                14.0195               64.7318

##############################
###### SESSION INFO

R version 3.6.2 (2019-12-12)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18362)

Matrix products: default

locale:
[1] LC_COLLATE=English_Belgium.1252  LC_CTYPE=English_Belgium.1252    LC_MONETARY=English_Belgium.1252
[4] LC_NUMERIC=C                     LC_TIME=English_Belgium.1252

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base

other attached packages:
[1] microbenchmark_1.4-7    tidyr_1.0.0             dplyr_0.8.3             lexicon_1.2.1
[5] data.table_1.12.8       SentimentAnalysis_1.3-3 syuzhet_1.0.4           meanr_0.1-2
[9] tidytext_0.2.2          quanteda_1.5.2          sentometrics_0.8.0

loaded via a namespace (and not attached):
 [1] NLP_0.2-0          Rcpp_1.0.3         pillar_1.4.3       compiler_3.6.2     tokenizers_0.2.1   iterators_1.0.12
 [7] tools_3.6.2        stopwords_1.0      zeallot_0.1.0      packrat_0.5.0      lubridate_1.7.4    lifecycle_0.1.0
[13] tibble_2.1.3       gtable_0.3.0       lattice_0.20-38    pkgconfig_2.0.3    rlang_0.4.2        Matrix_1.2-18
[19] foreach_1.4.7      fastmatch_1.1-0    rstudioapi_0.10    parallel_3.6.2     xml2_1.2.2         janeaustenr_0.1.5
[25] stringr_1.4.0      vctrs_0.2.1        generics_0.0.2     grid_3.6.2         tidyselect_0.2.5   glue_1.3.1
[31] R6_2.4.1           ggplot2_3.2.1      purrr_0.3.3        spacyr_1.2         magrittr_1.5       ellipsis_0.3.0
[37] backports_1.1.5    SnowballC_0.6.0    scales_1.1.0       codetools_0.2-16   assertthat_0.2.1   colorspace_1.4-1
[43] stringi_1.4.5      RcppParallel_4.4.4 lazyeval_0.2.2     munsell_0.5.0      slam_0.1-47        tm_0.7-7
[49] crayon_1.3.4
--------------------------------------------------------------------------------
/appendix/vignette_supplementary_appendix.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/appendix/vignette_supplementary_appendix.pdf
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------

## resubmission (version 1.0.1) [03/04/2025]

- removed one faulty URL
--------------------------------------------------------------------------------
/data-raw/FEEL_eng_tr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/FEEL_eng_tr.rda
--------------------------------------------------------------------------------
/data-raw/FEEL_fr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/FEEL_fr.rda
--------------------------------------------------------------------------------
/data-raw/FEEL_nl_tr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/FEEL_nl_tr.rda
--------------------------------------------------------------------------------
/data-raw/GI_eng.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/GI_eng.rda
--------------------------------------------------------------------------------
/data-raw/GI_fr_tr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/GI_fr_tr.rda
--------------------------------------------------------------------------------
/data-raw/GI_nl_tr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/GI_nl_tr.rda
--------------------------------------------------------------------------------
/data-raw/HENRY_eng.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/HENRY_eng.rda
--------------------------------------------------------------------------------
/data-raw/HENRY_fr_tr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/HENRY_fr_tr.rda
--------------------------------------------------------------------------------
/data-raw/HENRY_nl_tr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/HENRY_nl_tr.rda
--------------------------------------------------------------------------------
/data-raw/LM_eng.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/LM_eng.rda
--------------------------------------------------------------------------------
/data-raw/LM_fr_tr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/LM_fr_tr.rda
--------------------------------------------------------------------------------
/data-raw/LM_nl_tr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/LM_nl_tr.rda
--------------------------------------------------------------------------------
/data-raw/US_economic_news_1951-2014.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/US_economic_news_1951-2014.csv
--------------------------------------------------------------------------------
/data-raw/_sources.txt:
--------------------------------------------------------------------------------

LIST_LEXICONS:
LM: https://www3.nd.edu/~mcdonald/Word_Lists.html
> Tim Loughran and Bill McDonald, 2011, "When is a Liability not a Liability? Textual Analysis, Dictionaries, and 10-Ks", Journal of Finance, 66:1, 35-65

HENRY: paper pp. 387
> Elaine Henry. (2008). "Are Investors Influenced by the Way Earnings Press Releases are Written?", The Journal of Business Communication, 4 (45), 363-407

GI: http://www.wjh.harvard.edu/~inquirer/spreadsheet_guide.htm
> Harvard IV-4 dictionary + Lasswell value dictionary

FEEL: http://www.lirmm.fr/~abdaoui/FEEL
> Amine Abdaoui, Jérôme Azé, Sandra Bringay and Pascal Poncelet. "FEEL: French Expanded Emotion Lexicon". Language Resources and Evaluation, LRE 2016, pp. 1-23

LIST_VALENCE_SHIFTERS: R package lexicon

USNEWS: https://www.crowdflower.com/data-for-everyone ("Economic News Article Tone and Relevance")

EPU: http://www.policyuncertainty.com/us_monthly.html
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/FEEL.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/lexicons-raw/FEEL.csv
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/FEEL_eng.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/lexicons-raw/FEEL_eng.csv
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/FEEL_nl.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/lexicons-raw/FEEL_nl.csv
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/GI_fr.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/lexicons-raw/GI_fr.csv
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/GI_nl.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/lexicons-raw/GI_nl.csv
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/HENRY.csv:
--------------------------------------------------------------------------------
Word;Polarity
negative;-1
negatives;-1
fail;-1
fails;-1
failing;-1
failure;-1
weak;-1
weakness;-1
weaknesses;-1
difficult;-1
difficulty;-1
hurdle;-1
hurdles;-1
obstacle;-1
obstacles;-1
slump;-1
slumps;-1
slumping;-1
slumped;-1
uncertain;-1
uncertainty;-1
unsettled;-1
unfavorable;-1
downturn;-1
depressed;-1
disappoint;-1
disappoints;-1
disappointing;-1
disappointed;-1
disappointment;-1
risk;-1
risks;-1
risky;-1
threat;-1
threats;-1
penalty;-1
penalties;-1
down;-1
decrease;-1
decreases;-1
decreasing;-1
decreased;-1
decline;-1
declines;-1
declining;-1
declined;-1
fall;-1
falls;-1
falling;-1
fell;-1
fallen;-1
drop;-1
drops;-1
dropping;-1
dropped;-1
deteriorate;-1
deteriorates;-1
deteriorating;-1
deteriorated;-1
worsen;-1
worsens;-1
worsening;-1
weaken;-1
weakens;-1
weakening;-1
weakened;-1
worse;-1
worst;-1
low;-1
lower;-1
lowest;-1
less;-1
least;-1
smaller;-1
smallest;-1
shrink;-1
shrinks;-1
shrinking;-1
shrunk;-1
below;-1
under;-1
challenge;-1
challenges;-1
challenging;-1
challenged;-1
positive;1
positives;1
success;1
successes;1
successful;1
succeed;1
succeeds;1
succeeding;1
succeeded;1
accomplish;1
accomplishes;1
accomplishing;1
accomplished;1
accomplishment;1
accomplishments;1
strong;1
strength;1
strengths;1
certain;1
certainty;1
definite;1
solid;1
excellent;1
good;1
leading;1
achieve;1
achieves;1
achieved;1
achieving;1
achievement;1
achievements;1
progress;1
progressing;1
deliver;1
delivers;1
delivered;1
delivering;1
leader;1
leading;1
pleased;1
reward;1
rewards;1
rewarding;1
rewarded;1
opportunity;1
opportunities;1
enjoy;1
enjoys;1
enjoying;1
enjoyed;1
encouraged;1
encouraging;1
up;1
increase;1
increases;1
increasing;1
increased;1
rise;1
rises;1
rising;1
rose;1
risen;1
improve;1
improves;1
improving;1
improved;1
improvement;1
improvements;1
strengthen;1
strengthens;1
strengthening;1
strengthened;1
stronger;1
strongest;1
better;1
best;1
more;1
most;1
above;1
record;1
high;1
higher;1
highest;1
greater;1
greatest;1
larger;1
largest;1
grow;1
grows;1
growing;1
grew;1
grown;1
growth;1
expand;1
expands;1
expanding;1
expanded;1
expansion;1
exceed;1
exceeds;1
exceeded;1
exceeding;1
beat;1
beats;1
beating;1
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/HENRY_fr.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/lexicons-raw/HENRY_fr.csv
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/HENRY_nl.csv:
--------------------------------------------------------------------------------
Word;Polarity;Retranslation
negatieve;-1;negative
negatieven;-1;negatives
Fail;-1;Fail
mislukt;-1;failed
falende;-1;failing
storing;-1;failure
zwak;-1;weak
zwakte;-1;weakness
zwakke punten;-1;weak points
moeilijk;-1;difficult
moeilijkheidsgraad;-1;level of difficulty
hindernis;-1;obstacle
horden;-1;hurdles
obstakel;-1;obstacle
obstakels;-1;obstacles
malaise;-1;malaise
laagconjunctuur;-1;slumps
slumping;-1;slumping
zakte;-1;slumped
onzeker;-1;uncertain
onzekerheid;-1;uncertainty
onrustig;-1;restless
ongunstige;-1;unfavorable
neergang;-1;downturn
depressief;-1;depressed
teleurstellen;-1;disappoint
stelt teleur;-1;disappoints
teleurstellend;-1;disappointing
teleurgesteld;-1;disappointed
teleurstelling;-1;disappointment
risico;-1;risk
risico 's;-1;risks
riskant;-1;risky
bedreiging;-1;threat
bedreigingen;-1;threats
straf;-1;Criminal
sancties;-1;sanctions
naar beneden;-1;down
daling;-1;fall
vermindert;-1;reduces
minderen;-1;Juniors
daalde;-1;sank
daling;-1;fall
dalingen;-1;falls
dalende;-1;falling
daalde;-1;sank
Val;-1;Val
Falls;-1;Falls
die vallen;-1;that fall
viel;-1;fell
gedaald;-1;dropped
drop;-1;drop
DROPS;-1;DROPS
dropping;-1;dropping
gedaald;-1;dropped
verslechteren;-1;deteriorate
verslechtert;-1;deteriorates
verslechterende;-1;deteriorating
verslechterd;-1;deteriorated
verergeren;-1;worsen
verergert;-1;worsens
verslechtering;-1;deterioration
verzwakken;-1;weaken
verzwakt;-1;weakened
verzwakking;-1;weakening
verzwakt;-1;weakened
erger;-1;worse
slechtste;-1;worst
lage;-1;low
lagere;-1;lower
laagste;-1;lowest
minder;-1;less
minste;-1;least
kleinere;-1;smaller
kleinste;-1;smallest
krimpen;-1;shrink
krimpt;-1;shrinks
krimpen;-1;shrink
gekrompen;-1;shrunk
Hieronder;-1;Below
onder;-1;under
uitdaging;-1;challenge
uitdagingen;-1;challenges
uitdagende;-1;challenging
uitgedaagd;-1;challenged
positieve;1;positive
positieven;1;positives
succes;1;Good luck
successen;1;successes
succesvolle;1;successful
slagen;1;succeed
slaagt;1;succeeds
slagen;1;succeed
geslaagd;1;managed
bereiken;1;reach
volbrengt;1;accomplishes
volbrengen;1;accomplish
bereikt;1;achieved
prestatie;1;performance
prestaties;1;performance
sterke;1;strong
sterkte;1;strength
sterke punten;1;strong points
bepaalde;1;certain
zekerheid;1;Security
duidelijke;1;clear
solide;1;solid
Uitstekend;1;Excellent
goede;1;good
leiden;1;lead
bereiken;1;reach
bereikt;1;achieved
bereikt;1;achieved
verwezenlijking van;1;creation of
prestatie;1;performance
resultaten;1;results
vooruitgang;1;progress
vordert;1;progresses
leveren;1;deliver
levert;1;delivers
afgeleverd;1;delivered
leveren van;1;delivering
leider;1;leader
leiden;1;lead
blij;1;happy
beloning;1;reward
beloningen;1;Rewards
belonen;1;reward
beloond;1;rewarded
kans;1;chance
kansen;1;opportunities
Geniet van;1;Enjoy
Geniet van;1;Enjoy
genieten van;1;enjoy
genoten;1;enjoyed
aangemoedigd;1;encouraged
stimuleren;1;boost
omhoog;1;up
verhoging van de;1;increase of the
verhoogt;1;increases
verhogen;1;increase
verhoogd;1;increased
opkomst;1;attendance
stijgt;1;increases
stijgen;1;ascent
steeg;1;rose
gestegen;1;increased
verbeteren;1;improve
verbetert;1;improves
verbetering van de;1;improvement of the
verbeterd;1;improved
verbetering;1;improvement
verbeteringen;1;improvements
versterken;1;strengthening
versterkt;1;strengthened
versterking van de;1;strengthening of the
versterkt;1;strengthened
sterker;1;stronger
sterkste;1;strongest
beter;1;better
beste;1;best
meer;1;more
de meeste;1;most
boven;1;above
record;1;record
hoge;1;high
hogere;1;higher
hoogste;1;highest
meer;1;more
grootste;1;largest
grotere;1;larger
grootste;1;largest
groeien;1;grow
groeit;1;grows
groeiende;1;growing
groeide;1;grew
gegroeid;1;grown
groei;1;growth
Vouw;1;Fold
breidt uit;1;expands
uit te breiden;1;expand
uitgebreid;1;extended
uitbreiding;1;extension
hoger zijn dan;1;higher than
overschrijdt;1;exceeds
overschreden;1;exceeded
meer dan;1;more than
Beat;1;Beat
beats;1;beats
pak slaag;1;spanking
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/LM_fr.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/lexicons-raw/LM_fr.csv
--------------------------------------------------------------------------------
/data-raw/lexicons-raw/LM_nl.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/lexicons-raw/LM_nl.csv
--------------------------------------------------------------------------------
/data-raw/valence-raw/valShifters.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/valence-raw/valShifters.rda
--------------------------------------------------------------------------------
/data-raw/valence_eng.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/valence_eng.rda
--------------------------------------------------------------------------------
/data-raw/valence_fr.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/valence_fr.rda
--------------------------------------------------------------------------------
/data-raw/valence_nl.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data-raw/valence_nl.rda
--------------------------------------------------------------------------------
/data/epu.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data/epu.rda
--------------------------------------------------------------------------------
/data/list_lexicons.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data/list_lexicons.rda
--------------------------------------------------------------------------------
/data/list_valence_shifters.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data/list_valence_shifters.rda
--------------------------------------------------------------------------------
/data/usnews.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/data/usnews.rda
--------------------------------------------------------------------------------
/docs/apple-touch-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/apple-touch-icon-120x120.png
--------------------------------------------------------------------------------
/docs/apple-touch-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/apple-touch-icon-152x152.png
--------------------------------------------------------------------------------
/docs/apple-touch-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/apple-touch-icon-180x180.png
--------------------------------------------------------------------------------
/docs/apple-touch-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/apple-touch-icon-60x60.png
--------------------------------------------------------------------------------
/docs/apple-touch-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/apple-touch-icon-76x76.png
--------------------------------------------------------------------------------
/docs/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/apple-touch-icon.png
--------------------------------------------------------------------------------
/docs/articles/applications/epu_files/figure-html/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/applications/epu_files/figure-html/unnamed-chunk-10-1.png
--------------------------------------------------------------------------------
/docs/articles/applications/epu_files/figure-html/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/applications/epu_files/figure-html/unnamed-chunk-15-1.png
--------------------------------------------------------------------------------
/docs/articles/applications/epu_files/figure-html/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/applications/epu_files/figure-html/unnamed-chunk-9-1.png
--------------------------------------------------------------------------------
/docs/articles/applications/epu_files/header-attrs-2.10/header-attrs.js:
--------------------------------------------------------------------------------
// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var i, h, a;
  for (i = 0; i < hs.length; i++) {
    h = hs[i];
    if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
    a = h.attributes;
    while (a.length > 0) h.removeAttribute(a[0].name);
  }
});
--------------------------------------------------------------------------------
/docs/articles/applications/epu_files/header-attrs-2.9/header-attrs.js:
--------------------------------------------------------------------------------
// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var i, h, a;
  for (i = 0; i < hs.length; i++) {
    h = hs[i];
    if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
    a = h.attributes;
    while (a.length > 0) h.removeAttribute(a[0].name);
  }
});
--------------------------------------------------------------------------------
/docs/articles/applications/vix_files/figure-html/unnamed-chunk-14-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/applications/vix_files/figure-html/unnamed-chunk-14-1.png
--------------------------------------------------------------------------------
/docs/articles/applications/vix_files/figure-html/unnamed-chunk-16-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/applications/vix_files/figure-html/unnamed-chunk-16-1.png
--------------------------------------------------------------------------------
/docs/articles/applications/vix_files/figure-html/unnamed-chunk-16-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/applications/vix_files/figure-html/unnamed-chunk-16-2.png
--------------------------------------------------------------------------------
/docs/articles/applications/vix_files/figure-html/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/applications/vix_files/figure-html/unnamed-chunk-9-1.png
--------------------------------------------------------------------------------
/docs/articles/applications/vix_files/header-attrs-2.10/header-attrs.js:
--------------------------------------------------------------------------------
// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var i, h, a;
  for (i = 0; i < hs.length; i++) {
    h = hs[i];
    if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
    a = h.attributes;
    while (a.length > 0) h.removeAttribute(a[0].name);
  }
});
--------------------------------------------------------------------------------
/docs/articles/applications/vix_files/header-attrs-2.9/header-attrs.js:
--------------------------------------------------------------------------------
// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var i, h, a;
  for (i = 0; i < hs.length; i++) {
    h = hs[i];
    if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
    a = h.attributes;
    while (a.length > 0) h.removeAttribute(a[0].name);
  }
});
--------------------------------------------------------------------------------
/docs/articles/contributions/gopress_figures/read_later.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/contributions/gopress_figures/read_later.jpg
--------------------------------------------------------------------------------
/docs/articles/contributions/gopress_figures/save_as.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/contributions/gopress_figures/save_as.jpg
--------------------------------------------------------------------------------
/docs/articles/contributions/gopress_files/figure-html/sento 3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/contributions/gopress_files/figure-html/sento 3-1.png
--------------------------------------------------------------------------------
/docs/articles/contributions/gopress_files/figure-html/sento topic 3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/contributions/gopress_files/figure-html/sento topic 3-1.png
--------------------------------------------------------------------------------
/docs/articles/contributions/gopress_files/figure-html/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/contributions/gopress_files/figure-html/unnamed-chunk-3-1.png
--------------------------------------------------------------------------------
/docs/articles/contributions/gopress_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/contributions/gopress_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/articles/contributions/gopress_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/contributions/gopress_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/contributions/isa_files/figure-html/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/contributions/isa_files/figure-html/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /docs/articles/contributions/isa_files/figure-html/unnamed-chunk-19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/contributions/isa_files/figure-html/unnamed-chunk-19-1.png -------------------------------------------------------------------------------- /docs/articles/contributions/isa_files/figure-html/unnamed-chunk-27-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/contributions/isa_files/figure-html/unnamed-chunk-27-1.png -------------------------------------------------------------------------------- /docs/articles/contributions/isa_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/contributions/isa_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/development_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/development_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/examples/corpus_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/corpus_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /docs/articles/examples/corpus_files/figure-html/unnamed-chunk-5-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/corpus_files/figure-html/unnamed-chunk-5-2.png -------------------------------------------------------------------------------- /docs/articles/examples/corpus_files/figure-html/unnamed-chunk-5-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/corpus_files/figure-html/unnamed-chunk-5-3.png -------------------------------------------------------------------------------- /docs/articles/examples/corpus_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/examples/corpus_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/examples/indexation_files/figure-html/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/indexation_files/figure-html/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /docs/articles/examples/indexation_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/indexation_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/articles/examples/indexation_files/figure-html/unnamed-chunk-4-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/indexation_files/figure-html/unnamed-chunk-4-2.png -------------------------------------------------------------------------------- /docs/articles/examples/indexation_files/figure-html/unnamed-chunk-4-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/indexation_files/figure-html/unnamed-chunk-4-3.png -------------------------------------------------------------------------------- /docs/articles/examples/indexation_files/figure-html/unnamed-chunk-4-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/indexation_files/figure-html/unnamed-chunk-4-4.png -------------------------------------------------------------------------------- /docs/articles/examples/indexation_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/examples/indexation_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/examples/modeling_files/figure-html/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/modeling_files/figure-html/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /docs/articles/examples/modeling_files/figure-html/unnamed-chunk-11-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/modeling_files/figure-html/unnamed-chunk-11-2.png -------------------------------------------------------------------------------- /docs/articles/examples/modeling_files/figure-html/unnamed-chunk-11-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/modeling_files/figure-html/unnamed-chunk-11-3.png -------------------------------------------------------------------------------- /docs/articles/examples/modeling_files/figure-html/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/modeling_files/figure-html/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /docs/articles/examples/modeling_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/examples/modeling_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/examples/sentiment_files/figure-html/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/articles/examples/sentiment_files/figure-html/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /docs/articles/examples/sentiment_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/examples/sentiment_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/sentometrics_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/sentometrics_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ 7 | 8 | /* All levels of nav */ 9 | nav[data-toggle='toc'] .nav > li > a { 10 | display: block; 11 | padding: 4px 20px; 12 | font-size: 13px; 13 | font-weight: 500; 14 | color: #767676; 15 | } 16 | nav[data-toggle='toc'] .nav > li > a:hover, 17 | nav[data-toggle='toc'] .nav > li > a:focus { 18 | padding-left: 19px; 19 | color: #563d7c; 20 | text-decoration: none; 21 | background-color: transparent; 22 | border-left: 1px solid #563d7c; 23 | } 24 | nav[data-toggle='toc'] .nav > .active > a, 25 | nav[data-toggle='toc'] .nav > .active:hover > a, 26 | nav[data-toggle='toc'] .nav > .active:focus > a { 27 | padding-left: 18px; 28 | font-weight: bold; 29 | color: #563d7c; 30 | background-color: transparent; 31 | border-left: 2px solid #563d7c; 32 | } 33 | 34 | /* Nav: second level (shown on .active) */ 35 | nav[data-toggle='toc'] .nav .nav { 36 | display: none; /* Hide by default, but at >768px, show it */ 37 | padding-bottom: 10px; 38 | } 39 | nav[data-toggle='toc'] .nav .nav > li > a { 40 | padding-top: 1px; 41 | padding-bottom: 1px; 42 | padding-left: 30px; 43 | font-size: 12px; 44 | font-weight: normal; 45 | } 46 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 47 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 48 | padding-left: 29px; 49 | } 50 | nav[data-toggle='toc'] .nav .nav > .active > a, 51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a { 53 | padding-left: 28px; 54 | font-weight: 500; 55 | } 56 | 57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 58 | nav[data-toggle='toc'] .nav > .active > ul { 59 | display: block; 60 | } 61 | -------------------------------------------------------------------------------- /docs/docsearch.js: 
-------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/docsearch.json: -------------------------------------------------------------------------------- 1 | { 2 | "index_name": "sentometrics", 3 | "start_urls": [ 4 | { 5 | "url": "https://sentometricsresearch.github.io/sentometrics/index.html", 6 | "selectors_key": "homepage", 7 | "tags": [ 8 | "homepage" 9 | ] 10 | }, 11 | { 12 | "url": "https://sentometricsresearch.github.io/sentometrics/reference", 13 | "selectors_key": "reference", 14 | "tags": [ 15 | "reference" 16 | ] 17 | }, 18 | { 19 | "url": "https://sentometricsresearch.github.io/sentometrics/articles", 20 | "selectors_key": "articles", 21 | "tags": [ 22 | "articles" 23 | ] 24 | } 25 | ], 26 | "stop_urls": [ 27 | "/reference/$", 28 | "/reference/index.html", 29 | "/articles/$", 30 | "/articles/index.html" 31 | ], 32 | "sitemap_urls": [ 33 | "https://sentometricsresearch.github.io/sentometrics/sitemap.xml" 34 | ], 35 | "selectors": { 36 | "homepage": { 37 | "lvl0": { 38 | "selector": ".contents h1", 39 | "default_value": "sentometrics Home page" 40 | }, 41 | "lvl1": { 42 | "selector": ".contents h2" 43 | }, 44 | "lvl2": { 45 | "selector": ".contents h3", 46 | "default_value": "Context" 47 | }, 48 | "lvl3": ".ref-arguments td, .ref-description", 49 | "text": ".contents p, .contents li, .contents .pre" 50 | }, 51 | 
"reference": { 52 | "lvl0": { 53 | "selector": ".contents h1" 54 | }, 55 | "lvl1": { 56 | "selector": ".contents .name", 57 | "default_value": "Argument" 58 | }, 59 | "lvl2": { 60 | "selector": ".ref-arguments th", 61 | "default_value": "Description" 62 | }, 63 | "lvl3": ".ref-arguments td, .ref-description", 64 | "text": ".contents p, .contents li" 65 | }, 66 | "articles": { 67 | "lvl0": { 68 | "selector": ".contents h1" 69 | }, 70 | "lvl1": { 71 | "selector": ".contents .name" 72 | }, 73 | "lvl2": { 74 | "selector": ".contents h2, .contents h3", 75 | "default_value": "Context" 76 | }, 77 | "text": ".contents p, .contents li" 78 | } 79 | }, 80 | "selectors_exclude": [ 81 | ".dont-index" 82 | ], 83 | "min_indexed_level": 2, 84 | "custom_settings": { 85 | "separatorsToIndex": "_", 86 | "attributesToRetrieve": [ 87 | "hierarchy", 88 | "content", 89 | "anchor", 90 | "url", 91 | "url_without_anchor" 92 | ] 93 | } 94 | } 95 | 96 | -------------------------------------------------------------------------------- /docs/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/favicon-16x16.png -------------------------------------------------------------------------------- /docs/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/favicon-32x32.png -------------------------------------------------------------------------------- /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/favicon.ico -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/logo.png -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('.navbar-fixed-top').headroom(); 6 | 7 | $('body').css('padding-top', $('.navbar').height() + 10); 8 | $(window).resize(function(){ 9 | $('body').css('padding-top', $('.navbar').height() + 10); 10 | }); 11 | 12 | $('[data-toggle="tooltip"]').tooltip(); 13 | 14 | var cur_path = paths(location.pathname); 15 | var links = $("#navbar ul li a"); 16 | var max_length = -1; 17 | var pos = -1; 18 | for (var i = 0; i < links.length; i++) { 19 | if (links[i].getAttribute("href") === "#") 20 | continue; 21 | // Ignore external links 22 | if (links[i].host !== location.host) 23 | continue; 24 | 25 | var nav_path = paths(links[i].pathname); 26 | 27 | var length = prefix_length(nav_path, cur_path); 28 | if (length > max_length) { 29 | max_length = length; 30 | pos = i; 31 | } 32 | } 33 | 34 | // Add class to parent
35 | if (pos >= 0) { 36 | var menu_anchor = $(links[pos]); 37 | menu_anchor.parent().addClass("active"); 38 | menu_anchor.closest("li.dropdown").addClass("active"); 39 | } 40 | }); 41 | 42 | function paths(pathname) { 43 | var pieces = pathname.split("/"); 44 | pieces.shift(); // always starts with / 45 | 46 | var end = pieces[pieces.length - 1]; 47 | if (end === "index.html" || end === "") 48 | pieces.pop(); 49 | return(pieces); 50 | } 51 | 52 | // Returns -1 if not found 53 | function prefix_length(needle, haystack) { 54 | if (needle.length > haystack.length) 55 | return(-1); 56 | 57 | // Special case for length-0 haystack, since for loop won't run 58 | if (haystack.length === 0) { 59 | return(needle.length === 0 ? 0 : -1); 60 | } 61 | 62 | for (var i = 0; i < haystack.length; i++) { 63 | if (needle[i] != haystack[i]) 64 | return(i); 65 | } 66 | 67 | return(haystack.length); 68 | } 69 | 70 | /* Clipboard --------------------------*/ 71 | 72 | function changeTooltipMessage(element, msg) { 73 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 74 | element.setAttribute('data-original-title', msg); 75 | $(element).tooltip('show'); 76 | element.setAttribute('data-original-title', tooltipOriginalTitle); 77 | } 78 | 79 | if(ClipboardJS.isSupported()) { 80 | $(document).ready(function() { 81 | var copyButton = "<button type='button' class='btn btn-primary btn-copy-ex' title='Copy to clipboard' aria-label='Copy to clipboard' data-toggle='tooltip' data-placement='left auto' data-trigger='hover' data-clipboard-copy><i class='fa fa-copy'></i></button>"; 82 | 83 | $("div.sourceCode").addClass("hasCopyButton"); 84 | 85 | // Insert copy buttons: 86 | $(copyButton).prependTo(".hasCopyButton"); 87 | 88 | // Initialize tooltips: 89 | $('.btn-copy-ex').tooltip({container: 'body'}); 90 | 91 | // Initialize clipboard: 92 | var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { 93 | text: function(trigger) { 94 | return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); 95 | } 96 | }); 97 | 98 | clipboardBtnCopies.on('success', function(e) { 99 | changeTooltipMessage(e.trigger, 'Copied!'); 100 | e.clearSelection(); 101 | }); 102 | 103 | clipboardBtnCopies.on('error', function(e) { 104 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 105 | }); 106 | }); 107 | } 108 | })(window.jQuery || window.$) 109 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: '3.2' 2 | pkgdown: 2.1.1 3 | pkgdown_sha: ~ 4 | articles: 5 | examples/corpus: examples/corpus.html 6 | development: development.html 7 | applications/epu: applications/epu.html 8 | contributions/gopress: contributions/gopress.html 9 | examples/indexation: examples/indexation.html 10 | contributions/isa: contributions/isa.html 11 | examples/modeling: examples/modeling.html 12 | examples/sentiment: examples/sentiment.html 13 | sentometrics: sentometrics.html 14 | applications/vix: applications/vix.html 15 | last_built: 2025-04-02T11:29Z 16 | urls: 17 | reference: https://sentometricsresearch.github.io/sentometrics/reference 18 | article: https://sentometricsresearch.github.io/sentometrics/articles -------------------------------------------------------------------------------- /docs/reference/Rplot001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/reference/Rplot001.png -------------------------------------------------------------------------------- /docs/reference/figures/gsoc.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/reference/figures/gsoc.png -------------------------------------------------------------------------------- /docs/reference/figures/innoviris.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/reference/figures/innoviris.png -------------------------------------------------------------------------------- /docs/reference/figures/ivado.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/reference/figures/ivado.png -------------------------------------------------------------------------------- /docs/reference/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/reference/figures/logo.png -------------------------------------------------------------------------------- /docs/reference/figures/snsf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/reference/figures/snsf.png -------------------------------------------------------------------------------- /docs/reference/figures/swissuniversities.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/reference/figures/swissuniversities.png -------------------------------------------------------------------------------- /docs/reference/plot.sento_measures-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/docs/reference/plot.sento_measures-1.png -------------------------------------------------------------------------------- /examples/vix.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/examples/vix.rda -------------------------------------------------------------------------------- /index.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | [![CRAN](https://www.r-pkg.org/badges/version/sentometrics)](https://cran.r-project.org/package=sentometrics) 4 | [![codecov](https://codecov.io/github/SentometricsResearch/sentometrics/branch/master/graphs/badge.svg)](https://codecov.io/github/SentometricsResearch/sentometrics) 5 | [![Downloads](https://cranlogs.r-pkg.org/badges/last-day/sentometrics?color=ff69b4)](https://www.r-pkg.org/pkg/sentometrics) 6 | [![Downloads](https://cranlogs.r-pkg.org/badges/sentometrics?color=ff69b4)](https://www.r-pkg.org/pkg/sentometrics) 7 | [![Downloads](https://cranlogs.r-pkg.org/badges/grand-total/sentometrics?color=ff69b4)](https://www.r-pkg.org/pkg/sentometrics) 8 | 9 | 10 | # sentometrics 11 | 12 | > The **`sentometrics`** package offers an **integrated 
framework for textual sentiment time series aggregation and prediction**. It accounts for the intrinsic challenge that textual sentiment can be computed in many different ways, as well as the large number of possibilities to pool sentiment into a time series index. The package integrates the fast _quantification_ of sentiment from texts, the _aggregation_ into different sentiment time series, and the _prediction_ based on these measures. All in one coherent workflow! 13 | 14 | Explore this package website to learn what you can do with **`sentometrics`** and how. 15 | 16 | ### Reference 17 | 18 | Please cite **`sentometrics`** in publications. See the **Citation** section on the right. 19 | 20 | ### Acknowledgements 21 | 22 | This software package originates from a 23 | [Google Summer of Code 2017](https://github.com/rstats-gsoc/gsoc2017/wiki/Sentometrics:-An-integrated-framework-for-text-based-multivariate-time-series-modeling-and-forecasting) project, was further developed 24 | during a follow-up [Google Summer of Code 2019](https://github.com/rstats-gsoc/gsoc2019/wiki/sentometrics) project, and benefited from financial support from [Innoviris](https://innoviris.brussels), [IVADO](https://www.ivado.ca), [swissuniversities](https://www.swissuniversities.ch), and the [Swiss National Science Foundation](http://www.snf.ch) (grants #179281 and #191730). 25 | 26 | 31 | 32 | ### Contact 33 | 34 | Reach out to [Samuel Borms](mailto:borms_sam@hotmail.com) if you have questions or suggestions, or want to become a contributor. See the **News > Development** section to find out what you can help us with. 35 | 36 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | 2 | bibentry(bibtype = "Article", 3 | title = "The {R} Package {sentometrics} to Compute, Aggregate, and Predict with Textual Sentiment", 4 | author = c(person(given = "David", 5 | family = "Ardia", 6 | email = "david.ardia@hec.ca"), 7 | person(given = "Keven", 8 | family = "Bluteau", 9 | email = "keven.bluteau@usherbrooke.ca"), 10 | person(given = "Samuel", 11 | family = "Borms", 12 | email = "borms_sam@hotmail.com"), 13 | person(given = "Kris", 14 | family = "Boudt", 15 | email = "kris.boudt@ugent.be")), 16 | journal = "Journal of Statistical Software", 17 | year = "2021", 18 | volume = "99", 19 | number = "2", 20 | pages = "1--40", 21 | doi = "10.18637/jss.v099.i02", 22 | header = "To cite sentometrics in publications use:" 23 | ) 24 | 25 | -------------------------------------------------------------------------------- /inst/extdata/test_data.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/inst/extdata/test_data.rda -------------------------------------------------------------------------------- /man/add_features.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentocorpus.R 3 | \name{add_features} 4 | \alias{add_features} 5 | \title{Add feature columns to a (sento_)corpus object} 6 | \usage{ 7 | add_features( 8 | corpus, 9 | featuresdf = NULL, 10 | keywords = NULL, 11 | do.binary = TRUE, 12 | do.regex = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{corpus}{a \code{sento_corpus} object created with \code{\link{sento_corpus}}, or
a \pkg{quanteda} 17 | \code{\link[quanteda]{corpus}} object.} 18 | 19 | \item{featuresdf}{a named \code{data.frame} of type \code{numeric} where each column is a new feature to be added to the 20 | inputted \code{corpus} object. If the number of rows in \code{featuresdf} is not equal to the number of documents 21 | in \code{corpus}, recycling will occur. The numeric values should be between 0 and 1 (inclusive).} 22 | 23 | \item{keywords}{a named \code{list}. For every element, a new feature column is added with a value of 1 for the texts 24 | in which (at least one of) the keyword(s) appear(s), and 0 if not (for \code{do.binary = TRUE}), or with the 25 | normalized number of times the keyword(s) occur(s) in the text as value (for \code{do.binary = FALSE}). If no texts match a 26 | keyword, no column is added. The \code{list} names are used as the names of the new features. For more complex searching, 27 | instead of just keywords, one can also directly use a single regex expression to define a new feature (see the details section).} 28 | 29 | \item{do.binary}{a \code{logical}, if \code{do.binary = FALSE}, the number of occurrences is normalized 30 | between 0 and 1 (see argument \code{keywords}).} 31 | 32 | \item{do.regex}{a \code{logical} vector equal in length to the number of elements in the \code{keywords} argument 33 | \code{list}, or a single value if it applies to all. It should be set to \code{TRUE} at those positions where a single 34 | regex expression is used to identify the particular feature.} 35 | } 36 | \value{ 37 | An updated \code{corpus} object. 38 | } 39 | \description{ 40 | Adds new feature columns, either user-supplied or based on keyword(s)/regex pattern search, to 41 | a provided \code{sento_corpus} or a \pkg{quanteda} \code{\link[quanteda]{corpus}} object. 42 | } 43 | \details{ 44 | If a provided feature name is already part of the corpus, it will be replaced. The \code{featuresdf} and 45 | \code{keywords} arguments can be provided at the same time, or only one of them, leaving the other at \code{NULL}. We use 46 | the \pkg{stringi} package for searching the keywords. The \code{do.regex} argument points to the corresponding elements 47 | in \code{keywords}. For \code{FALSE}, we transform the keywords into a simple regex expression, involving \code{"\\b"} for 48 | exact word boundary matching and (if multiple keywords) \code{|} as an OR operator. The elements associated with \code{TRUE} do 49 | not undergo this transformation, and are evaluated as given, provided the corresponding keywords vector consists of only one 50 | expression. For a large corpus and/or complex regex patterns, this function may require some patience. Scaling between 0 51 | and 1 is performed via min-max normalization, per column.
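As a minimal sketch of this transformation (an illustration consistent with the description above, not an
excerpt of the package internals), the keywords vector \code{c("Obama", "US president")} with
\code{do.regex = FALSE} is matched as if the single regex \code{"\\\\bObama\\\\b|\\\\bUS president\\\\b"}
had been supplied, a pattern that can be built in R as
\preformatted{paste0("\\\\b", c("Obama", "US president"), "\\\\b", collapse = "|")}
This is also why \code{corpus3} and \code{corpus4} in the examples below flag the same documents for the
\code{pres} and \code{pres2} features.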
52 | } 53 | \examples{ 54 | set.seed(505) 55 | 56 | # construct a corpus and add (a) feature(s) to it 57 | corpus <- quanteda::corpus_sample( 58 | sento_corpus(corpusdf = sentometrics::usnews), 500 59 | ) 60 | corpus1 <- add_features(corpus, 61 | featuresdf = data.frame(random = runif(quanteda::ndoc(corpus)))) 62 | corpus2 <- add_features(corpus, 63 | keywords = list(pres = "president", war = "war"), 64 | do.binary = FALSE) 65 | corpus3 <- add_features(corpus, 66 | keywords = list(pres = c("Obama", "US president"))) 67 | corpus4 <- add_features(corpus, 68 | featuresdf = data.frame(all = 1), 69 | keywords = list(pres1 = "Obama|US [p|P]resident", 70 | pres2 = "\\\\bObama\\\\b|\\\\bUS president\\\\b", 71 | war = "war"), 72 | do.regex = c(TRUE, TRUE, FALSE)) 73 | 74 | sum(quanteda::docvars(corpus3, "pres")) == 75 | sum(quanteda::docvars(corpus4, "pres2")) # TRUE 76 | 77 | # adding a complementary feature 78 | nonpres <- data.frame(nonpres = as.numeric(!quanteda::docvars(corpus3, "pres"))) 79 | corpus3 <- add_features(corpus3, featuresdf = nonpres) 80 | 81 | } 82 | \author{ 83 | Samuel Borms 84 | } 85 | -------------------------------------------------------------------------------- /man/aggregate.sentiment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentomeasures_main.R 3 | \name{aggregate.sentiment} 4 | \alias{aggregate.sentiment} 5 | \title{Aggregate textual sentiment across sentences, documents and time} 6 | \usage{ 7 | \method{aggregate}{sentiment}(x, ctr, do.full = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{sentiment} object created using \code{\link{compute_sentiment}} (from a \code{sento_corpus} 11 | object) or using \code{\link{as.sentiment}}.} 12 | 13 | \item{ctr}{output from a \code{\link{ctr_agg}} call. The \code{howWithin} and \code{nCore} elements are ignored.} 14 | 15 | \item{do.full}{if \code{do.full = TRUE} (by default), does entire aggregation up to a \code{sento_measures} 16 | object, else only goes from sentence-level to document-level. Ignored if no \code{"sentence_id"} column in 17 | \code{sentiment} input object.} 18 | 19 | \item{...}{not used.} 20 | } 21 | \value{ 22 | A document-level \code{sentiment} object or a fully aggregated \code{sento_measures} object. 23 | } 24 | \description{ 25 | Aggregates textual sentiment scores at sentence- or document-level into a panel of textual 26 | sentiment measures. Can also be used to aggregate sentence-level sentiment scores into 27 | document-level sentiment scores. This function is called within the \code{\link{sento_measures}} function. 
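In rough terms (a sketch of the relationship, abstracting from the exact internal call), for a
\code{sento_corpus} object \code{x}, a \code{sento_lexicons} object \code{lexicons} and a control
object \code{ctr} from \code{\link{ctr_agg}}, calling \code{sento_measures(x, lexicons, ctr)} boils down to
\code{aggregate(compute_sentiment(x, lexicons, how = ctr$howWithin), ctr)}.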
28 | } 29 | \examples{ 30 | set.seed(505) 31 | 32 | data("usnews", package = "sentometrics") 33 | data("list_lexicons", package = "sentometrics") 34 | data("list_valence_shifters", package = "sentometrics") 35 | 36 | # computation of sentiment 37 | corpus <- sento_corpus(corpusdf = usnews) 38 | corpusSample <- quanteda::corpus_sample(corpus, size = 500) 39 | l1 <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")], 40 | list_valence_shifters[["en"]]) 41 | l2 <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")], 42 | list_valence_shifters[["en"]][, c("x", "t")]) 43 | sent1 <- compute_sentiment(corpusSample, l1, how = "counts") 44 | sent2 <- compute_sentiment(corpusSample, l2, do.sentence = TRUE) 45 | sent3 <- compute_sentiment(as.character(corpusSample), l2, 46 | do.sentence = TRUE) 47 | ctr <- ctr_agg(howTime = c("linear"), by = "year", lag = 3) 48 | 49 | # aggregate into sentiment measures 50 | sm1 <- aggregate(sent1, ctr) 51 | sm2 <- aggregate(sent2, ctr) 52 | 53 | # two-step aggregation (first into document-level sentiment) 54 | sd2 <- aggregate(sent2, ctr, do.full = FALSE) 55 | sm3 <- aggregate(sd2, ctr) 56 | 57 | # aggregation of a sentiment data.table 58 | cols <- c("word_count", names(l2)[-length(l2)]) 59 | sd3 <- sent3[, lapply(.SD, sum), by = "id", .SDcols = cols] 60 | 61 | } 62 | \seealso{ 63 | \code{\link{compute_sentiment}}, \code{\link{ctr_agg}}, \code{\link{sento_measures}} 64 | } 65 | \author{ 66 | Samuel Borms, Keven Bluteau 67 | } 68 | -------------------------------------------------------------------------------- /man/as.data.table.sento_measures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentomeasures_methods.R 3 | \name{as.data.table.sento_measures} 4 | \alias{as.data.table.sento_measures} 5 | \title{Get the sentiment measures} 6 | \usage{ 7 | \method{as.data.table}{sento_measures}(x, keep.rownames = FALSE, format = "wide", ...) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{sento_measures} object created using \code{\link{sento_measures}}.} 11 | 12 | \item{keep.rownames}{see \code{\link[data.table]{as.data.table}}.} 13 | 14 | \item{format}{a single \code{character} vector, one of \code{c("wide", "long")}.} 15 | 16 | \item{...}{not used.} 17 | } 18 | \value{ 19 | The panel of sentiment measures under \code{sento_measures[["measures"]]}, 20 | in wide or long format. 21 | } 22 | \description{ 23 | Extracts the sentiment measures \code{data.table} in either wide (by default) 24 | or long format. 
25 | } 26 | \examples{ 27 | data("usnews", package = "sentometrics") 28 | data("list_lexicons", package = "sentometrics") 29 | data("list_valence_shifters", package = "sentometrics") 30 | 31 | sm <- sento_measures(sento_corpus(corpusdf = usnews[1:200, ]), 32 | sento_lexicons(list_lexicons["LM_en"]), 33 | ctr_agg(lag = 3)) 34 | 35 | data.table::as.data.table(sm) 36 | data.table::as.data.table(sm, format = "long") 37 | 38 | } 39 | \author{ 40 | Samuel Borms 41 | } 42 | -------------------------------------------------------------------------------- /man/as.sentiment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentiment_engines.R 3 | \name{as.sentiment} 4 | \alias{as.sentiment} 5 | \title{Convert a sentiment table to a sentiment object} 6 | \usage{ 7 | as.sentiment(s) 8 | } 9 | \arguments{ 10 | \item{s}{a \code{data.table} or \code{data.frame} that can be converted into a \code{sentiment} object. It 11 | should have at least an \code{"id"}, a \code{"date"}, a \code{"word_count"} and one sentiment scores column. 12 | If other column names are provided with a separating \code{"--"}, the first part is considered the lexicon 13 | (or more generally, the sentiment computation method), and the second part the feature. For sentiment column 14 | names without any \code{"--"}, a \code{"dummyFeature"} component is added.} 15 | } 16 | \value{ 17 | A \code{sentiment} object. 18 | } 19 | \description{ 20 | Converts a properly structured sentiment table into a \code{sentiment} object, which can be used 21 | for further aggregation with the \code{\link{aggregate.sentiment}} function. This makes it possible to start from 22 | sentiment scores not necessarily computed with \code{\link{compute_sentiment}}. 23 | } 24 | \examples{ 25 | set.seed(505) 26 | 27 | data("usnews", package = "sentometrics") 28 | data("list_lexicons", package = "sentometrics") 29 | 30 | ids <- paste0("id", 1:200) 31 | dates <- sample(seq(as.Date("2015-01-01"), as.Date("2018-01-01"), by = "day"), 200, TRUE) 32 | word_count <- sample(150:850, 200, replace = TRUE) 33 | sent <- matrix(rnorm(200 * 8), nrow = 200) 34 | s1 <- s2 <- data.table::data.table(id = ids, date = dates, word_count = word_count, sent) 35 | s3 <- data.frame(id = ids, date = dates, word_count = word_count, sent, 36 | stringsAsFactors = FALSE) 37 | s4 <- compute_sentiment(usnews$texts[201:400], 38 | sento_lexicons(list_lexicons["GI_en"]), 39 | "counts", do.sentence = TRUE) 40 | m <- "method" 41 | 42 | colnames(s1)[-c(1:3)] <- paste0(m, 1:8) 43 | sent1 <- as.sentiment(s1) 44 | 45 | colnames(s2)[-c(1:3)] <- c(paste0(m, 1:4, "--", "feat1"), paste0(m, 1:4, "--", "feat2")) 46 | sent2 <- as.sentiment(s2) 47 | 48 | colnames(s3)[-c(1:3)] <- c(paste0(m, 1:3, "--", "feat1"), paste0(m, 1:3, "--", "feat2"), 49 | paste0(m, 4:5)) 50 | sent3 <- as.sentiment(s3) 51 | 52 | s4[, "date" := rep(dates, s4[, max(sentence_id), by = id][[2]])] 53 | sent4 <- as.sentiment(s4) 54 | 55 | # further aggregation from then on is easy...
56 | sentMeas1 <- aggregate(sent1, ctr_agg(lag = 10)) 57 | sent5 <- aggregate(sent4, ctr_agg(howDocs = "proportional"), do.full = FALSE) 58 | 59 | } 60 | \author{ 61 | Samuel Borms 62 | } 63 | -------------------------------------------------------------------------------- /man/as.sento_corpus.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentocorpus.R 3 | \name{as.sento_corpus} 4 | \alias{as.sento_corpus} 5 | \title{Convert a quanteda or tm corpus object into a sento_corpus object} 6 | \usage{ 7 | as.sento_corpus(x, dates = NULL, do.clean = FALSE) 8 | } 9 | \arguments{ 10 | \item{x}{a \pkg{quanteda} \code{\link[quanteda]{corpus}} object, a \pkg{tm} 11 | \code{\link[tm]{SimpleCorpus}} or a \pkg{tm} \code{\link[tm]{VCorpus}} object. For \pkg{tm} 12 | corpora, every corpus element should consist of a single \code{"content"} \code{character} vector 13 | as the document unit.} 14 | 15 | \item{dates}{an optional sequence of dates as \code{"yyyy-mm-dd"}, of the same length as the number 16 | of documents in the input corpus, to define the \code{"date"} column. If \code{dates = NULL}, the 17 | \code{"date"} metadata element in the input corpus, if available, will be used but should be in the 18 | same \code{"yyyy-mm-dd"} format.} 19 | 20 | \item{do.clean}{see \code{\link{sento_corpus}}.} 21 | } 22 | \value{ 23 | A \code{sento_corpus} object, as returned by the \code{\link{sento_corpus}} function. 24 | } 25 | \description{ 26 | Converts most common \pkg{quanteda} and \pkg{tm} corpus objects into a 27 | \code{sento_corpus} object. Appropriate available metadata is integrated as features; 28 | for a \pkg{quanteda} corpus, this can come from \code{docvars(x)}, for a \pkg{tm} corpus, 29 | only \code{meta(x, type = "indexed")} metadata is considered. 
30 | } 31 | \examples{ 32 | data("usnews", package = "sentometrics") 33 | txt <- system.file("texts", "txt", package = "tm") 34 | reuters <- system.file("texts", "crude", package = "tm") 35 | 36 | # reshuffle usnews data.frame for use in quanteda and tm 37 | dates <- usnews$date 38 | usnews$wrong <- "notNumeric" 39 | colnames(usnews)[c(1, 3)] <- c("doc_id", "text") 40 | 41 | # conversion from a quanteda corpus 42 | qcorp <- quanteda::corpus(usnews, 43 | text_field = "text", docid_field = "doc_id") 44 | corp1 <- as.sento_corpus(qcorp) 45 | corp2 <- as.sento_corpus(qcorp, sample(dates)) # overwrites "date" column 46 | 47 | # conversion from a tm SimpleCorpus corpus (DataframeSource) 48 | tmSCdf <- tm::SimpleCorpus(tm::DataframeSource(usnews)) 49 | corp3 <- as.sento_corpus(tmSCdf) 50 | 51 | # conversion from a tm SimpleCorpus corpus (DirSource) 52 | tmSCdir <- tm::SimpleCorpus(tm::DirSource(txt)) 53 | corp4 <- as.sento_corpus(tmSCdir, dates[1:length(tmSCdir)]) 54 | 55 | # conversion from a tm VCorpus corpus (DataframeSource) 56 | tmVCdf <- tm::VCorpus(tm::DataframeSource(usnews)) 57 | corp5 <- as.sento_corpus(tmVCdf) 58 | 59 | # conversion from a tm VCorpus corpus (DirSource) 60 | tmVCdir <- tm::VCorpus(tm::DirSource(reuters), 61 | list(reader = tm::readReut21578XMLasPlain)) 62 | corp6 <- as.sento_corpus(tmVCdir, dates[1:length(tmVCdir)]) 63 | 64 | } 65 | \seealso{ 66 | \code{\link[quanteda]{corpus}}, \code{\link[tm]{SimpleCorpus}}, \code{\link[tm]{VCorpus}}, 67 | \code{\link{sento_corpus}} 68 | } 69 | \author{ 70 | Samuel Borms 71 | } 72 | -------------------------------------------------------------------------------- /man/attributions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/attribution.R 3 | \name{attributions} 4 | \alias{attributions} 5 | \title{Retrieve top-down model sentiment attributions} 6 | \usage{ 7 | attributions( 8 | model, 9 | sento_measures, 10 | do.lags = TRUE, 11 | do.normalize = FALSE, 12 | refDates = NULL, 13 | factor = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{model}{a \code{sento_model} or a \code{sento_modelIter} object created with \code{\link{sento_model}}.} 18 | 19 | \item{sento_measures}{the \code{sento_measures} object, as created with \code{\link{sento_measures}}, used to estimate 20 | the model from the first argument (make sure this is the case!).} 21 | 22 | \item{do.lags}{a \code{logical}, \code{TRUE} also computes the attribution to each time lag. For large time lags, 23 | this is time-consuming.} 24 | 25 | \item{do.normalize}{a \code{logical}, \code{TRUE} divides each element of every attribution vector at a given date by its 26 | L2-norm at that date, normalizing the values between -1 and 1. The document attributions are not normalized.} 27 | 28 | \item{refDates}{the dates (as \code{"yyyy-mm-dd"}) at which attribution is to be performed. These should be between the latest 29 | date available in the input \code{sento_measures} object and the first estimation sample date (that is, \code{model$dates[1]} 30 | if \code{model} is a \code{sento_model} object). All dates should also be in \code{get_dates(sento_measures)}. If 31 | \code{NULL} (default), attribution is calculated for all in-sample dates. 
Ignored if \code{model} is a \code{sento_modelIter} 32 | object, for which attribution is calculated for all out-of-sample prediction dates.} 33 | 34 | \item{factor}{the factor level as a single \code{character} vector to calculate attribution 35 | for in case of (a) multinomial model(s). Ignored for linear and binomial models.} 36 | } 37 | \value{ 38 | A \code{list} of class \code{attributions}, with \code{"documents"}, \code{"lags"}, \code{"lexicons"}, 39 | \code{"features"} and \code{"time"} as attribution dimensions. The last four dimensions are 40 | \code{data.table}s having a \code{"date"} column and the other columns the different components of the dimension, with 41 | the attributions as values. Document-level attribution is further decomposed into a \code{data.table} per date, with 42 | \code{"id"}, \code{"date"} and \code{"attrib"} columns. If \code{do.lags = FALSE}, the \code{"lags"} element is set 43 | to \code{NULL}. 44 | } 45 | \description{ 46 | Computes the attributions to predictions for a (given) number of dates at all possible sentiment dimensions, 47 | based on the coefficients associated with each sentiment measure, as estimated in the provided model object. 48 | } 49 | \details{ 50 | See \code{\link{sento_model}} for an elaborate modeling example including the calculation and plotting of 51 | attributions. The attribution for logistic models is represented in terms of log odds. For binomial models, it is 52 | calculated with respect to the last factor level or factor column. A \code{NULL} value for document-level attribution 53 | on a given date means no documents are directly implicated in the associated prediction. 54 | } 55 | \seealso{ 56 | \code{\link{sento_model}} 57 | } 58 | \author{ 59 | Samuel Borms, Keven Bluteau 60 | } 61 | -------------------------------------------------------------------------------- /man/corpus_summarize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentocorpus.R 3 | \name{corpus_summarize} 4 | \alias{corpus_summarize} 5 | \title{Summarize the sento_corpus object} 6 | \usage{ 7 | corpus_summarize(x, by = "day", features = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{sento_corpus} object created with \code{\link{sento_corpus}}.} 11 | 12 | \item{by}{a single \code{character} vector to specify the frequency time interval over which the statistics 13 | need to be calculated.} 14 | 15 | \item{features}{a \code{character} vector that can be used to select a subset of the features to analyse.} 16 | } 17 | \value{ 18 | A \code{list} containing: 19 | \item{stats}{a \code{data.table} with statistics about the number of documents, the total, average, minimum and maximum 20 | number of tokens, and the number of texts per feature for each date.} 21 | \item{plots}{a \code{list} with three plots representing the above statistics.} 22 | } 23 | \description{ 24 | Summarizes the \code{sento_corpus} object and returns insights about the evolution of 25 | documents, features and tokens over time. 26 | } 27 | \details{ 28 | This function summarizes the \code{sento_corpus} object by generating statistics about 29 | documents, features and tokens over time. The insights can be narrowed down to a chosen set of metadata 30 | features. The same tokenization as in the sentiment calculation in \code{\link{compute_sentiment}} is used.
31 | } 32 | \examples{ 33 | data("usnews", package = "sentometrics") 34 | 35 | corpus <- sento_corpus(usnews) 36 | 37 | # summary of corpus by day 38 | summary1 <- corpus_summarize(corpus) 39 | 40 | # summary of corpus by month for both journals 41 | summary2 <- corpus_summarize(corpus, by = "month", 42 | features = c("wsj", "wapo")) 43 | 44 | } 45 | \author{ 46 | Jeroen Van Pelt, Samuel Borms, Andres Algaba 47 | } 48 | -------------------------------------------------------------------------------- /man/data-defunct.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/deprecated.R 3 | \docType{data} 4 | \name{data-defunct} 5 | \alias{data-defunct} 6 | \alias{lexicons} 7 | \alias{valence} 8 | \title{Datasets with defunct names} 9 | \description{ 10 | These are datasets that have been renamed and removed. 11 | } 12 | \details{ 13 | The dataset \code{lexicons} is defunct, use \code{list_lexicons} instead. 14 | 15 | The dataset \code{valence} is defunct, use \code{list_valence_shifters} instead. 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/diff.sento_measures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentomeasures_methods.R 3 | \name{diff.sento_measures} 4 | \alias{diff.sento_measures} 5 | \title{Differencing of sentiment measures} 6 | \usage{ 7 | \method{diff}{sento_measures}(x, lag = 1, differences = 1, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{sento_measures} object created using \code{\link{sento_measures}}.} 11 | 12 | \item{lag}{a \code{numeric}, see documentation for the generic \code{\link{diff}}.} 13 | 14 | \item{differences}{a \code{numeric}, see documentation for the generic \code{\link{diff}}.} 15 | 16 | \item{...}{not used.} 17 | } 18 | \value{ 19 | A modified \code{sento_measures} object, with the measures replaced by the differenced measures as well as updated 20 | statistics. 21 | } 22 | \description{ 23 | Differences the sentiment measures from a \code{sento_measures} object. 24 | } 25 | \examples{ 26 | data("usnews", package = "sentometrics") 27 | data("list_lexicons", package = "sentometrics") 28 | data("list_valence_shifters", package = "sentometrics") 29 | 30 | # construct a sento_measures object to start with 31 | corpus <- sento_corpus(corpusdf = usnews) 32 | corpusSample <- quanteda::corpus_sample(corpus, size = 500) 33 | l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")], list_valence_shifters[["en"]]) 34 | ctr <- ctr_agg(howTime = c("equal_weight", "linear"), by = "year", lag = 3) 35 | sento_measures <- sento_measures(corpusSample, l, ctr) 36 | 37 | # first-order difference sentiment measures with a lag of two 38 | diffed <- diff(sento_measures, lag = 2, differences = 1) 39 | 40 | } 41 | \author{ 42 | Samuel Borms 43 | } 44 | -------------------------------------------------------------------------------- /man/epu.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentometrics.R 3 | \docType{data} 4 | \name{epu} 5 | \alias{epu} 6 | \title{Monthly U.S. Economic Policy Uncertainty index} 7 | \format{ 8 | A \code{data.frame} with 403 rows and 4 columns. 
9 | }
10 | \source{
11 | \href{http://www.policyuncertainty.com/us_monthly.html}{Measuring Economic Policy Uncertainty}. Retrieved
12 | August 24, 2018.
13 | }
14 | \usage{
15 | data("epu")
16 | }
17 | \description{
18 | Monthly news-based U.S. Economic Policy Uncertainty (EPU) index (Baker, Bloom and Davis, 2016). It runs from January 1985
19 | to July 2018, and includes a binomial and a multinomial example series. The following columns are present:
20 |
21 | \itemize{
22 | \item date. Date as \code{"yyyy-mm-01"}.
23 | \item index. A \code{numeric} monthly index value.
24 | \item above. A \code{factor} with value \code{"above"} if the index is greater than the mean of the entire series, else
25 | \code{"below"}.
26 | \item aboveMulti. A \code{factor} with the mutually exclusive values \code{"above+"} (index above the 75\% quantile),
27 | \code{"above"} (between the 50\% and 75\% quantiles), \code{"below"} (between the 25\% and 50\% quantiles) and
28 | \code{"below-"} (below the 25\% quantile).
29 | }
30 | }
31 | \examples{
32 | data("epu", package = "sentometrics")
33 | head(epu)
34 |
35 | }
36 | \references{
37 | Baker, Bloom and Davis (2016). \strong{Measuring Economic Policy Uncertainty}.
38 | \emph{The Quarterly Journal of Economics 131, 1593-1636}, \doi{10.1093/qje/qjw024}.
39 | }
40 | \keyword{datasets}
41 |
-------------------------------------------------------------------------------- /man/figures/gsoc.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/man/figures/gsoc.png
-------------------------------------------------------------------------------- /man/figures/innoviris.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/man/figures/innoviris.png
-------------------------------------------------------------------------------- /man/figures/ivado.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/man/figures/ivado.png
-------------------------------------------------------------------------------- /man/figures/logo.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/man/figures/logo.png
-------------------------------------------------------------------------------- /man/figures/snsf.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/man/figures/snsf.png
-------------------------------------------------------------------------------- /man/figures/swissuniversities.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/man/figures/swissuniversities.png
-------------------------------------------------------------------------------- /man/get_dates.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in
R/sentomeasures_methods.R
3 | \name{get_dates}
4 | \alias{get_dates}
5 | \title{Get the dates of the sentiment measures/time series}
6 | \usage{
7 | get_dates(sento_measures)
8 | }
9 | \arguments{
10 | \item{sento_measures}{a \code{sento_measures} object created using \code{\link{sento_measures}}.}
11 | }
12 | \value{
13 | The \code{"date"} column in \code{sento_measures[["measures"]]} as a \code{character} vector.
14 | }
15 | \description{
16 | Returns the dates of the sentiment time series.
17 | }
18 | \author{
19 | Samuel Borms
20 | }
21 |
-------------------------------------------------------------------------------- /man/get_dimensions.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentomeasures_methods.R
3 | \name{get_dimensions}
4 | \alias{get_dimensions}
5 | \title{Get the dimensions of the sentiment measures}
6 | \usage{
7 | get_dimensions(sento_measures)
8 | }
9 | \arguments{
10 | \item{sento_measures}{a \code{sento_measures} object created using \code{\link{sento_measures}}.}
11 | }
12 | \value{
13 | The \code{"features"}, \code{"lexicons"} and \code{"time"} elements in \code{sento_measures}.
14 | }
15 | \description{
16 | Returns the components across all three dimensions of the sentiment measures.
17 | }
18 | \author{
19 | Samuel Borms
20 | }
21 |
-------------------------------------------------------------------------------- /man/get_hows.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{get_hows}
4 | \alias{get_hows}
5 | \title{Options supported to perform aggregation into sentiment measures}
6 | \usage{
7 | get_hows()
8 | }
9 | \value{
10 | A \code{list} with the supported aggregation hows for arguments \code{howWithin} (\code{"words"}), \code{howDocs}
11 | (\code{"docs"}) and \code{howTime} (\code{"time"}), to be supplied to \code{\link{ctr_agg}}.
12 | }
13 | \description{
14 | Outputs the supported aggregation arguments. Call for information purposes only. Used within
15 | \code{\link{ctr_agg}} to check if supplied aggregation hows are supported.
16 | }
17 | \details{
18 | See the package's \href{https://www.ssrn.com/abstract=3067734}{vignette} for a detailed explanation of all
19 | aggregation options.
20 | }
21 | \seealso{
22 | \code{\link{ctr_agg}}
23 | }
24 |
-------------------------------------------------------------------------------- /man/get_loss_data.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentomodel.R
3 | \name{get_loss_data}
4 | \alias{get_loss_data}
5 | \title{Retrieve loss data from a selection of models}
6 | \usage{
7 | get_loss_data(models, loss = c("DA", "error", "errorSq", "AD", "accuracy"))
8 | }
9 | \arguments{
10 | \item{models}{a named \code{list} of \code{sento_modelIter} objects. All models should be of the same family, being
11 | either \code{"gaussian"}, \code{"binomial"} or \code{"multinomial"}, and have performance data of the same dimensions.}
12 |
13 | \item{loss}{a single \code{character} vector, either \code{"DA"} (directional \emph{in}accuracy), \code{"error"}
14 | (predicted minus realized response variable), \code{"errorSq"} (squared errors), \code{"AD"} (absolute errors) or
15 | \code{"accuracy"} (\emph{in}accurate class predictions). This argument defines on what basis the model confidence set
This argument defines on what basis the model confidence set 16 | is calculated. The first four options are available for \code{"gaussian"} models, the last option applies only to 17 | \code{"binomial"} and \code{"multinomial"} models.} 18 | } 19 | \value{ 20 | A \code{matrix} of loss data. 21 | } 22 | \description{ 23 | Structures specific performance data for a set of different \code{sento_modelIter} objects as loss data. 24 | Can then be used, for instance, as an input to create a model confidence set (Hansen, Lunde and Nason, 2011) with 25 | the \pkg{MCS} package. 26 | } 27 | \examples{ 28 | \dontrun{ 29 | data("usnews", package = "sentometrics") 30 | data("list_lexicons", package = "sentometrics") 31 | data("list_valence_shifters", package = "sentometrics") 32 | data("epu", package = "sentometrics") 33 | 34 | set.seed(505) 35 | 36 | # construct two sento_measures objects 37 | corpusAll <- sento_corpus(corpusdf = usnews) 38 | corpus <- quanteda::corpus_subset(corpusAll, date >= "1997-01-01" & date < "2014-10-01") 39 | l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")], list_valence_shifters[["en"]]) 40 | 41 | ctrA <- ctr_agg(howWithin = "proportionalPol", howDocs = "proportional", 42 | howTime = c("equal_weight", "linear"), by = "month", lag = 3) 43 | sentMeas <- sento_measures(corpus, l, ctrA) 44 | 45 | # prepare y and other x variables 46 | y <- epu[epu$date \%in\% get_dates(sentMeas), "index"] 47 | length(y) == nobs(sentMeas) # TRUE 48 | x <- data.frame(runif(length(y)), rnorm(length(y))) # two other (random) x variables 49 | colnames(x) <- c("x1", "x2") 50 | 51 | # estimate different type of regressions 52 | ctrM <- ctr_model(model = "gaussian", type = "AIC", do.iter = TRUE, 53 | h = 0, nSample = 120, start = 50) 54 | out1 <- sento_model(sentMeas, y, x = x, ctr = ctrM) 55 | out2 <- sento_model(sentMeas, y, x = NULL, ctr = ctrM) 56 | out3 <- sento_model(subset(sentMeas, select = "linear"), y, x = x, ctr = ctrM) 57 | out4 <- sento_model(subset(sentMeas, select = "linear"), y, x = NULL, ctr = ctrM) 58 | 59 | lossData <- get_loss_data(models = list(m1 = out1, m2 = out2, m3 = out3, m4 = out4), 60 | loss = "errorSq") 61 | 62 | mcs <- MCS::MCSprocedure(lossData)} 63 | 64 | } 65 | \references{ 66 | Hansen, Lunde and Nason (2011). \strong{The model confidence set}. \emph{Econometrica 79, 453-497}, 67 | \doi{10.3982/ECTA5771}. 68 | } 69 | \seealso{ 70 | \code{\link{sento_model}}, \code{\link[MCS]{MCSprocedure}} 71 | } 72 | \author{ 73 | Samuel Borms 74 | } 75 | -------------------------------------------------------------------------------- /man/list_lexicons.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentometrics.R 3 | \docType{data} 4 | \name{list_lexicons} 5 | \alias{list_lexicons} 6 | \title{Built-in lexicons} 7 | \format{ 8 | A \code{list} with all built-in lexicons, appropriately named as \code{"NAME_language(_tr)"}. 9 | } 10 | \source{ 11 | \href{https://link.springer.com/article/10.1007/s10579-016-9364-5}{FEEL lexicon}. Retrieved November 1, 2017. 12 | 13 | \href{https://inquirer.sites.fas.harvard.edu}{GI lexicon}. Retrieved November 1, 2017. 14 | 15 | \href{https://journals.sagepub.com/doi/abs/10.1177/0021943608319388}{HENRY lexicon}. Retrieved 16 | November 1, 2017. 17 | 18 | \href{https://sraf.nd.edu/textual-analysis/}{LM lexicon}. Retrieved 19 | November 1, 2017. 
20 | }
21 | \usage{
22 | data("list_lexicons")
23 | }
24 | \description{
25 | A \code{list} containing all built-in lexicons, each as a \code{data.table} with two columns: an \code{x} column with the words,
26 | and a \code{y} column with the polarities. The \code{list} element names consist of the lexicon name and the language
27 | (based on the two-letter ISO code convention as in \code{\link[stopwords]{stopwords}}), with \code{"_tr"} as
28 | suffix if the lexicon is translated. The translation was done via Microsoft Translator through Microsoft
29 | Word. Only the entries that conform to the original language entry after retranslation, and those that have actually been
30 | translated, are kept. The latter condition is assumed fulfilled when the translation differs from the original entry.
31 | All words are unigrams and in lowercase. The built-in lexicons are the following:
32 |
33 | \itemize{
34 | \item FEEL_en_tr
35 | \item FEEL_fr (Abdaoui, \enc{Azé}{Aze}, Bringay and Poncelet, 2017)
36 | \item FEEL_nl_tr
37 | \item GI_en (General Inquirer, i.e. Harvard IV-4 combined with Lasswell)
38 | \item GI_fr_tr
39 | \item GI_nl_tr
40 | \item HENRY_en (Henry, 2008)
41 | \item HENRY_fr_tr
42 | \item HENRY_nl_tr
43 | \item LM_en (Loughran and McDonald, 2011)
44 | \item LM_fr_tr
45 | \item LM_nl_tr
46 | }
47 |
48 | Other useful lexicons can be found in the \pkg{lexicon} package, more specifically the datasets preceded by
49 | \code{hash_sentiment_}.
50 | }
51 | \examples{
52 | data("list_lexicons", package = "sentometrics")
53 | list_lexicons[c("FEEL_en_tr", "LM_en")]
54 |
55 | }
56 | \references{
57 | Abdaoui, \enc{Azé}{Aze}, Bringay and Poncelet (2017). \strong{FEEL: French Expanded Emotion Lexicon}.
58 | \emph{Language Resources & Evaluation 51, 833-855}, \doi{10.1007/s10579-016-9364-5}.
59 |
60 | Henry (2008). \strong{Are investors influenced by how earnings press releases are written?}.
61 | \emph{Journal of Business Communication 45, 363-407}, \doi{10.1177/0021943608319388}.
62 |
63 | Loughran and McDonald (2011). \strong{When is a liability not a liability? Textual analysis, dictionaries, and 10-Ks}.
64 | \emph{Journal of Finance 66, 35-65}, \doi{10.1111/j.1540-6261.2010.01625.x}.
65 | }
66 | \keyword{datasets}
67 |
-------------------------------------------------------------------------------- /man/list_valence_shifters.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentometrics.R
3 | \docType{data}
4 | \name{list_valence_shifters}
5 | \alias{list_valence_shifters}
6 | \title{Built-in valence word lists}
7 | \format{
8 | A \code{list} with all built-in valence word lists, appropriately named.
9 | }
10 | \source{
11 | \code{\link[lexicon]{hash_valence_shifters}} (English valence shifters). Retrieved August 24, 2018.
12 | }
13 | \usage{
14 | data("list_valence_shifters")
15 | }
16 | \description{
17 | A \code{list} containing all built-in valence word lists, as \code{data.table}s with three columns: an \code{x} column with
18 | the words, a \code{y} column with the values associated to each word, and a \code{t} column with the type of valence
19 | shifter (\code{1} = negators, \code{2} = amplifiers, \code{3} = deamplifiers,
20 | \code{4} = adversative conjunctions). The \code{list} element names indicate the language
21 | (based on the two-letter ISO code convention as in \code{\link[stopwords]{stopwords}}) of the valence word list.
22 | All non-English word lists are translated via Microsoft Translator through Microsoft Word. Only the entries whose
23 | translation differs from the original entry are kept. All words are unigrams and in lowercase. The built-in valence word
24 | lists are available in the following languages:
25 |
26 | \itemize{
27 | \item English (\code{"en"})
28 | \item French (\code{"fr"})
29 | \item Dutch (\code{"nl"})
30 | }
31 | }
32 | \examples{
33 | data("list_valence_shifters", package = "sentometrics")
34 | list_valence_shifters["en"]
35 |
36 | }
37 | \keyword{datasets}
38 |
-------------------------------------------------------------------------------- /man/measures_fill.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentomeasures_measures_xyz.R
3 | \name{measures_fill}
4 | \alias{measures_fill}
5 | \title{Add and fill missing dates to sentiment measures}
6 | \usage{
7 | measures_fill(
8 |   sento_measures,
9 |   fill = "zero",
10 |   dateBefore = NULL,
11 |   dateAfter = NULL
12 | )
13 | }
14 | \arguments{
15 | \item{sento_measures}{a \code{sento_measures} object created using \code{\link{sento_measures}}.}
16 |
17 | \item{fill}{an element of \code{c("zero", "latest")}; the first assumes missing dates represent zero sentiment,
18 | the second assumes missing dates represent constant sentiment.}
19 |
20 | \item{dateBefore}{a date as \code{"yyyy-mm-dd"}, to stretch the sentiment time series back to this date. Should
21 | be earlier than \code{get_dates(sento_measures)[1]} to take effect. The values for the added dates are set to those at
22 | \code{get_dates(sento_measures)[1]}. If \code{NULL}, then ignored.}
23 |
24 | \item{dateAfter}{a date as \code{"yyyy-mm-dd"}, to stretch the sentiment time series up to this date. Should be
25 | later than \code{tail(get_dates(sento_measures), 1)} to take effect. If \code{NULL}, then ignored.}
26 | }
27 | \value{
28 | A modified \code{sento_measures} object.
29 | }
30 | \description{
31 | Adds missing dates between the earliest and latest date of a \code{sento_measures} object, or between two more extreme
32 | boundary dates, such that the time series are continuous date-wise. Fills in any missing date with either 0 or the
33 | most recent non-missing value.
34 | }
35 | \details{
36 | The \code{dateBefore} and \code{dateAfter} dates are converted according to the \code{sento_measures[["by"]]}
37 | frequency.
38 | }
39 | \examples{
40 | # construct a sento_measures object to start with
41 | corpus <- sento_corpus(corpusdf = sentometrics::usnews)
42 | corpusSample <- quanteda::corpus_sample(corpus, size = 500)
43 | l <- sento_lexicons(sentometrics::list_lexicons[c("LM_en", "HENRY_en")],
44 |                     sentometrics::list_valence_shifters[["en"]])
45 | ctr <- ctr_agg(howTime = c("equal_weight", "linear"), by = "day", lag = 7, fill = "none")
46 | sento_measures <- sento_measures(corpusSample, l, ctr)
47 |
48 | # fill measures
49 | f1 <- measures_fill(sento_measures)
50 | f2 <- measures_fill(sento_measures, fill = "latest")
51 | f3 <- measures_fill(sento_measures, fill = "zero",
52 |                     dateBefore = get_dates(sento_measures)[1] - 10,
53 |                     dateAfter = tail(get_dates(sento_measures), 1) + 15)
54 |
55 | }
56 | \author{
57 | Samuel Borms
58 | }
59 |
-------------------------------------------------------------------------------- /man/measures_update.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentomeasures_measures_xyz.R
3 | \name{measures_update}
4 | \alias{measures_update}
5 | \title{Update sentiment measures}
6 | \usage{
7 | measures_update(sento_measures, sento_corpus, lexicons)
8 | }
9 | \arguments{
10 | \item{sento_measures}{a \code{sento_measures} object created with \code{\link{sento_measures}}.}
11 |
12 | \item{sento_corpus}{a \code{sento_corpus} object created with \code{\link{sento_corpus}}.}
13 |
14 | \item{lexicons}{a \code{sento_lexicons} object created with \code{\link{sento_lexicons}}.}
15 | }
16 | \value{
17 | An updated \code{sento_measures} object.
18 | }
19 | \description{
20 | Updates a \code{sento_measures} object based on a newly provided \code{sento_corpus} object.
21 | Sentiment for the unseen corpus texts is calculated and aggregated, applying the control variables
22 | from the input \code{sento_measures} object.
23 | }
24 | \examples{
25 | data("usnews", package = "sentometrics")
26 |
27 | corpus1 <- sento_corpus(usnews[1:500, ])
28 | corpus2 <- sento_corpus(usnews[400:2000, ])
29 |
30 | ctr <- ctr_agg(howTime = "linear", by = "year", lag = 3)
31 | l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")],
32 |                     list_valence_shifters[["en"]])
33 | sento_measures <- sento_measures(corpus1, l, ctr)
34 | sento_measuresNew <- measures_update(sento_measures, corpus2, l)
35 |
36 | }
37 | \seealso{
38 | \code{\link{sento_measures}}, \code{\link{compute_sentiment}}
39 | }
40 | \author{
41 | Jeroen Van Pelt, Samuel Borms, Andres Algaba
42 | }
43 |
-------------------------------------------------------------------------------- /man/merge.sentiment.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentiment_engines.R
3 | \name{merge.sentiment}
4 | \alias{merge.sentiment}
5 | \title{Merge sentiment objects horizontally and/or vertically}
6 | \usage{
7 | \method{merge}{sentiment}(...)
8 | }
9 | \arguments{
10 | \item{...}{\code{sentiment} objects to merge.}
11 | }
12 | \value{
13 | The new, combined, \code{sentiment} object, ordered by \code{"date"} and \code{"id"}.
14 | }
15 | \description{
16 | Combines multiple \code{sentiment} objects with possibly different column names
17 | into a new \code{sentiment} object. Any resulting \code{NA} values are converted to zero.
18 | }
19 | \examples{
20 | data("usnews", package = "sentometrics")
21 | data("list_lexicons", package = "sentometrics")
22 | data("list_valence_shifters", package = "sentometrics")
23 |
24 | l1 <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")])
25 | l2 <- sento_lexicons(list_lexicons[c("FEEL_en_tr")])
26 | l3 <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en", "FEEL_en_tr")])
27 |
28 | corp1 <- sento_corpus(corpusdf = usnews[1:200, ])
29 | corp2 <- sento_corpus(corpusdf = usnews[201:450, ])
30 | corp3 <- sento_corpus(corpusdf = usnews[401:700, ])
31 |
32 | s1 <- compute_sentiment(corp1, l1, "proportionalPol")
33 | s2 <- compute_sentiment(corp2, l1, "counts")
34 | s3 <- compute_sentiment(corp3, l1, "counts")
35 | s4 <- compute_sentiment(corp2, l1, "counts", do.sentence = TRUE)
36 | s5 <- compute_sentiment(corp3, l2, "proportional", do.sentence = TRUE)
37 | s6 <- compute_sentiment(corp3, l1, "counts", do.sentence = TRUE)
38 | s7 <- compute_sentiment(corp3, l3, "UShaped", do.sentence = TRUE)
39 |
40 | # straightforward row-wise merge
41 | m1 <- merge(s1, s2, s3)
42 | nrow(m1) == 700 # TRUE
43 |
44 | # another straightforward row-wise merge
45 | m2 <- merge(s4, s6)
46 |
47 | # merge of sentence and non-sentence calculations
48 | m3 <- merge(s3, s6)
49 |
50 | # different methods add columns
51 | m4 <- merge(s4, s5)
52 | nrow(m4) == nrow(m2) # TRUE
53 |
54 | # different methods and weighting add rows and columns
55 | ## rows are added only when the different weighting
56 | ## approach for a specific method gives different sentiment values
57 | m5 <- merge(s4, s7)
58 | nrow(m5) > nrow(m4) # TRUE
59 |
60 | }
61 | \author{
62 | Samuel Borms
63 | }
64 |
-------------------------------------------------------------------------------- /man/nmeasures.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentomeasures_methods.R
3 | \name{nmeasures}
4 | \alias{nmeasures}
5 | \title{Get number of sentiment measures}
6 | \usage{
7 | nmeasures(sento_measures)
8 | }
9 | \arguments{
10 | \item{sento_measures}{a \code{sento_measures} object created using \code{\link{sento_measures}}.}
11 | }
12 | \value{
13 | The number of sentiment measures in the input \code{sento_measures} object.
14 | }
15 | \description{
16 | Returns the number of sentiment measures.
17 | }
18 | \author{
19 | Samuel Borms
20 | }
21 |
-------------------------------------------------------------------------------- /man/nobs.sento_measures.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentomeasures_methods.R
3 | \name{nobs.sento_measures}
4 | \alias{nobs.sento_measures}
5 | \title{Get number of observations in the sentiment measures}
6 | \usage{
7 | \method{nobs}{sento_measures}(object, ...)
8 | }
9 | \arguments{
10 | \item{object}{a \code{sento_measures} object created using \code{\link{sento_measures}}.}
11 |
12 | \item{...}{not used.}
13 | }
14 | \value{
15 | The number of rows (observations/data points) in \code{object[["measures"]]}.
16 | }
17 | \description{
18 | Returns the number of data points available in the sentiment measures.
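As a minimal sketch of both this function and \code{\link{nmeasures}} (the objects below are illustrative, built along the lines of the examples of \code{\link{sento_measures}}):
\preformatted{
corpus <- sento_corpus(corpusdf = sentometrics::usnews)
l <- sento_lexicons(sentometrics::list_lexicons["LM_en"])
ctr <- ctr_agg(howTime = "equal_weight", by = "year", lag = 3)
sm <- sento_measures(corpus, l, ctr)
nobs(sm)      # number of observations (dates)
nmeasures(sm) # number of sentiment measures
}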
19 | }
20 | \author{
21 | Samuel Borms
22 | }
23 |
-------------------------------------------------------------------------------- /man/peakdates.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentomeasures_main.R
3 | \name{peakdates}
4 | \alias{peakdates}
5 | \title{Extract dates related to sentiment time series peaks}
6 | \usage{
7 | peakdates(sento_measures, n = 10, type = "both", do.average = FALSE)
8 | }
9 | \arguments{
10 | \item{sento_measures}{a \code{sento_measures} object created using \code{\link{sento_measures}}.}
11 |
12 | \item{n}{a positive \code{numeric} value to indicate the number of dates associated with sentiment peaks to extract.
13 | If \code{n < 1}, it is interpreted as a quantile (for example, 0.07 would mean the 7\% most extreme dates).}
14 |
15 | \item{type}{a \code{character} value, either \code{"pos"}, \code{"neg"} or \code{"both"}, respectively to look
16 | for the \code{n} dates related to the most positive, most negative or most extreme (in absolute terms) sentiment
17 | occurrences.}
18 |
19 | \item{do.average}{a \code{logical} to indicate whether peaks should be selected based on the average sentiment
20 | value per date.}
21 | }
22 | \value{
23 | A vector of type \code{"Date"} corresponding to the \code{n} extracted sentiment peak dates.
24 | }
25 | \description{
26 | This function extracts the dates for which aggregated time series sentiment is most
27 | extreme (lowest, highest or both in absolute terms). The extracted dates are unique, even when,
28 | for example, all most extreme sentiment values (for different sentiment measures) occur on only
29 | one date.
30 | }
31 | \examples{
32 | set.seed(505)
33 |
34 | data("usnews", package = "sentometrics")
35 | data("list_lexicons", package = "sentometrics")
36 | data("list_valence_shifters", package = "sentometrics")
37 |
38 | # construct a sento_measures object to start with
39 | corpus <- sento_corpus(corpusdf = usnews)
40 | corpusSample <- quanteda::corpus_sample(corpus, size = 500)
41 | l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")], list_valence_shifters[["en"]])
42 | ctr <- ctr_agg(howTime = c("equal_weight", "linear"), by = "month", lag = 3)
43 | sento_measures <- sento_measures(corpusSample, l, ctr)
44 |
45 | # extract the peaks
46 | peaksAbs <- peakdates(sento_measures, n = 5)
47 | peaksAbsQuantile <- peakdates(sento_measures, n = 0.50)
48 | peaksPos <- peakdates(sento_measures, n = 5, type = "pos")
49 | peaksNeg <- peakdates(sento_measures, n = 5, type = "neg")
50 |
51 | }
52 | \author{
53 | Samuel Borms
54 | }
55 |
-------------------------------------------------------------------------------- /man/peakdocs.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentiment_engines.R
3 | \name{peakdocs}
4 | \alias{peakdocs}
5 | \title{Extract documents related to sentiment peaks}
6 | \usage{
7 | peakdocs(sentiment, n = 10, type = "both", do.average = FALSE)
8 | }
9 | \arguments{
10 | \item{sentiment}{a \code{sentiment} object created using \code{\link{compute_sentiment}} or
11 | \code{\link{as.sentiment}}.}
12 |
13 | \item{n}{a positive \code{numeric} value to indicate the number of documents associated with sentiment
14 | peaks to extract.
If \code{n < 1}, it is interpreted as a quantile (for example, 0.07 would mean the 15 | 7\% most extreme documents).} 16 | 17 | \item{type}{a \code{character} value, either \code{"pos"}, \code{"neg"} or \code{"both"}, respectively to look 18 | for the \code{n} documents related to the most positive, most negative or most extreme (in absolute terms) sentiment 19 | occurrences.} 20 | 21 | \item{do.average}{a \code{logical} to indicate whether peaks should be selected based on the average sentiment 22 | value per document.} 23 | } 24 | \value{ 25 | A vector of type \code{"character"} corresponding to the \code{n} extracted document identifiers. 26 | } 27 | \description{ 28 | This function extracts the documents with most extreme sentiment (lowest, highest or both 29 | in absolute terms). The extracted documents are unique, even when, for example, all most extreme 30 | sentiment values (across sentiment calculation methods) occur only for one document. 31 | } 32 | \examples{ 33 | set.seed(505) 34 | 35 | data("usnews", package = "sentometrics") 36 | data("list_lexicons", package = "sentometrics") 37 | data("list_valence_shifters", package = "sentometrics") 38 | 39 | l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")]) 40 | 41 | corpus <- sento_corpus(corpusdf = usnews) 42 | corpusSample <- quanteda::corpus_sample(corpus, size = 200) 43 | sent <- compute_sentiment(corpusSample, l, how = "proportionalPol") 44 | 45 | # extract the peaks 46 | peaksAbs <- peakdocs(sent, n = 5) 47 | peaksAbsQuantile <- peakdocs(sent, n = 0.50) 48 | peaksPos <- peakdocs(sent, n = 5, type = "pos") 49 | peaksNeg <- peakdocs(sent, n = 5, type = "neg") 50 | 51 | } 52 | \author{ 53 | Samuel Borms 54 | } 55 | -------------------------------------------------------------------------------- /man/plot.attributions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/attribution.R 3 | \name{plot.attributions} 4 | \alias{plot.attributions} 5 | \title{Plot prediction attributions at specified level} 6 | \usage{ 7 | \method{plot}{attributions}(x, group = "features", ...) 8 | } 9 | \arguments{ 10 | \item{x}{an \code{attributions} object created with \code{\link{attributions}}.} 11 | 12 | \item{group}{a value from \code{c("lags", "lexicons", "features", "time")}.} 13 | 14 | \item{...}{not used.} 15 | } 16 | \value{ 17 | Returns a simple \code{\link[ggplot2]{ggplot}} object, which can be added onto (or to alter its default elements) by using 18 | the \code{+} operator. By default, a legend is positioned at the top if the number of components of the 19 | dimension is at maximum twelve. 20 | } 21 | \description{ 22 | Shows a plot of the attributions along the dimension provided, stacked per date. 23 | } 24 | \details{ 25 | See \code{\link{sento_model}} for an elaborate modeling example including the calculation and plotting of 26 | attributions. This function does not handle the plotting of the attribution of individual documents, since there are 27 | often a lot of documents involved and they appear only once at one date (even though a document may contribute to 28 | predictions at several dates, depending on the number of lags in the time aggregation). 
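A minimal sketch of the intended flow (the objects \code{sentMeas} and \code{y} are placeholders, prepared as in the \code{\link{get_loss_data}} example):
\preformatted{
ctrM <- ctr_model(model = "gaussian", type = "AIC", h = 0)
out <- sento_model(sentMeas, y, ctr = ctrM)
attrib <- attributions(out, sentMeas)
plot(attrib, group = "lexicons") # attributions stacked per date
}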
29 | } 30 | \author{ 31 | Samuel Borms, Keven Bluteau 32 | } 33 | -------------------------------------------------------------------------------- /man/plot.sento_measures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentomeasures_methods.R 3 | \name{plot.sento_measures} 4 | \alias{plot.sento_measures} 5 | \title{Plot sentiment measures} 6 | \usage{ 7 | \method{plot}{sento_measures}(x, group = "all", ...) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{sento_measures} object created using \code{\link{sento_measures}}.} 11 | 12 | \item{group}{a value from \code{c("lexicons", "features", "time", "all")}. The first three choices display the average of 13 | all measures from the same group, in a different color. The choice \code{"all"} displays every single sentiment measure 14 | in a separate color, but this may look visually overwhelming very fast, and can be quite slow.} 15 | 16 | \item{...}{not used.} 17 | } 18 | \value{ 19 | Returns a simple \code{\link[ggplot2]{ggplot}} object, which can be added onto (or to alter its default elements) by using 20 | the \code{+} operator (see example). By default, a legend is positioned at the top if there are at maximum twelve line 21 | graphs plotted and \code{group} is different from \code{"all"}. 22 | } 23 | \description{ 24 | Plotting method that shows all sentiment measures from the provided \code{sento_measures} 25 | object in one plot, or the average along one of the lexicons, features and time weighting dimensions. 26 | } 27 | \examples{ 28 | # construct a sento_measures object to start with 29 | corpus <- sento_corpus(corpusdf = sentometrics::usnews) 30 | corpusSample <- quanteda::corpus_sample(corpus, size = 500) 31 | l <- sento_lexicons(sentometrics::list_lexicons[c("LM_en")], 32 | sentometrics::list_valence_shifters[["en"]]) 33 | ctr <- ctr_agg(howTime = c("equal_weight", "linear"), by = "month", lag = 3) 34 | sm <- sento_measures(corpusSample, l, ctr) 35 | 36 | # plot sentiment measures 37 | plot(sm, "features") 38 | 39 | \dontrun{ 40 | # adjust appearance of plot 41 | library("ggplot2") 42 | p <- plot(sm) 43 | p <- p + 44 | scale_x_date(name = "year", date_labels = "\%Y") + 45 | scale_y_continuous(name = "newName") 46 | p} 47 | 48 | } 49 | \author{ 50 | Samuel Borms 51 | } 52 | -------------------------------------------------------------------------------- /man/plot.sento_modelIter.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentomodel.R 3 | \name{plot.sento_modelIter} 4 | \alias{plot.sento_modelIter} 5 | \title{Plot iterative predictions versus realized values} 6 | \usage{ 7 | \method{plot}{sento_modelIter}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{sento_modelIter} object created using \code{\link{sento_model}}.} 11 | 12 | \item{...}{not used.} 13 | } 14 | \value{ 15 | Returns a simple \code{\link[ggplot2]{ggplot}} object, which can be added onto (or to alter its default elements) by using 16 | the \code{+} operator. 17 | } 18 | \description{ 19 | Displays a plot of all predictions made through the iterative model computation as incorporated in the 20 | input \code{sento_modelIter} object, as well as the corresponding true values. 21 | } 22 | \details{ 23 | See \code{\link{sento_model}} for an elaborate modeling example including the plotting of out-of-sample 24 | performance. 
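In brief, and assuming a \code{sento_measures} object \code{sentMeas} and a response \code{y} as constructed in the \code{\link{get_loss_data}} example:
\preformatted{
ctrM <- ctr_model(model = "gaussian", type = "AIC", do.iter = TRUE,
                  h = 0, nSample = 120, start = 50)
outIter <- sento_model(sentMeas, y, ctr = ctrM)
plot(outIter) # out-of-sample predictions vs. realized values
}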
25 | } 26 | \author{ 27 | Samuel Borms 28 | } 29 | -------------------------------------------------------------------------------- /man/predict.sento_model.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentomodel.R 3 | \name{predict.sento_model} 4 | \alias{predict.sento_model} 5 | \title{Make predictions from a sento_model object} 6 | \usage{ 7 | \method{predict}{sento_model}(object, newx, type = "response", offset = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{object}{a \code{sento_model} object created with \code{\link{sento_model}}.} 11 | 12 | \item{newx}{a data \code{matrix} used for the prediction(s), row-by-row; see 13 | \code{\link[glmnet]{predict.glmnet}}. The number of columns should be equal to \code{sum(sento_model$nVar)}, being the 14 | number of original sentiment measures and other variables. The variables discarded in the regression process are 15 | dealt with within this function, based on \code{sento_model$discarded}.} 16 | 17 | \item{type}{type of prediction required, a value from \code{c("link", "response", "class")}, see documentation for 18 | \code{\link[glmnet]{predict.glmnet}}.} 19 | 20 | \item{offset}{not used.} 21 | 22 | \item{...}{not used.} 23 | } 24 | \value{ 25 | A prediction output depending on the \code{type} argument. 26 | } 27 | \description{ 28 | Prediction method for \code{sento_model} class, with usage along the lines of 29 | \code{\link[glmnet]{predict.glmnet}}, but simplified in terms of parameters. 30 | } 31 | \seealso{ 32 | \code{\link[glmnet]{predict.glmnet}}, \code{\link{sento_model}} 33 | } 34 | \author{ 35 | Samuel Borms 36 | } 37 | -------------------------------------------------------------------------------- /man/scale.sento_measures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentomeasures_methods.R 3 | \name{scale.sento_measures} 4 | \alias{scale.sento_measures} 5 | \title{Scaling and centering of sentiment measures} 6 | \usage{ 7 | \method{scale}{sento_measures}(x, center = TRUE, scale = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{sento_measures} object created using \code{\link{sento_measures}}.} 11 | 12 | \item{center}{a \code{logical} or a \code{numeric} vector, see documentation for the generic \code{\link{scale}}. 13 | Alternatively, one can provide a \code{matrix} of dimensions \code{nobs(sento_measures)} times \code{1} or 14 | \code{nmeasures(sento_measures)} with values to subtract from each individual observation.} 15 | 16 | \item{scale}{a \code{logical} or a \code{numeric} vector, see documentation for the generic \code{\link{scale}}. 17 | Alternatively, one can provide a \code{matrix} of dimensions \code{nobs(sento_measures)} times \code{1} or 18 | \code{nmeasures(sento_measures)} with values to divide each individual observation by.} 19 | } 20 | \value{ 21 | A modified \code{sento_measures} object, with the measures replaced by the scaled measures as well as updated 22 | statistics. 23 | } 24 | \description{ 25 | Scales and centers the sentiment measures from a \code{sento_measures} object, column-per-column. By default, 26 | the measures are normalized. \code{NA}s are removed first. 
27 | }
28 | \details{
29 | If one of the arguments \code{center} or \code{scale} is a \code{matrix}, this operation is applied first,
30 | and any further centering or scaling is computed on the resulting data.
31 | }
32 | \examples{
33 | data("usnews", package = "sentometrics")
34 | data("list_lexicons", package = "sentometrics")
35 | data("list_valence_shifters", package = "sentometrics")
36 |
37 | set.seed(505)
38 |
39 | # construct a sento_measures object to start with
40 | corpus <- sento_corpus(corpusdf = usnews)
41 | corpusSample <- quanteda::corpus_sample(corpus, size = 500)
42 | l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")])
43 | ctr <- ctr_agg(howTime = c("equal_weight", "linear"), by = "year", lag = 3)
44 | sento_measures <- sento_measures(corpusSample, l, ctr)
45 |
46 | # scale sentiment measures to zero mean and unit standard deviation
47 | sc1 <- scale(sento_measures)
48 |
49 | n <- nobs(sento_measures)
50 | m <- nmeasures(sento_measures)
51 |
52 | # subtract a matrix
53 | sc2 <- scale(sento_measures, center = matrix(runif(n * m), n, m), scale = FALSE)
54 |
55 | # divide every row observation based on a one-column matrix, then center
56 | sc3 <- scale(sento_measures, center = TRUE, scale = matrix(runif(n)))
57 |
58 | }
59 | \author{
60 | Samuel Borms
61 | }
62 |
-------------------------------------------------------------------------------- /man/sento_corpus.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentocorpus.R
3 | \name{sento_corpus}
4 | \alias{sento_corpus}
5 | \title{Create a sento_corpus object}
6 | \usage{
7 | sento_corpus(corpusdf, do.clean = FALSE)
8 | }
9 | \arguments{
10 | \item{corpusdf}{a \code{data.frame} (or a \code{data.table}, or a \code{tbl}) with as named columns: a document \code{"id"}
11 | column (coercible to \code{character} mode), a \code{"date"} column (as \code{"yyyy-mm-dd"}), a \code{"texts"} column
12 | (in \code{character} mode), an optional \code{"language"} column (in \code{character} mode), and a series of
13 | feature columns of type \code{numeric}, with values between 0 and 1 to specify the degree of connectedness of
14 | a feature to a document. Features could be, for instance, topics (e.g., legal or economic) or article sources (e.g., online or
15 | print). When no feature column is provided, a feature named \code{"dummyFeature"}
16 | is added. All spaces in the names of the features are replaced by \code{'_'}. Feature columns with values not
17 | between 0 and 1 are rescaled column-wise.}
18 |
19 | \item{do.clean}{a \code{logical}, if \code{TRUE} all texts undergo a cleaning routine to eliminate common textual garbage.
20 | This includes a brute force replacement of HTML tags and non-alphanumeric characters by an empty string. Use with care
21 | if the text is meant to have non-alphanumeric characters! Preferably, cleaning is done outside of this function call.}
22 | }
23 | \value{
24 | A \code{sento_corpus} object, derived from a \pkg{quanteda} \code{\link[quanteda]{corpus}}
25 | object. The corpus is ordered by date.
26 | }
27 | \description{
28 | Formalizes a collection of texts into a \code{sento_corpus} object derived from the \pkg{quanteda}
29 | \code{\link[quanteda]{corpus}} object. The \pkg{quanteda} package provides a robust text mining infrastructure
30 | (see their \href{http://quanteda.io/index.html}{website}), including a handy corpus manipulation toolset.
This function
31 | performs a set of checks on the input data and prepares the corpus for further analysis by structurally
32 | integrating a date dimension and numeric metadata features.
33 | }
34 | \details{
35 | A \code{sento_corpus} object is a specialized instance of a \pkg{quanteda} \code{\link[quanteda]{corpus}}. Any
36 | \pkg{quanteda} function applicable to its \code{\link[quanteda]{corpus}} object can also be applied to a \code{sento_corpus}
37 | object. However, changing a given \code{sento_corpus} object too drastically using some of \pkg{quanteda}'s functions might
38 | alter the very structure the corpus is meant to have (as defined in the \code{corpusdf} argument), which is required for it
39 | to be used as an input in other functions of the \pkg{sentometrics} package. There are functions, including
40 | \code{\link[quanteda]{corpus_sample}} or \code{\link[quanteda]{corpus_subset}}, that do not change the actual corpus
41 | structure and may come in handy.
42 |
43 | To add additional features, use \code{\link{add_features}}. Binary features are useful as
44 | a mechanism to select the texts which have to be integrated in the respective feature-based sentiment measure(s), but
45 | this applies only when \code{do.ignoreZeros = TRUE}. Because of this (implicit) selection that can be performed, having
46 | complementary features (e.g., \code{"economy"} and \code{"noneconomy"}) makes sense.
47 |
48 | It is also possible to add one non-numerical feature, that is, \code{"language"}, to designate the language
49 | of the corpus texts. When this feature is provided, a \code{list} of lexicons for different
50 | languages is expected in the \code{compute_sentiment} function.
51 | }
52 | \examples{
53 | data("usnews", package = "sentometrics")
54 |
55 | # corpus construction
56 | corp <- sento_corpus(corpusdf = usnews)
57 |
58 | # take a random subset making use of quanteda
59 | corpusSmall <- quanteda::corpus_sample(corp, size = 500)
60 |
61 | # deleting a feature
62 | quanteda::docvars(corp, field = "wapo") <- NULL
63 |
64 | # deleting all features results in the addition of a dummy feature
65 | quanteda::docvars(corp, field = c("economy", "noneconomy", "wsj")) <- NULL
66 |
67 | \dontrun{
68 | # to add or replace features, use the add_features() function...
69 | quanteda::docvars(corp, field = c("wsj", "new")) <- 1}
70 |
71 | # corpus creation when no features are present
72 | corpusDummy <- sento_corpus(corpusdf = usnews[, 1:3])
73 |
74 | # corpus creation with a qualitative language feature
75 | usnews[["language"]] <- "en"
76 | usnews[["language"]][c(200:400)] <- "nl"
77 | corpusLang <- sento_corpus(corpusdf = usnews)
78 |
79 | }
80 | \seealso{
81 | \code{\link[quanteda]{corpus}}, \code{\link{add_features}}
82 | }
83 | \author{
84 | Samuel Borms
85 | }
86 |
-------------------------------------------------------------------------------- /man/sento_lexicons.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentolexicons.R
3 | \name{sento_lexicons}
4 | \alias{sento_lexicons}
5 | \title{Set up lexicons (and valence word list) for use in sentiment analysis}
6 | \usage{
7 | sento_lexicons(lexiconsIn, valenceIn = NULL, do.split = FALSE)
8 | }
9 | \arguments{
10 | \item{lexiconsIn}{a named \code{list} of (raw) lexicons, each element as a \code{data.table} or a \code{data.frame} with
11 | respectively a \code{character} column (the words) and a \code{numeric} column (the polarity scores).
This argument can include
12 | the built-in lexicons accessible via \code{sentometrics::list_lexicons}.}
13 |
14 | \item{valenceIn}{a single valence word list as a \code{data.table} or a \code{data.frame} with an \code{"x"} column
15 | and either a \code{"y"} or a \code{"t"} column. The first column has the words, \code{"y"} has the values for bigram
16 | shifting, and \code{"t"} has the types of the valence shifter for a clustered approach to sentiment calculation
17 | (supported types: \code{1} = negators, \code{2} = amplifiers, \code{3} = deamplifiers, \code{4} = adversative conjunctions).
18 | Type \code{4} is only used in a clusters-based sentence-level sentiment calculation.
19 | If three columns are provided, only the first two will be considered. This argument can be one of the
20 | built-in valence word lists accessible via \code{sentometrics::list_valence_shifters}. A word that appears in both a
21 | lexicon and the valence word list is prioritized as a lexical entry during sentiment calculation. If
22 | \code{NULL}, valence shifting is not applied in the sentiment analysis.}
23 |
24 | \item{do.split}{a \code{logical} that, if \code{TRUE}, splits every lexicon into a separate positive polarity and negative
25 | polarity lexicon.}
26 | }
27 | \value{
28 | A \code{list} of class \code{sento_lexicons} with each lexicon as a separate element according to its name, as a
29 | \code{data.table}, and optionally an element named \code{valence} that comprises the valence words. Every \code{"x"} column
30 | contains the words; every \code{"y"} column contains the scores. The \code{"t"} column for valence shifters
31 | contains the different types.
32 | }
33 | \description{
34 | Structures provided lexicon(s) and optionally valence words. One can for example combine (part of) the
35 | built-in lexicons from \code{data("list_lexicons")} with other lexicons, and add one of the built-in valence word lists
36 | from \code{data("list_valence_shifters")}. This function makes the output coherent, by converting all words to
37 | lowercase and checking for duplicates. All entries consisting of more than one word are discarded, as required for
38 | bag-of-words sentiment analysis.
39 | }
40 | \examples{
41 | data("list_lexicons", package = "sentometrics")
42 | data("list_valence_shifters", package = "sentometrics")
43 |
44 | # lexicons straight from built-in word lists
45 | l1 <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")])
46 |
47 | # including a self-made lexicon, with and without valence shifters
48 | lexIn <- c(list(myLexicon = data.table::data.table(w = c("nice", "boring"), s = c(2, -1))),
49 |            list_lexicons[c("GI_en")])
50 | valIn <- list_valence_shifters[["en"]]
51 | l2 <- sento_lexicons(lexIn)
52 | l3 <- sento_lexicons(lexIn, valIn)
53 | l4 <- sento_lexicons(lexIn, valIn[, c("x", "y")], do.split = TRUE)
54 | l5 <- sento_lexicons(lexIn, valIn[, c("x", "t")], do.split = TRUE)
55 | l6 <- l5[c("GI_en_POS", "valence")] # preserves sento_lexicons class
56 |
57 | \dontrun{
58 | # include lexicons from lexicon package
59 | lexIn2 <- list(hul = lexicon::hash_sentiment_huliu, joc = lexicon::hash_sentiment_jockers)
60 | l7 <- sento_lexicons(c(lexIn, lexIn2), valIn)}
61 |
62 | \dontrun{
63 | # faulty extraction, no replacement allowed
64 | l5["valence"]
65 | l2[0]
66 | l3[22]
67 | l4[1] <- l2[1]
68 | l4[[1]] <- l2[[1]]
69 | l4$GI_en_NEG <- l2$myLexicon}
70 |
71 | }
72 | \author{
73 | Samuel Borms
74 | }
75 |
-------------------------------------------------------------------------------- /man/sento_measures.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sentomeasures_main.R
3 | \name{sento_measures}
4 | \alias{sento_measures}
5 | \title{One-way road towards a sento_measures object}
6 | \usage{
7 | sento_measures(sento_corpus, lexicons, ctr)
8 | }
9 | \arguments{
10 | \item{sento_corpus}{a \code{sento_corpus} object created with \code{\link{sento_corpus}}.}
11 |
12 | \item{lexicons}{a \code{sento_lexicons} object created with \code{\link{sento_lexicons}}.}
13 |
14 | \item{ctr}{output from a \code{\link{ctr_agg}} call.}
15 | }
16 | \value{
17 | A \code{sento_measures} object, which is a \code{list} containing:
18 | \item{measures}{a \code{data.table} with a \code{"date"} column and all textual sentiment measures as remaining columns.}
19 | \item{features}{a \code{character} vector of the different features.}
20 | \item{lexicons}{a \code{character} vector of the different lexicons used.}
21 | \item{time}{a \code{character} vector of the different time weighting schemes used.}
22 | \item{stats}{a \code{data.frame} with some elementary statistics (mean, standard deviation, maximum, minimum, and
23 | average correlation with the other measures) for each individual sentiment measure. In all computations, NAs are
24 | removed first.}
25 | \item{sentiment}{the document-level sentiment scores \code{data.table} with \code{"date"},
26 | \code{"word_count"} and lexicon-feature sentiment scores columns. The \code{"date"} column has the
27 | dates converted at the frequency for across-document aggregation. All zeros are replaced by \code{NA}
28 | if \code{ctr$docs$weightingParam$do.ignoreZeros = TRUE}.}
29 | \item{attribWeights}{a \code{list} of document and time weights used in the \code{\link{attributions}} function.
30 | It serves no further direct purpose.}
31 | \item{ctr}{a \code{list} encapsulating the control parameters.}
32 | }
33 | \description{
34 | Wrapper function which assembles calls to \code{\link{compute_sentiment}} and \code{\link{aggregate}}.
35 | It serves as the most direct way towards a panel of textual sentiment measures as a \code{sento_measures} object.
36 | }
37 | \details{
38 | As a general rule, none of the names of the features, lexicons or time weighting schemes may contain
39 | a `-' symbol.
40 | }
41 | \examples{
42 | data("usnews", package = "sentometrics")
43 | data("list_lexicons", package = "sentometrics")
44 | data("list_valence_shifters", package = "sentometrics")
45 |
46 | # construct a sento_measures object to start with
47 | corpus <- sento_corpus(corpusdf = usnews)
48 | corpusSample <- quanteda::corpus_sample(corpus, size = 500)
49 | l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")], list_valence_shifters[["en"]])
50 | ctr <- ctr_agg(howWithin = "counts",
51 |                howDocs = "proportional",
52 |                howTime = c("equal_weight", "linear", "almon"),
53 |                by = "month",
54 |                lag = 3,
55 |                ordersAlm = 1:3,
56 |                do.inverseAlm = TRUE)
57 | sento_measures <- sento_measures(corpusSample, l, ctr)
58 | summary(sento_measures)
59 |
60 | }
61 | \seealso{
62 | \code{\link{compute_sentiment}}, \code{\link{aggregate}}, \code{\link{measures_update}}
63 | }
64 | \author{
65 | Samuel Borms, Keven Bluteau
66 | }
67 |
-------------------------------------------------------------------------------- /man/sentometrics-defunct.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/deprecated.R
3 | \name{sentometrics-defunct}
4 | \alias{sentometrics-defunct}
5 | \alias{ctr_merge}
6 | \alias{perform_MCS}
7 | \alias{fill_measures}
8 | \alias{merge_measures}
9 | \alias{to_global}
10 | \alias{subset_measures}
11 | \alias{select_measures}
12 | \alias{setup_lexicons}
13 | \alias{retrieve_attributions}
14 | \alias{perform_agg}
15 | \alias{plot_attributions}
16 | \alias{almons}
17 | \alias{exponentials}
18 | \alias{to_sentocorpus}
19 | \alias{to_sentiment}
20 | \alias{get_measures}
21 | \alias{measures_subset}
22 | \alias{measures_select}
23 | \alias{measures_delete}
24 | \alias{sentiment_bind}
25 | \alias{measures_merge}
26 | \alias{measures_global}
27 | \alias{sento_app}
28 | \title{Defunct functions}
29 | \usage{
30 | ctr_merge(...)
31 |
32 | perform_MCS(...)
33 |
34 | fill_measures(...)
35 |
36 | merge_measures(...)
37 |
38 | to_global(...)
39 |
40 | subset_measures(...)
41 |
42 | select_measures(...)
43 |
44 | setup_lexicons(...)
45 |
46 | retrieve_attributions(...)
47 |
48 | perform_agg(...)
49 |
50 | plot_attributions(...)
51 |
52 | almons(...)
53 |
54 | exponentials(...)
55 |
56 | to_sentocorpus(...)
57 |
58 | to_sentiment(...)
59 |
60 | get_measures(...)
61 |
62 | measures_subset(...)
63 |
64 | measures_select(...)
65 |
66 | measures_delete(...)
67 |
68 | sentiment_bind(...)
69 |
70 | measures_merge(...)
71 |
72 | measures_global(...)
73 |
74 | sento_app(...)
75 | }
76 | \arguments{
77 | \item{...}{allowed input arguments.}
78 | }
79 | \description{
80 | Functions made defunct due to changed naming or because their functionality was discarded. See the NEWS file for more
81 | information about when and why functions have been made defunct.
82 | } 83 | \keyword{internal} 84 | -------------------------------------------------------------------------------- /man/sentometrics-deprecated.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/deprecated.R 3 | \name{sentometrics-deprecated} 4 | \alias{sentometrics-deprecated} 5 | \title{Deprecated functions} 6 | \description{ 7 | Functions deprecated due to changed naming or because functionality is discarded. The general (but not 8 | blindly followed) rule is that deprecated functions are made defunct every 1 major or every 2 minor 9 | package updates. See the NEWS file for more information about since when or why functions have been 10 | deprecated. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/sentometrics-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentometrics.R 3 | \docType{package} 4 | \name{sentometrics-package} 5 | \alias{sentometrics} 6 | \alias{sentometrics-package} 7 | \title{sentometrics: An Integrated Framework for Textual Sentiment Time Series Aggregation and Prediction} 8 | \description{ 9 | The \pkg{sentometrics} package is an integrated framework for textual sentiment time series 10 | aggregation and prediction. It accounts for the intrinsic challenge that, for a given text, sentiment can 11 | be computed in many different ways, as well as the large number of possibilities to pool sentiment across 12 | texts and time. This additional layer of manipulation does not exist in standard text mining and time series 13 | analysis packages. The package therefore integrates the fast \emph{quantification} of sentiment from texts, 14 | the \emph{aggregation} into different sentiment time series and the optimized \emph{prediction} based on 15 | these measures. 16 | } 17 | \note{ 18 | Please cite the package in publications. Use \code{citation("sentometrics")}. 19 | } 20 | \section{Main functions}{ 21 | 22 | \itemize{ 23 | \item Corpus (features) generation: \code{\link{sento_corpus}}, \code{\link{add_features}}, 24 | \code{\link{as.sento_corpus}} 25 | \item Sentiment computation and aggregation into sentiment measures: \code{\link{ctr_agg}}, 26 | \code{\link{sento_lexicons}}, \code{\link{compute_sentiment}}, \code{\link{aggregate.sentiment}}, 27 | \code{\link{as.sentiment}}, \code{\link{sento_measures}}, \code{\link{peakdocs}}, 28 | \code{\link{peakdates}}, \code{\link{aggregate.sento_measures}} 29 | \item Sparse modeling: \code{\link{ctr_model}}, \code{\link{sento_model}} 30 | \item Prediction and post-modeling analysis: \code{\link{predict.sento_model}}, 31 | \code{\link{attributions}} 32 | } 33 | } 34 | 35 | \references{ 36 | Ardia, Bluteau, Borms and Boudt (2021). \strong{The R Package sentometrics to Compute, Aggregate, and 37 | Predict with Textual Sentiment}. \emph{Journal of Statistical Software 99(2), 1-40}, 38 | \doi{10.18637/jss.v099.i02}. 39 | 40 | Ardia, Bluteau and Boudt (2019). \strong{Questioning the news about economic growth: Sparse forecasting using 41 | thousands of news-based sentiment values}. \emph{International Journal of Forecasting 35, 1370-1386}, 42 | \doi{10.1016/j.ijforecast.2018.10.010}. 
43 | } 44 | \seealso{ 45 | Useful links: 46 | \itemize{ 47 | \item \url{https://sentometrics-research.com/sentometrics/} 48 | \item Report bugs at \url{https://github.com/SentometricsResearch/sentometrics/issues} 49 | } 50 | 51 | } 52 | \author{ 53 | \strong{Maintainer}: Samuel Borms \email{borms_sam@hotmail.com} (\href{https://orcid.org/0000-0001-9533-1870}{ORCID}) 54 | 55 | Authors: 56 | \itemize{ 57 | \item David Ardia \email{david.ardia@hec.ca} (\href{https://orcid.org/0000-0003-2823-782X}{ORCID}) 58 | \item Keven Bluteau \email{keven.bluteau@usherbrooke.ca} (\href{https://orcid.org/0000-0003-2990-4807}{ORCID}) 59 | \item Kris Boudt \email{kris.boudt@vub.be} (\href{https://orcid.org/0000-0002-1000-5142}{ORCID}) 60 | } 61 | 62 | Other contributors: 63 | \itemize{ 64 | \item Jeroen Van Pelt \email{jeroenvanpelt@hotmail.com} [contributor] 65 | \item Andres Algaba \email{andres.algaba@vub.be} [contributor] 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- /man/subset.sento_measures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentomeasures_methods.R 3 | \name{subset.sento_measures} 4 | \alias{subset.sento_measures} 5 | \title{Subset sentiment measures} 6 | \usage{ 7 | \method{subset}{sento_measures}(x, subset = NULL, select = NULL, delete = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{sento_measures} object created using \code{\link{sento_measures}}.} 11 | 12 | \item{subset}{a logical (non-\code{character}) expression indicating the rows to keep. If a 13 | \code{numeric} input is given, it is used for row index subsetting.} 14 | 15 | \item{select}{a \code{character} vector of the lexicon, feature and time weighting scheme names, to indicate which 16 | measures need to be selected, or a \code{list} of \code{character} vectors, possibly with separately specified 17 | combinations (consisting of one unique lexicon, one unique feature, and one unique time weighting scheme at maximum).} 18 | 19 | \item{delete}{see the \code{select} argument, but to delete measures.} 20 | 21 | \item{...}{not used.} 22 | } 23 | \value{ 24 | A modified \code{sento_measures} object, with only the remaining rows and sentiment measures, 25 | including updated information and statistics, but with the original sentiment scores \code{data.table} left untouched. 26 | } 27 | \description{ 28 | Subsets rows of the sentiment measures based on their column values.
29 | } 30 | \examples{ 31 | data("usnews", package = "sentometrics") 32 | data("list_lexicons", package = "sentometrics") 33 | data("list_valence_shifters", package = "sentometrics") 34 | 35 | # construct a sento_measures object to start with 36 | corpus <- sento_corpus(corpusdf = usnews) 37 | corpusSample <- quanteda::corpus_sample(corpus, size = 500) 38 | l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")]) 39 | ctr <- ctr_agg(howTime = c("equal_weight", "linear"), by = "year", lag = 3) 40 | sm <- sento_measures(corpusSample, l, ctr) 41 | 42 | # three specified indices in required list format 43 | three <- as.list( 44 | stringi::stri_split(c("LM_en--economy--linear", 45 | "HENRY_en--wsj--equal_weight", 46 | "HENRY_en--wapo--equal_weight"), 47 | regex = "--") 48 | ) 49 | 50 | # different subsets 51 | sub1 <- subset(sm, HENRY_en--economy--equal_weight >= 0.01) 52 | sub2 <- subset(sm, date \%in\% get_dates(sm)[3:12]) 53 | sub3 <- subset(sm, 3:12) 54 | sub4 <- subset(sm, 1:100) # warning 55 | 56 | # different selections 57 | sel1 <- subset(sm, select = "equal_weight") 58 | sel2 <- subset(sm, select = c("equal_weight", "linear")) 59 | sel3 <- subset(sm, select = c("linear", "LM_en")) 60 | sel4 <- subset(sm, select = list(c("linear", "wsj"), c("linear", "economy"))) 61 | sel5 <- subset(sm, select = three) 62 | 63 | # different deletions 64 | del1 <- subset(sm, delete = "equal_weight") 65 | del2 <- subset(sm, delete = c("linear", "LM_en")) 66 | del3 <- subset(sm, delete = list(c("linear", "wsj"), c("linear", "economy"))) 67 | del4 <- subset(sm, delete = c("equal_weight", "linear")) # warning 68 | del5 <- subset(sm, delete = three) 69 | 70 | } 71 | \author{ 72 | Samuel Borms 73 | } 74 | -------------------------------------------------------------------------------- /man/usnews.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sentometrics.R 3 | \docType{data} 4 | \name{usnews} 5 | \alias{usnews} 6 | \title{Texts (not) relevant to the U.S. economy} 7 | \format{ 8 | A \code{data.frame}, formatted as required to be an input for \code{\link{sento_corpus}}. 9 | } 10 | \source{ 11 | \strong{Economic News Article Tone and Relevance} dataset. Retrieved 12 | November 1, 2017. 13 | } 14 | \usage{ 15 | data("usnews") 16 | } 17 | \description{ 18 | A collection of texts annotated by humans in terms of relevance to the U.S. economy or not. The texts come from two major 19 | newspapers in the U.S. (The Wall Street Journal and The Washington Post) and cover 4145 documents between 1995 and 2014. It 20 | contains the following information: 21 | 22 | \itemize{ 23 | \item id. A \code{character} identifier. 24 | \item date. Date as \code{"yyyy-mm-dd"}. 25 | \item texts. Texts in \code{character} format. 26 | \item wsj. Equals 1 if the article comes from The Wall Street Journal. 27 | \item wapo. Equals 1 if the article comes from The Washington Post (complementary to `wsj'). 28 | \item economy. Equals 1 if the article is relevant to the U.S. economy. 29 | \item noneconomy. Equals 1 if the article is not relevant to the U.S. economy (complementary to `economy').
30 | } 31 | } 32 | \examples{ 33 | data("usnews", package = "sentometrics") 34 | usnews[3192, "texts"] 35 | usnews[1:5, c("id", "date", "texts")] 36 | 37 | } 38 | \keyword{datasets} 39 | -------------------------------------------------------------------------------- /man/weights_almon.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{weights_almon} 4 | \alias{weights_almon} 5 | \title{Compute Almon polynomials} 6 | \usage{ 7 | weights_almon(n, orders = 1:3, do.inverse = TRUE, do.normalize = TRUE) 8 | } 9 | \arguments{ 10 | \item{n}{a single \code{numeric} to indicate the lag length (cf., \emph{n}).} 11 | 12 | \item{orders}{a \code{numeric} vector as the sequence of the Almon orders (cf., \emph{r}). The maximum value 13 | corresponds to \emph{R}.} 14 | 15 | \item{do.inverse}{\code{TRUE} if the inverse Almon polynomials should be calculated as well.} 16 | 17 | \item{do.normalize}{a \code{logical}, if \code{TRUE} weights are normalized to unity.} 18 | } 19 | \value{ 20 | A \code{data.frame} of all Almon polynomial weighting curves, with \code{length(orders)} columns (times two if 21 | \code{do.inverse = TRUE}). 22 | } 23 | \description{ 24 | Computes Almon polynomial weighting curves. Handy to self-select specific time aggregation weighting schemes 25 | for input in \code{\link{ctr_agg}} using the \code{weights} argument. 26 | } 27 | \details{ 28 | The Almon polynomial formula implemented is: 29 | \eqn{(1 - (1 - i/n)^{r})(1 - i/n)^{R - r}}{(1 - (1 - i/n)^r) * (1 - i/n)^(R - r)}, where \eqn{i} is the lag index ordered from 30 | 1 to \eqn{n}. The inverse is computed by changing \eqn{i/n} to \eqn{1 - i/n}. 31 | } 32 | \seealso{ 33 | \code{\link{ctr_agg}} 34 | } 35 | -------------------------------------------------------------------------------- /man/weights_beta.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{weights_beta} 4 | \alias{weights_beta} 5 | \title{Compute Beta weighting curves} 6 | \usage{ 7 | weights_beta(n, a = 1:4, b = 1:4, do.normalize = TRUE) 8 | } 9 | \arguments{ 10 | \item{n}{a single \code{numeric} to indicate the lag length (cf., \emph{n}).} 11 | 12 | \item{a}{a \code{numeric} as the first parameter (cf., \emph{a}).} 13 | 14 | \item{b}{a \code{numeric} as the second parameter (cf., \emph{b}).} 15 | 16 | \item{do.normalize}{a \code{logical}, if \code{TRUE} weights are normalized to unity.} 17 | } 18 | \value{ 19 | A \code{data.frame} of beta weighting curves per combination of \code{a} and \code{b}. If \code{n = 1}, 20 | all weights are set to 1. 21 | } 22 | \description{ 23 | Computes Beta weighting curves as in Ghysels, Sinko and Valkanov (2007). Handy to self-select specific 24 | time aggregation weighting schemes for input in \code{\link{ctr_agg}} using the \code{weights} argument. 25 | } 26 | \details{ 27 | The Beta weighting abides by the following formula: 28 | \eqn{f(i/n; a, b) / \sum_{i} f(i/n; a, b)}{f(i/n; a, b) / \sum f(i/n; a, b)}, where \eqn{i} is the lag index ordered 29 | from 1 to \eqn{n}, \eqn{a} and \eqn{b} are two decay parameters, and 30 | \eqn{f(x; a, b) = (x^{a - 1}(1 - x)^{b - 1}\Gamma(a + b)) / (\Gamma(a)\Gamma(b))}{f(x; a, b) 31 | = (x^(a - 1) * (1 - x)^(b - 1) * T(a + b)) / (T(a) * T(b))}, where \eqn{\Gamma(.)}{T(.)} is 32 | the \code{\link{gamma}} function.
33 | } 34 | \references{ 35 | Ghysels, Sinko and Valkanov (2007). \strong{MIDAS regressions: Further results and new directions}. 36 | \emph{Econometric Reviews 26, 53-90}, \doi{10.1080/07474930600972467}. 37 | } 38 | \seealso{ 39 | \code{\link{ctr_agg}} 40 | } 41 | -------------------------------------------------------------------------------- /man/weights_exponential.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{weights_exponential} 4 | \alias{weights_exponential} 5 | \title{Compute exponential weighting curves} 6 | \usage{ 7 | weights_exponential( 8 | n, 9 | alphas = seq(0.1, 0.5, by = 0.1), 10 | do.inverse = FALSE, 11 | do.normalize = TRUE 12 | ) 13 | } 14 | \arguments{ 15 | \item{n}{a single \code{numeric} to indicate the lag length.} 16 | 17 | \item{alphas}{a \code{numeric} vector of decay factors, between 0 and 1, but multiplied by 10 in 18 | the implementation.} 19 | 20 | \item{do.inverse}{\code{TRUE} if the inverse exponential curves should be calculated as well.} 21 | 22 | \item{do.normalize}{a \code{logical}, if \code{TRUE} weights are normalized to unity.} 23 | } 24 | \value{ 25 | A \code{data.frame} of exponential weighting curves per value of \code{alphas}. 26 | } 27 | \description{ 28 | Computes exponential weighting curves. Handy to self-select specific time aggregation weighting schemes 29 | for input in \code{\link{ctr_agg}} using the \code{weights} argument. 30 | } 31 | \seealso{ 32 | \code{\link{ctr_agg}} 33 | } 34 | -------------------------------------------------------------------------------- /pkgdown/_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://sentometricsresearch.github.io/sentometrics 2 | 3 | destination: docs 4 | 5 | template: 6 | params: 7 | bootswatch: flatly # https://bootswatch.com 8 | docsearch: 9 | api_key: 29d61aa2be101325ab9a82514b58064b 10 | index_name: sentometrics 11 | 12 | toc: 13 | depth: 3 14 | 15 | navbar: 16 | structure: 17 | left: [home, intro, articles, contributions, news, reference] 18 | right: [docsearch, github] 19 | components: 20 | articles: 21 | text: Examples 22 | menu: 23 | - text: Tutorials 24 | - text: Corpus manipulation 25 | href: articles/examples/corpus.html 26 | - text: Sentiment computation 27 | href: articles/examples/sentiment.html 28 | - text: Index aggregation 29 | href: articles/examples/indexation.html 30 | - text: Modeling 31 | href: articles/examples/modeling.html 32 | - text: ------- 33 | - text: Applications 34 | - text: Creating EPU indices 35 | href: articles/applications/epu.html 36 | - text: Predicting the VIX index 37 | href: articles/applications/vix.html 38 | contributions: 39 | text: Contributions 40 | menu: 41 | - text: Analyzing Gopress data 42 | href: articles/contributions/gopress.html 43 | - text: Intratextual sentiment analysis 44 | href: articles/contributions/isa.html 45 | news: 46 | text: News 47 | menu: 48 | - text: Development 49 | href: articles/development.html 50 | - text: Releases 51 | href: news/index.html 52 | reference: ~ 53 | 54 | authors: 55 | Samuel Borms: 56 | href: https://www.linkedin.com/in/sam-borms 57 | David Ardia: 58 | href: https://ardiad.github.io 59 | Keven Bluteau: 60 | href: https://www.kevenbluteau.com 61 | Kris Boudt: 62 | href: https://linkedin.com/in/krisboudt 63 | Jeroen Van Pelt: 64 | href: https://linkedin.com/in/vanpeltjeroen 65 | Andres Algaba: 66 | href: 
https://linkedin.com/in/andresalgaba 67 | 68 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 2 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(shell ${R_HOME}/bin/Rscript -e "RcppParallel::RcppParallelLibs()") 3 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DRCPP_PARALLEL_USE_TBB=1 2 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "RcppParallel::RcppParallelLibs()") 3 | -------------------------------------------------------------------------------- /src/SentimentScorerBigrams.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef SENTIMENT_BIGRAMS 3 | #define SENTIMENT_BIGRAMS 4 | 5 | struct SentimentScorerBigrams : public RcppParallel::Worker { 6 | 7 | // thread-safe input 8 | const std::vector< std::vector< std::string > > texts; 9 | const std::unordered_map< std::string, std::vector< double > > lexiconMap; 10 | const std::unordered_map< std::string, double > valenceMap; 11 | const std::string how; 12 | const int nL; 13 | const int N; 14 | std::unordered_map< int, std::unordered_map< std::string, double > > frequencyMap; 15 | std::unordered_map< std::string, double > inverseFrequencyMap; 16 | const bool isFreqWeighting; 17 | 18 | // output 19 | RcppParallel::RMatrix< double > sentScores; 20 | 21 | SentimentScorerBigrams(const std::vector< std::vector< std::string > > texts, 22 | const std::unordered_map< std::string, std::vector< double > > lexiconMap, 23 | const std::unordered_map< std::string, double > valenceMap, 24 | const std::string how, 25 | int nL, 26 | int N, 27 | std::unordered_map< int, std::unordered_map< std::string, double > > frequencyMap, 28 | std::unordered_map< std::string, double > inverseFrequencyMap, 29 | const bool isFreqWeighting, 30 | Rcpp::NumericMatrix sentScores) 31 | : texts(texts), lexiconMap(lexiconMap), valenceMap(valenceMap), how(how), nL(nL), N(N), frequencyMap(frequencyMap), 32 | inverseFrequencyMap(inverseFrequencyMap), isFreqWeighting(isFreqWeighting), sentScores(sentScores) {} 33 | 34 | void operator()(std::size_t begin, std::size_t end) { 35 | 36 | for (std::size_t i = begin; i < end; i++) { 37 | 38 | std::vector< std::string > tokens = texts[i]; 39 | std::vector< double > scores(nL, 0.0); 40 | std::vector< double > nPolarized(nL, 0.0); 41 | double normalizer = 0.0; 42 | int nTokens = tokens.size(); 43 | int nPuncts = 0; 44 | std::vector< double > tokenShifters(nTokens, 1.0); 45 | std::vector< double > tokenWeights(nTokens, 0.0); 46 | std::vector< std::vector< double > > tokenScores(nTokens, std::vector< double >(nL, 0.0)); 47 | std::unordered_map< std::string, double > freqMap; 48 | double maxTokenFrequency = 1.0; 49 | if (isFreqWeighting) { 50 | update_frequency_map(freqMap, frequencyMap, i); 51 | } 52 | 53 | for (int j = 0; j < nTokens; j++) { 54 | std::string token = tokens[j]; 55 | double tokenFrequency = 1.0, tokenInverseFrequency = 1.0; 56 | if (isFreqWeighting) { 57 | update_token_frequency(tokenFrequency, freqMap, token); 58 | update_token_inverse_frequency(tokenInverseFrequency, inverseFrequencyMap, token, how); 59 | } 60 | if (lexiconMap.find(token) != lexiconMap.end()) { 61 | tokenScores[j] = lexiconMap.at(token); 62 | 63 | if (how != "proportional" && how != "counts" && how != "proportionalSquareRoot") { 64 | update_token_weights(tokenWeights, normalizer, nPolarized, j, nTokens, how, 65 | nL, tokenScores, tokenFrequency, tokenInverseFrequency, maxTokenFrequency, N); 66 | } 67 | 68 | int k = std::max(0, j - 1); 69 | if
(valenceMap.find(tokens[k]) != valenceMap.end()) { // bigram valence shifting 70 | tokenShifters[j] = valenceMap.at(tokens[k]); 71 | } 72 | } 73 | } 74 | update_token_scores(scores, tokenScores, normalizer, nPolarized, tokenShifters, 75 | tokenWeights, nL, nTokens, how, nPuncts); 76 | 77 | sentScores(i, 0) = nTokens; 78 | for (int m = 0; m < nL; m++) { 79 | sentScores(i, m + 1) = scores[m]; 80 | } 81 | 82 | } 83 | } 84 | 85 | }; 86 | 87 | #endif 88 | 89 | -------------------------------------------------------------------------------- /src/SentimentScorerClusters.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef SENTIMENT_CLUSTERS 3 | #define SENTIMENT_CLUSTERS 4 | 5 | struct SentimentScorerClusters : public RcppParallel::Worker { 6 | 7 | // thread-safe input 8 | const std::vector< std::vector< std::string > > texts; 9 | const std::unordered_map< std::string, std::vector< double > > lexiconMap; 10 | const std::unordered_map< std::string, double > valenceMap; 11 | const std::string how; 12 | const int nL; 13 | const int N; 14 | std::unordered_map< int, std::unordered_map< std::string, double> > frequencyMap; 15 | std::unordered_map< std::string, double > inverseFrequencyMap; 16 | const bool isFreqWeighting; 17 | 18 | // output 19 | RcppParallel::RMatrix< double > sentScores; 20 | 21 | SentimentScorerClusters(const std::vector< std::vector< std::string > > texts, 22 | const std::unordered_map< std::string, std::vector< double > > lexiconMap, 23 | const std::unordered_map< std::string, double > valenceMap, 24 | const std::string how, 25 | int nL, 26 | int N, 27 | std::unordered_map< int, std::unordered_map< std::string, double > > frequencyMap, 28 | std::unordered_map< std::string, double > inverseFrequencyMap, 29 | const bool isFreqWeighting, 30 | Rcpp::NumericMatrix sentScores) 31 | : texts(texts), lexiconMap(lexiconMap), valenceMap(valenceMap), how(how), nL(nL), N(N), frequencyMap(frequencyMap), 32 | inverseFrequencyMap(inverseFrequencyMap), isFreqWeighting(isFreqWeighting), sentScores(sentScores) {} 33 | 34 | void operator()(std::size_t begin, std::size_t end) { 35 | 36 | for (std::size_t i = begin; i < end; i++) { 37 | 38 | std::vector< std::string > tokens = texts[i]; 39 | std::vector< double > scores(nL, 0.0); 40 | std::vector< double > nPolarized(nL, 0.0); 41 | double normalizer = 0.0, maxTokenFrequency = 1.0; 42 | int nTokens = tokens.size(), lB = 0, nB = 4, nA = 2; 43 | int nPuncts = 0; 44 | std::vector< std::vector< double > > tokenScores(nTokens, std::vector< double >(nL, 0.0)); 45 | 46 | std::vector< double > tokenWeights(nTokens, 0.0); 47 | std::vector< double > tokenShifters(nTokens, 1.0); 48 | std::unordered_map< std::string, double > freqMap; 49 | 50 | if (isFreqWeighting) { 51 | update_frequency_map(freqMap, frequencyMap, i); 52 | } 53 | 54 | for (int j = 0; j < nTokens; j++) { 55 | std::string token = tokens[j]; 56 | double tokenFrequency = 1.0, tokenInverseFrequency = 1.0; 57 | if (isFreqWeighting) { 58 | update_token_frequency(tokenFrequency, freqMap, token); 59 | update_token_inverse_frequency(tokenInverseFrequency, inverseFrequencyMap, token, how); 60 | } 61 | if (lexiconMap.find(token) != lexiconMap.end()) { // hit 62 | tokenScores[j] = lexiconMap.at(token); 63 | std::vector< int > shifters(3); // counters for negators, amplifiers and deamplifiers around the hit 64 | 65 | if (how != "proportional" && how != "counts" && how != "proportionalSquareRoot") { 66 | update_token_weights(tokenWeights, normalizer, nPolarized, j, nTokens, how, 67 | nL, tokenScores, tokenFrequency, tokenInverseFrequency, maxTokenFrequency, N); 68 | } 69 | 70 | int st =
std::max(lB, j - nB); 71 | int en = std::min(nTokens, j + nA + 1); 72 | 73 | for (int k = st; k < en; k++) { 74 | if (k == j) continue; 75 | std::string token_k = tokens[k]; 76 | if (lexiconMap.find(token_k) != lexiconMap.end()) { 77 | tokenScores[k] = lexiconMap.at(token_k); 78 | if (how != "proportional" && how != "counts" && how != "proportionalSquareRoot") { 79 | update_token_weights(tokenWeights, normalizer, nPolarized, k, nTokens, how, 80 | nL, tokenScores, tokenFrequency, tokenInverseFrequency, maxTokenFrequency, N); 81 | } 82 | } else if (valenceMap.find(token_k) != valenceMap.end()) { 83 | double valType = valenceMap.at(token_k); 84 | update_primary_shifters(shifters, valType); 85 | } 86 | } 87 | tokenShifters[j] = compute_cluster_impact(shifters); 88 | 89 | lB = en + 1; // reset index such that polarity clusters are not overlapping 90 | j = en; // updated to j + 1 immediately after 91 | } 92 | } 93 | update_token_scores(scores, tokenScores, normalizer, nPolarized, tokenShifters, 94 | tokenWeights, nL, nTokens, how, nPuncts); 95 | 96 | sentScores(i, 0) = nTokens; 97 | for (int m = 0; m < nL; m++) { 98 | sentScores(i, m + 1) = scores[m]; 99 | } 100 | 101 | } 102 | } 103 | 104 | }; 105 | 106 | #endif 107 | 108 | -------------------------------------------------------------------------------- /src/SentimentScorerOnegrams.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef SENTIMENT_ONEGRAMS 3 | #define SENTIMENT_ONEGRAMS 4 | 5 | struct SentimentScorerOnegrams : public RcppParallel::Worker { 6 | 7 | // thread-safe input 8 | const std::vector< std::vector< std::string > > texts; 9 | const std::unordered_map< std::string, std::vector< double > > lexiconMap; 10 | const std::string how; 11 | const int nL; 12 | const int N; 13 | std::unordered_map< int, std::unordered_map< std::string, double > > frequencyMap; 14 | std::unordered_map< std::string, double > inverseFrequencyMap; 15 | const bool isFreqWeighting; 16 | 17 | // output 18 | RcppParallel::RMatrix< double > sentScores; 19 | 20 | SentimentScorerOnegrams(const std::vector< std::vector< std::string > > texts, 21 | const std::unordered_map< std::string, std::vector< double > > lexiconMap, 22 | const std::string how, 23 | int nL, 24 | int N, 25 | std::unordered_map< int, std::unordered_map< std::string, double > > frequencyMap, 26 | std::unordered_map< std::string, double > inverseFrequencyMap, 27 | const bool isFreqWeighting, 28 | Rcpp::NumericMatrix sentScores) 29 | : texts(texts), lexiconMap(lexiconMap), how(how), nL(nL), N(N), frequencyMap(frequencyMap), 30 | inverseFrequencyMap(inverseFrequencyMap), isFreqWeighting(isFreqWeighting), sentScores(sentScores) {} 31 | 32 | void operator()(std::size_t begin, std::size_t end) { 33 | 34 | for (std::size_t i = begin; i < end; i++) { 35 | 36 | std::vector< std::string > tokens = texts[i]; 37 | std::vector< double > scores(nL, 0.0); // scores for one text, one entry per lexicon 38 | std::vector< double > nPolarized(nL, 0.0); 39 | double normalizer = 0.0; 40 | int nTokens = tokens.size(); 41 | int nPuncts = 0; 42 | std::vector< std::vector< double > > tokenScores(nTokens, std::vector< double >(nL, 0.0)); 43 | std::vector< double > tokenWeights(nTokens, 0.0); 44 | std::vector< double > tokenShifters(nTokens, 1.0); 45 | std::unordered_map< std::string, double > freqMap; 46 | double maxTokenFrequency = 1.0; 47 | if (isFreqWeighting) { 48 | update_frequency_map(freqMap, frequencyMap, i); 49 | } 50 | 51 | for (int j = 0; j < nTokens; j++) { 52 | std::string token
= tokens[j]; 53 | double tokenFrequency = 1.0; 54 | double tokenInverseFrequency = 1.0; 55 | if (isFreqWeighting) { 56 | update_token_frequency(tokenFrequency, freqMap, token); 57 | update_token_inverse_frequency(tokenInverseFrequency, inverseFrequencyMap, token, how); 58 | } 59 | 60 | if (lexiconMap.find(token) != lexiconMap.end()) { 61 | tokenScores[j] = lexiconMap.at(token); // get value of token for each lexicon 62 | } 63 | if (how != "proportional" && how != "counts" && how != "proportionalSquareRoot") { 64 | update_token_weights(tokenWeights, normalizer, nPolarized, j, nTokens, how, 65 | nL, tokenScores, tokenFrequency, tokenInverseFrequency, maxTokenFrequency, N); 66 | } 67 | } 68 | update_token_scores(scores, tokenScores, normalizer, nPolarized, tokenShifters, 69 | tokenWeights, nL, nTokens, how, nPuncts); 70 | 71 | sentScores(i, 0) = nTokens; 72 | for (int m = 0; m < nL; m++) { 73 | sentScores(i, m + 1) = scores[m]; 74 | } 75 | 76 | } 77 | } 78 | 79 | }; 80 | 81 | #endif 82 | 83 | -------------------------------------------------------------------------------- /src/compute_df.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include <RcppArmadillo.h> 3 | // [[Rcpp::depends(RcppArmadillo)]] 4 | 5 | using namespace arma; 6 | using namespace Rcpp; 7 | 8 | // elastic net degrees of freedom estimator (Tibshirani and Taylor, 2012) 9 | 10 | // [[Rcpp::export]] 11 | Rcpp::NumericVector compute_df(double alpha, 12 | Rcpp::NumericVector lambda, 13 | Rcpp::List xA) { 14 | int nLambda = lambda.size(); 15 | Rcpp::NumericVector dfA(nLambda); 16 | for (int i = 0; i < nLambda; i++) { 17 | arma::mat matr = xA[i]; 18 | double nA = matr.n_cols; 19 | if (nA == 0) { 20 | dfA[i] = 1L; 21 | } else if (alpha == 0) { // ridge df 22 | arma::vec s; 23 | bool pass = arma::svd(s, matr); 24 | if (pass == true) { 25 | arma::vec ss = arma::pow(s, 2); 26 | double estimate = arma::sum(ss / (ss + lambda[i])); 27 | dfA[i] = estimate; 28 | } else { 29 | dfA[i] = NumericVector::get_na(); 30 | } 31 | } else if (alpha == 1) { // lasso df 32 | dfA[i] = nA; 33 | } else { // elastic net df 34 | arma::mat inverted; 35 | arma::mat toInvert = matr.t() * matr + (1 - alpha) * lambda[i] * arma::eye(nA, nA); 36 | bool pass = arma::inv(inverted, toInvert); 37 | if (pass == true) { 38 | double estimate = arma::sum(arma::diagvec(matr * inverted * matr.t())); 39 | dfA[i] = estimate; 40 | } else { 41 | dfA[i] = NumericVector::get_na(); 42 | } 43 | } 44 | } 45 | return dfA; 46 | } 47 | 48 | -------------------------------------------------------------------------------- /src/compute_sentiment_onegrams.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include <Rcpp.h> 3 | #include <RcppParallel.h> 4 | #include "utils.h" 5 | #include "SentimentScorerOnegrams.h" 6 | 7 | // [[Rcpp::depends(RcppParallel)]] 8 | using namespace std; 9 | using namespace Rcpp; 10 | using namespace RcppParallel; 11 | 12 | // [[Rcpp::export]] 13 | Rcpp::NumericMatrix compute_sentiment_onegrams(std::vector< std::vector< std::string > > texts, 14 | Rcpp::List lexicons, 15 | std::string how) { 16 | 17 | int N = texts.size(); // already tokenized texts 18 | int nL = lexicons.size(); 19 | bool isFreqWeighting = is_frequency_weighting(how); 20 | Rcpp::CharacterVector colNames = prepare_column_names(lexicons.names(), nL); 21 | 22 | std::unordered_map< std::string, std::vector< double > > lexiconMap = make_lexicon_map(lexicons, nL); 23 | std::unordered_map< int, std::unordered_map< std::string, double > > frequencyMap; 24 |
std::unordered_map< std::string, double > inverseFrequencyMap; 25 | 26 | if (isFreqWeighting) { 27 | make_frequency_maps(frequencyMap, inverseFrequencyMap, texts); 28 | } 29 | Rcpp::NumericMatrix sentScores(N, nL + 1); // output matrix of word count and sentiment scores 30 | SentimentScorerOnegrams sentimentScorer(texts, lexiconMap, how, nL, N, frequencyMap, inverseFrequencyMap, isFreqWeighting, sentScores); 31 | parallelFor(0, N, sentimentScorer); 32 | colnames(sentScores) = colNames; 33 | 34 | return(sentScores); 35 | } 36 | 37 | -------------------------------------------------------------------------------- /src/compute_sentiment_sentences.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include <Rcpp.h> 3 | #include <RcppParallel.h> 4 | #include "utils.h" 5 | #include "SentimentScorerSentences.h" 6 | 7 | // [[Rcpp::depends(RcppParallel)]] 8 | 9 | using namespace std; 10 | using namespace Rcpp; 11 | using namespace RcppParallel; 12 | 13 | // [[Rcpp::export]] 14 | Rcpp::NumericMatrix compute_sentiment_sentences(std::vector< std::vector< std::string > > texts, 15 | Rcpp::List lexicons, 16 | std::string how, 17 | int valenceType) { 18 | 19 | int N = texts.size(); // already tokenized texts 20 | int nL = lexicons.size(); 21 | if (valenceType != 0) { 22 | nL = lexicons.size() - 1; // the last one has the valence shifters 23 | } 24 | bool isFreqWeighting = is_frequency_weighting(how); 25 | 26 | Rcpp::CharacterVector colNames = prepare_column_names(lexicons.names(), nL); 27 | 28 | std::unordered_map< std::string, std::vector< double > > lexiconMap = make_lexicon_map(lexicons, nL); 29 | std::unordered_map< int, std::unordered_map< std::string, double > > frequencyMap; 30 | std::unordered_map< std::string, double > inverseFrequencyMap; 31 | if (isFreqWeighting) { 32 | make_frequency_maps(frequencyMap, inverseFrequencyMap, texts); 33 | } 34 | std::unordered_map< std::string, double > valenceMap; 35 | if (valenceType != 0) { 36 | Rcpp::List valenceList = lexicons[nL]; 37 | valenceMap = make_valence_map(valenceList); 38 | } 39 | 40 | Rcpp::NumericMatrix sentScores(N, nL + 1); 41 | 42 | SentimentScorerSentences sentimentScorer(texts, lexiconMap, valenceMap, how, nL, N, frequencyMap, inverseFrequencyMap, isFreqWeighting, valenceType, sentScores); 43 | parallelFor(0, N, sentimentScorer); 44 | 45 | colnames(sentScores) = colNames; 46 | 47 | return(sentScores); 48 | 49 | } 50 | 51 | -------------------------------------------------------------------------------- /src/compute_sentiment_valence.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include <Rcpp.h> 3 | #include <RcppParallel.h> 4 | #include "utils.h" 5 | #include "SentimentScorerBigrams.h" 6 | #include "SentimentScorerClusters.h" 7 | // [[Rcpp::depends(RcppParallel)]] 8 | 9 | using namespace std; 10 | using namespace Rcpp; 11 | using namespace RcppParallel; 12 | 13 | // [[Rcpp::export]] 14 | Rcpp::NumericMatrix compute_sentiment_valence(std::vector< std::vector< std::string > > texts, 15 | Rcpp::List lexicons, 16 | std::string how) { 17 | 18 | int N = texts.size(); // already tokenized texts 19 | int nL = lexicons.size() - 1; // the last one has the valence shifters 20 | bool isFreqWeighting = is_frequency_weighting(how); 21 | Rcpp::CharacterVector colNames = prepare_column_names(lexicons.names(), nL); 22 | 23 | std::unordered_map< std::string, std::vector< double > > lexiconMap = make_lexicon_map(lexicons, nL); 24 | 25 | Rcpp::List valenceList = lexicons[nL]; 26 | Rcpp::CharacterVector valenceCols = valenceList.names(); 27 | std::unordered_map< std::string,
double > valenceMap = make_valence_map(valenceList); 28 | std::unordered_map< int, std::unordered_map< std::string, double > > frequencyMap; 29 | std::unordered_map< std::string, double > inverseFrequencyMap; 30 | if (isFreqWeighting) { 31 | make_frequency_maps(frequencyMap, inverseFrequencyMap, texts); 32 | } 33 | 34 | Rcpp::NumericMatrix sentScores(N, nL + 1); // output matrix of word count and sentiment scores 35 | if (valenceCols[1] == "y") { 36 | SentimentScorerBigrams sentimentScorer(texts, lexiconMap, valenceMap, how, nL, N, frequencyMap, inverseFrequencyMap, isFreqWeighting, sentScores); 37 | parallelFor(0, N, sentimentScorer); 38 | } else if (valenceCols[1] == "t") { 39 | SentimentScorerClusters sentimentScorer(texts, lexiconMap, valenceMap, how, nL, N, frequencyMap, inverseFrequencyMap, isFreqWeighting, sentScores); 40 | parallelFor(0, N, sentimentScorer); 41 | } 42 | 43 | colnames(sentScores) = colNames; 44 | 45 | return(sentScores); 46 | } 47 | 48 | -------------------------------------------------------------------------------- /src/fill_NAs.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include <Rcpp.h> 3 | 4 | using namespace Rcpp; 5 | 6 | // [[Rcpp::export]] 7 | Rcpp::NumericMatrix fill_NAs(Rcpp::NumericMatrix x) { 8 | int n = x.nrow(); 9 | int m = x.ncol(); 10 | for (int i = 0; i < m; i++) { 11 | int k = 0; // current index of fill value 12 | Rcpp::NumericVector col = x(_, i); 13 | for (int j = 0; j < n; j++) { 14 | if (NumericVector::is_na(col[j])) { 15 | col[j] = col[k]; // add in fill value 16 | } else { 17 | k = j; // update index 18 | } 19 | } 20 | x(_, i) = col; 21 | } 22 | return(x); 23 | } 24 | 25 | -------------------------------------------------------------------------------- /src/get_dtf_vectors.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include <Rcpp.h> 3 | #include "utils.h" 4 | using namespace Rcpp; 5 | 6 | // [[Rcpp::export]] 7 | List get_dtf_vectors(std::vector< std::vector< std::string > > texts) { 8 | std::unordered_map< int, std::unordered_map< std::string, double > > tokenMap; 9 | std::unordered_map< std::string, double > docMap; 10 | make_frequency_maps(tokenMap, docMap, texts); 11 | return List::create(Named("DF") = docMap, Named("TF") = tokenMap); 12 | } 13 | 14 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | 2 | library("testthat") 3 | library("sentometrics") 4 | 5 | test_check("sentometrics") 6 | 7 | -------------------------------------------------------------------------------- /tests/testthat/test_aggregation.R: -------------------------------------------------------------------------------- 1 | 2 | context("Aggregation") 3 | 4 | library("data.table") 5 | library("quanteda") 6 | 7 | set.seed(123) 8 | 9 | # corpus, lexicon and aggregation control creation 10 | data("usnews") 11 | corpus <- quanteda::corpus_sample(sento_corpus(corpusdf = usnews), size = 1000) 12 | 13 | data("list_lexicons") 14 | lex <- sento_lexicons(list_lexicons[c("GI_en", "LM_en")]) 15 | lexClust <- sento_lexicons(list_lexicons[c("GI_en", "LM_en", "HENRY_en")], 16 | list_valence_shifters[["en"]][, c("x", "t")]) 17 | 18 | ### tests from here ### 19 | 20 | ctr1 <- ctr_agg(howWithin = "proportionalPol", howDocs = "equal_weight", howTime = "almon", by = "month", 21 | lag = 5, ordersAlm = 1:3, do.inverseAlm = TRUE) 22 | sentMeas1 <- sento_measures(corpus, lex, ctr1) 23 | 24 | ctr2 <- ctr_agg(howWithin = "counts",
howDocs = "proportional", howTime = c("equal_weight", "linear", "own"), 25 | by = "year", lag = 2, weights = data.frame(q1 = c(0.25, 0.75), q3 = c(0.75, 0.25)), 26 | do.ignoreZeros = FALSE, do.sentence = TRUE) 27 | sentMeas2 <- sento_measures(corpus, lex, ctr2) 28 | 29 | ctr3 <- ctr_agg(howWithin = "counts", howDocs = "inverseProportional", howTime = c("equal_weight", "own"), 30 | by = "year", lag = 3, weights = data.frame(GI_en = c(0.3, 0.6, 0.1))) 31 | 32 | ctr4 <- ctr_agg(howWithin = "UShaped", howDocs = "inverseProportional", howTime = "exponential", 33 | do.inverseExp = TRUE, alphas = c(0.1, 0.2, 0.3), by = "day", lag = 180) 34 | 35 | ctr5 <- ctr_agg(howWithin = "counts", howDocs = "exponential", alphaExpDocs = 0.2, 36 | howTime = "linear", by = "year", lag = 3) 37 | 38 | ctr6 <- ctr_agg(howWithin = "TFIDF", howDocs = "inverseExponential", alphaExpDocs = 0.1, 39 | howTime = "equal_weight", by = "week", lag = 7) 40 | 41 | # sento_measures 42 | test_that("Number of columns coincide with provided dimensions", { 43 | expect_equal(nmeasures(sentMeas1), length(sentMeas1$features) * length(sentMeas1$lexicons) * length(sentMeas1$time)) 44 | expect_equal(nmeasures(sentMeas2), length(sentMeas2$features) * length(sentMeas2$lexicons) * length(sentMeas2$time)) 45 | }) 46 | 47 | # ctr_agg 48 | test_that("Aggregation control function breaks when wrong inputs supplied", { 49 | expect_error(ctr_agg(howWithin = c("oops", "again"), howDocs = c("mistake", "forYou"), howTime = "bad", 50 | lag = 42, by = "infinitely", fill = "theMartiniPolice", nCore = c("yes", "man"))) 51 | expect_error(ctr_agg(howTime = c("almon", "beta", "exponential"), lag = 0, 52 | ordersAlm = -1:2, aBeta = -2, bBeta = -3, alphasExp = c(-1, -3))) 53 | expect_message(ctr_agg(howTime = "linear", lag = 4, weights = data.frame(a = c(1/2, 1/2)))) 54 | expect_error(ctr_agg(howTime = "own", lag = 12, weights = data.frame("dot--hacker" = rep(1/12, 12), check.names = FALSE))) 55 | expect_message(ctr_agg(howTime = c("linear", "beta"), lag = 1)) 56 | }) 57 | 58 | # aggregate.sentiment 59 | s1 <- compute_sentiment(corpus, lex, how = "proportional") 60 | s2 <- compute_sentiment(as.character(corpus), lex, how = "counts") 61 | s3 <- compute_sentiment(corpus, lexClust, how = "proportionalSquareRoot", do.sentence = TRUE) 62 | sentimentAgg <- aggregate(s3, ctr_agg(lag = 7), do.full = FALSE) 63 | test_that("Test input and output of sentiment aggregation functionality", { 64 | expect_true(inherits(s1, "sentiment")) 65 | expect_true(inherits(s2, "data.table")) 66 | expect_true(inherits(s3, "sentiment")) 67 | expect_true(inherits(aggregate(s1, ctr1), "sento_measures")) 68 | expect_true(inherits(aggregate(s3, ctr1), "sento_measures")) # sentence-level with dates (full) 69 | expect_true(inherits(aggregate(s3, ctr1, do.full = FALSE), "sentiment")) 70 | expect_error(aggregate(s2, ctr2)) # doc-level but no dates 71 | expect_error(sento_measures(corpus, lex, ctr3)) # because overlapping names specified 72 | expect_true(inherits(sento_measures(corpus, lex, ctr4), "sento_measures")) 73 | expect_true(inherits(sento_measures(corpus, lex, ctr5), "sento_measures")) 74 | expect_true(inherits(sento_measures(corpus, lex, ctr6), "sento_measures")) 75 | # expect_true(all.equal(sentimentAgg[["word_count"]], s1[["word_count"]])) 76 | }) 77 | 78 | # peakdocs 79 | test_that("Output for peak documents extraction in line with input", { 80 | expect_length(peakdocs(s1, n = 7, type = "both"), 7) 81 | expect_length(peakdocs(s1, n = 11, type = "pos"), 11) 82 | 
expect_length(peakdocs(s1, n = 1, type = "neg"), 1) 83 | expect_length(peakdocs(s1, n = 25, type = "both", do.average = TRUE), 25) 84 | }) 85 | 86 | # peakdates 87 | test_that("Output for peak dates extraction in line with input", { 88 | expect_length(peakdates(sentMeas1, n = 15, type = "both"), 15) 89 | expect_length(peakdates(sentMeas1, n = 21, type = "pos"), 21) 90 | expect_length(peakdates(sentMeas1, n = 4, type = "neg"), 4) 91 | expect_length(peakdates(sentMeas1, n = 10, type = "both", do.average = TRUE), 10) 92 | }) 93 | 94 | -------------------------------------------------------------------------------- /tests/testthat/test_attribution.R: -------------------------------------------------------------------------------- 1 | 2 | context("Attribution") 3 | 4 | cat("\n") 5 | 6 | library("data.table") 7 | library("quanteda") 8 | 9 | set.seed(123) 10 | 11 | # corpus, lexicon and aggregation control creation 12 | data("usnews") 13 | corpus <- quanteda::corpus_sample( 14 | quanteda::corpus_subset(sento_corpus(corpusdf = usnews), date >= "1997-01-01" & date <= "2000-12-01"), 15 | 500 16 | ) 17 | 18 | data("list_lexicons") 19 | lex <- sento_lexicons(list_lexicons[c("GI_en", "LM_en")]) 20 | ctrA <- ctr_agg(howWithin = "counts", howDocs = "proportional", howTime = "almon", by = "day", 21 | lag = 24, ordersAlm = 1:3, do.inverseAlm = TRUE, do.ignoreZeros = FALSE, fill = "latest") 22 | 23 | sento_measures <- sento_measures(corpus, lex, ctrA) 24 | 25 | # preparation of estimation data 26 | N <- nobs(sento_measures) 27 | y <- rnorm(N) # random y variable 28 | x <- data.frame(runif(N), rnorm(N)) # two additional random x variables 29 | colnames(x) <- c("x1", "x2") 30 | 31 | # model run 32 | ctrM <- ctr_model(model = "gaussian", type = "Cp", do.iter = TRUE, h = 3, lambdas = NULL, 33 | nSample = N - 12, do.shrinkage.x = TRUE, alphas = 0) 34 | out <- sento_model(sento_measures, y, x = x, ctr = ctrM) 35 | 36 | ### tests from here ### 37 | 38 | attributions <- attributions(out, sento_measures, do.normalize = FALSE) 39 | 40 | l <- rowSums(attributions$lexicons[, -1], na.rm = TRUE) 41 | f <- rowSums(attributions$features[, -1], na.rm = TRUE) 42 | t <- rowSums(attributions$time[, -1], na.rm = TRUE) 43 | la <- rowSums(attributions$lags[, -1], na.rm = TRUE) 44 | # d <- as.vector(sapply(attributions$documents, function(x) return(sum(x$attrib, na.rm = TRUE)))) 45 | 46 | TOL <- 1e-04 47 | 48 | # attributions 49 | test_that("Attributions across all dimensions should be the same across rows", { 50 | expect_equal(l, f) 51 | expect_equal(l, t) 52 | expect_equal(l, la, tolerance = TOL) 53 | # expect_equal(l, d) # does not hold because fill = "latest" 54 | expect_equal(f, t) 55 | expect_equal(f, la, tolerance = TOL) 56 | # expect_equal(f, d) 57 | expect_equal(t, la, tolerance = TOL) 58 | # expect_equal(t, d) 59 | # expect_equal(la, d) 60 | }) 61 | 62 | # plot.attributions 63 | p <- plot(attributions, group = sample(c("features", "lexicons", "time", "lags"), 1)) 64 | test_that("Plot is a ggplot object", { 65 | expect_true(inherits(p, "ggplot")) 66 | }) 67 | 68 | -------------------------------------------------------------------------------- /tests/testthat/test_methods_sentomeasures.R: -------------------------------------------------------------------------------- 1 | 2 | context("Methods sentomeasures") 3 | 4 | library("data.table") 5 | library("quanteda") 6 | 7 | set.seed(123) 8 | 9 | # corpus, lexicon and aggregation control creation 10 | data("usnews") 11 | corpus <- quanteda::corpus_sample(sento_corpus(corpusdf = 
usnews), size = 600) 12 | 13 | data("list_lexicons") 14 | lex <- sento_lexicons(list_lexicons[c("HENRY_en", "LM_en")]) 15 | ctr <- ctr_agg(howWithin = "counts", howDocs = "proportional", howTime = c("linear", "exponential"), by = "day", 16 | lag = 60, alphasExp = c(0.1, 0.6)) 17 | 18 | sentMeas <- sento_measures(corpus, lex, ctr) 19 | 20 | ### tests from here ### 21 | 22 | # diff 23 | N <- nobs(sentMeas) 24 | M <- nmeasures(sentMeas) 25 | test_that("Differencing is properly done", { 26 | expect_equal(nobs(diff(sentMeas, lag = 1)), N - 1) 27 | expect_equal(nobs(diff(sentMeas, lag = 2, differences = 3)), N - 2 * 3) 28 | }) 29 | 30 | # scale 31 | s1 <- scale(sentMeas) 32 | s2 <- suppressWarnings(scale(sentMeas, center = -as.matrix(as.data.table(sentMeas)[, -1]), scale = FALSE)) 33 | s3 <- scale(sentMeas, center = as.numeric(sentMeas$stats["mean", ]), scale = as.numeric(sentMeas$stats["sd", ])) 34 | s4 <- scale(sentMeas, 35 | center = -matrix(as.numeric(sentMeas$stats["mean", ]), nrow = N, ncol = M, byrow = TRUE), 36 | scale = matrix(as.numeric(sentMeas$stats["sd", ]), nrow = N, ncol = M, byrow = TRUE)) 37 | test_that("Scaling is properly done", { 38 | expect_equal(rowMeans(s1$stats["mean", ], na.rm = TRUE), c(mean = 0)) 39 | expect_equal(rowMeans(s1$stats["sd", ], na.rm = TRUE), c(sd = 1)) 40 | expect_equal(rowMeans(s2$stats["mean", ], na.rm = TRUE), c(mean = 0)) 41 | expect_equal(rowMeans(s2$stats["sd", ], na.rm = TRUE), c(sd = 0)) 42 | expect_equal(s1$stats["mean", ], s3$stats["mean", ]) 43 | expect_equal(s1$stats["sd", ], s3$stats["sd", ]) 44 | expect_equal(s1$stats["mean", ], s4$stats["mean", ]) 45 | expect_equal(s1$stats["sd", ], s4$stats["sd", ]) 46 | }) 47 | 48 | # summary.sentomeasures, print.sentomeasures 49 | cat("\n") 50 | test_that("No output returned when object summarized or printed", { 51 | expect_null(summary(sentMeas)) 52 | expect_null(print(sentMeas)) 53 | }) 54 | 55 | # plot.sentomeasures 56 | p <- plot(sentMeas, group = sample(c("features", "lexicons", "time"), 1)) 57 | test_that("Plot is a ggplot object", { 58 | expect_true(inherits(p, "ggplot")) 59 | }) 60 | 61 | # as.data.table, measures_to_long 62 | measuresLong <- as.data.table(sentMeas, format = "long") 63 | test_that("Proper long formatting of sentiment measures", { 64 | expect_true(nrow(measuresLong) == nobs(sentMeas) * nmeasures(sentMeas)) 65 | expect_true(all(sentMeas$lexicons %in% unique(measuresLong[["lexicons"]]))) 66 | expect_true(all(sentMeas$features %in% unique(measuresLong[["features"]]))) 67 | expect_true(all(sentMeas$time %in% unique(measuresLong[["time"]]))) 68 | expect_true(all(as.data.table(sentMeas)[["date"]] %in% unique(measuresLong[["date"]]))) 69 | }) 70 | 71 | # as.data.frame 72 | test_that("Proper data.frame conversion", { 73 | expect_true(class(as.data.frame(sentMeas)) == "data.frame") 74 | }) 75 | 76 | -------------------------------------------------------------------------------- /vignettes/contributions/gopress_figures/read_later.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/vignettes/contributions/gopress_figures/read_later.jpg -------------------------------------------------------------------------------- /vignettes/contributions/gopress_figures/save_as.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SentometricsResearch/sentometrics/eafb2bd67145e098aed13e85f7ade086ebb3d607/vignettes/contributions/gopress_figures/save_as.jpg -------------------------------------------------------------------------------- /vignettes/development.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Future development" 3 | output: rmarkdown::html_vignette 4 | --- 5 | 6 | Here is an overview of the most important anticipated developments, as well as known bugs and minor unfinished business. If you want to help out on some of these things, contact [Samuel Borms](mailto:borms_sam@hotmail.com), or simply open an issue and file a pull request on GitHub. 7 | 8 | ### Extensions 9 | 10 | - Implement a `sento_train()` function to, for instance, generate a lexicon from a corpus. 11 | 12 | - Add topic modeling functionality to the `add_features()` function (or as part of the `sento_train()` function). 13 | 14 | - Expand the number of available models in the `sento_model()` function (e.g. constrained regression and PCA). 15 | 16 | - Implement an optimization approach in the `aggregate.sento_measures(..., do.global = TRUE)` function to extract optimized weights across dimensions (and possibly make it available through the `sento_model()` function); this includes allowing weights to be set in the `aggregate.sento_measures()` function instead of averaging by default. 17 | 18 | - Implement fast textual sentiment computation for lexicons with ngrams. 19 | 20 | - Implement a `scale.sentiment()` function. 21 | 22 | - Add a `head.sento_measures()` and a `tail.sento_measures()` function. 23 | 24 | - Implement a structure to support high-frequency intraday aggregation. 25 | 26 | - Make more lexicons available (e.g. in German and Spanish). 27 | 28 | - Give more control to the user to play with **`glmnet`** parameters in the `sento_model()` function. 29 | 30 | - Write a helper function to aggregate an `attributions` object into clusters. 31 | 32 | - Resolve the inconsistency with `data.frame` input columns (`"text(s)"` & `"(doc_)id"`) in the **`sentometrics`**, **`quanteda`** and **`tm`** corpus creators. 33 | 34 | - Prepare a functional CRAN version of the **`sentometrics.app`** package. 35 | 36 | - Find additional computational speed gains (especially after recent additions that introduced some overhead). 37 | 38 | - Add a `"binary"` option to `get_hows()[["words"]]` that turns the sentiment computation into an indicator-like calculation (value of 1 if a text has at least one lexicon word). 39 | 40 | ### Tweaks and bugs 41 | 42 | - Optimize parallelization of iterative model runs (e.g. avoid unnecessary copying of objects across cores). 43 | 44 | - Add a `delete_features()` function as an intuitive counterpart to `add_features()`. 45 | 46 | - Solve the issue that column names of the sentiment measures output do not handle special characters well but still get through. 47 | 48 | - Handle `data.frame` and `matrix` input in the `sento_model(..., y, ...)` function more consistently. 49 | 50 | - Add references to the external **`textdata`** package in examples (e.g. for extra lexicons). 51 | 52 | - Be more flexible for the features in a `sento_corpus` object by also allowing values outside 0 and 1. 53 | 54 | - Make sure subsetting does not preserve the `sentiment` class when it is not supposed to. 55 | 56 | - Remove all but one (instead of all) duplicate entries in the `sento_lexicons()` function.
57 | 58 | - Make sure you can also add the `"language"` identifier to a corpus with `add_features()`. 59 | 60 | -------------------------------------------------------------------------------- /vignettes/examples/corpus.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Corpus manipulation" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteEngine{knitr::rmarkdown} 6 | %\VignetteEncoding{UTF-8} 7 | --- 8 | 9 | ```{r, include=FALSE} 10 | knitr::opts_chunk$set(warning = FALSE, message = FALSE, fig.width = 7, fig.height = 4, fig.align = "center") 11 | ``` 12 | 13 | This tutorial provides insights into how to create, enrich, transform, and analyze a `sento_corpus` object. A `sento_corpus` object is special because it always has a date column and numeric metadata features. 14 | 15 | **Preparation** 16 | &nbsp; 17 | 18 | ```{r} 19 | library("sentometrics") 20 | library("quanteda") 21 | 22 | data("usnews") 23 | data("list_lexicons") 24 | data("list_valence_shifters") 25 | ``` 26 | 27 | ### Summarize a corpus through some statistics and plots 28 | 29 | The `corpus_summarize()` function allows you to quickly investigate what your corpus looks like in terms of the number of documents, the number of tokens, and the metadata features. The summary can be computed at a daily, weekly, monthly, or yearly frequency, and for all the corpus features or only a selection of them. 30 | 31 | ```{r} 32 | corpus <- sento_corpus(usnews) 33 | 34 | summ <- corpus_summarize(corpus, by = "month", features = c("wsj", "wapo")) 35 | stats <- summ[["stats"]] 36 | plots <- summ[["plots"]] 37 | ``` 38 | 39 | The summary consists of a statistics component... 40 | 41 | ```{r} 42 | stats 43 | ``` 44 | 45 | ... and a component with pregenerated graphs of the statistics. 46 | 47 | ```{r} 48 | plots$doc_plot # monthly evolution of the number of documents 49 | plots$feature_plot # monthly evolution of the presence of the two journal features 50 | plots$token_plot # monthly evolution of the token statistics 51 | ``` 52 | 53 | ### Apply **`quanteda`** corpus functions to a `sento_corpus` object 54 | 55 | It is also possible to apply the many corpus manipulation functions of the **`quanteda`** package to a `sento_corpus` object. In fact, the `sento_corpus` object is built on **`quanteda`**'s `corpus` object. 56 | 57 | ```{r} 58 | corpus <- sento_corpus(usnews) 59 | 60 | res <- corpus_reshape(corpus, to = "sentences") 61 | sam <- corpus_sample(corpus, 100) 62 | seg <- corpus_segment(corpus, pattern = "stock", use_docvars = TRUE) 63 | sub <- corpus_subset(corpus, wsj == 1) 64 | tri <- corpus_trim(corpus, "documents", min_ntoken = 300) 65 | trs <- corpus_trim(corpus, "sentences", min_ntoken = 40) 66 | ``` 67 | 68 | ### Enrich a `sento_corpus` object with features 69 | 70 | Using the `add_features()` function, additional features can be added to your corpus directly, or generated through keywords or regex pattern matching, as the two variants below show.
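Features can first of all be supplied directly as a `data.frame` of numeric values between 0 and 1, through the `featuresdf` argument. A minimal sketch of this direct route, reusing the journal indicator columns already present in `usnews`:

```{r}
# add the wsj and wapo metadata columns as corpus features directly
corpusDirect <- sento_corpus(usnews[, 1:3])
corpusDirect <- add_features(corpusDirect, featuresdf = usnews[, c("wsj", "wapo")])

# the automatically created dummyFeature column can be dropped, as done below
head(docvars(corpusDirect))
```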
71 | 72 | ```{r} 73 | corpus <- sento_corpus(usnews[, 1:3]) 74 | 75 | kw <- list( 76 | E = c("economy", "economic"), 77 | P = c("polic.|Polic.|politi.|Politi."), # a regex pattern 78 | U = c("uncertainty", "uncertain") 79 | ) 80 | 81 | corpus <- add_features(corpus, keywords = kw, do.binary = TRUE, do.regex = c(FALSE, TRUE, FALSE)) 82 | docvars(corpus, "dummyFeature") <- NULL 83 | 84 | head(docvars(corpus), 20) 85 | ``` 86 | 87 | -------------------------------------------------------------------------------- /vignettes/sentometrics.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Getting started with sentometrics" 3 | output: rmarkdown::html_vignette 4 | --- 5 | 6 | ```{r, include=FALSE} 7 | knitr::opts_chunk$set(warning = FALSE, message = FALSE, fig.width = 6, fig.height = 4, fig.align = "center") 8 | ``` 9 | 10 | You collected a large number of texts and think it is a good idea to summarize your corpus into several textual sentiment time series, which you suspect could help predict some variable you are interested in. However, you do not really know how to proceed next... Fortunately, you come across the **`sentometrics`** package, which does exactly what you need! Great! 11 | 12 | ## Installation 13 | 14 | To install the package from CRAN, simply do: 15 | 16 | ```{r, eval=FALSE} 17 | install.packages("sentometrics") 18 | ``` 19 | 20 | To install the latest development version of **`sentometrics`** (which may contain bugs!), execute: 21 | 22 | ```{r, eval=FALSE} 23 | devtools::install_github("sborms/sentometrics") 24 | ``` 25 | 26 | ## Examples 27 | 28 | Check out the **Examples** section. It includes tutorials with a bunch of examples, from simple to a little less simple, and some larger-scale applications. Sentiment computation, aggregation, diagnostic tools, visualization, regression -- it's all in there. 29 | 30 | ## Readings 31 | 32 | Check out the **Research** section, especially our [vignette](https://ssrn.com/abstract=3067734) which explains the ins and outs of the software package along with accompanying code examples. The complete documentation can be found on the [sentometrics CRAN](https://CRAN.R-project.org/package=sentometrics) page. 33 | 34 | ## Shiny app 35 | 36 | You might also want to have a look at the [**`sentometrics.app`**](https://github.com/sborms/sentometrics.app) package. Its `sentometrics.app::sento_app()` function embeds a Shiny application that displays many of **`sentometrics`**' functionalities. Enjoy! 37 | 38 | ## Media 39 | 40 | Earlier versions of the package were presented as a lightning talk at the eRum 2018 (Budapest) and useR! 2019 (Toulouse) conferences, and recorded! 41 | 42 |
43 | 44 | --------------------------------------------------------------------------------