├── configure.win ├── src ├── .clang_complete ├── Makevars.in ├── Makevars.win ├── hashtable │ └── maker │ │ ├── orig │ │ ├── negative-words.txt │ │ └── positive-words.txt │ │ ├── makehash.sh │ │ ├── makewords.sh │ │ ├── make2tables.sh │ │ ├── NOTES.txt │ │ ├── positive.txt │ │ └── negative.txt ├── meanr_native.c ├── meanr_nthreads.c └── score.c ├── .travis.yml ├── LICENSE ├── .gitignore ├── .Rbuildignore ├── cleanup ├── NAMESPACE ├── tests ├── degenerate_cases.r └── score.r ├── inst ├── CITATION └── sexputils │ ├── safeomp.h │ ├── reactor.h │ └── RNACI.h ├── R ├── meanr.nthreads.r ├── meanr-package.r └── score.r ├── man ├── meanr.nthreads.Rd ├── meanr-package.Rd └── score.Rd ├── ChangeLog ├── configure.ac ├── DESCRIPTION ├── LICENSE.md └── README.md /configure.win: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/.clang_complete: -------------------------------------------------------------------------------- 1 | -I/usr/share/R/include 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: R 2 | warnings_are_errors: true 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2016-2022 2 | COPYRIGHT HOLDER: Drew Schmidt 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | *.o 3 | 4 | *~ 5 | *.swp 6 | 7 | src/Makevars 8 | -------------------------------------------------------------------------------- /src/Makevars.in: -------------------------------------------------------------------------------- 1 | PKG_CFLAGS = @OMP_FLAGS@ -I../inst/sexputils 2 | PKG_LIBS = @OMP_FLAGS@ 3 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_CFLAGS = $(SHLIB_OPENMP_CFLAGS) -I../inst/sexputils 2 | PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) 3 | -------------------------------------------------------------------------------- /src/hashtable/maker/orig/negative-words.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wrathematics/meanr/HEAD/src/hashtable/maker/orig/negative-words.txt -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^\.travis\.yml$ 2 | 3 | src/.clang_complete 4 | 5 | src/hashtable/maker/ 6 | 7 | LICENSE.md 8 | 9 | gistfile1.txt 10 | -------------------------------------------------------------------------------- /src/hashtable/maker/makehash.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | gperf --includes --readonly-tables --pic --struct-type --slot-name=word words.txt > sentiment.h 4 | -------------------------------------------------------------------------------- /cleanup: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | 3 | rm -rf ./src/*.dylib 4 | rm -rf ./src/*.so* 5 | rm -rf ./src/*.o 6 | rm -rf ./src/*.d 7 | rm -rf ./src/*.dll 8 | rm -rf ./src/Makevars 9 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(meanr.nthreads) 4 | export(score) 5 | useDynLib(meanr,R_meanr_nthreads) 6 | useDynLib(meanr,R_score) 7 | -------------------------------------------------------------------------------- /src/hashtable/maker/makewords.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # positive 1, negative -1 4 | echo "struct sentwords{ char *word; int score; };" > words.txt 5 | echo "%%" >> words.txt 6 | 7 | sed 's/.*/&, 1/' positive.txt >> words.txt 8 | sed 's/.*/&, -1/' negative.txt >> words.txt 9 | -------------------------------------------------------------------------------- /tests/degenerate_cases.r: -------------------------------------------------------------------------------- 1 | library(meanr) 2 | 3 | # valgrind reports false positives, so set nthreads to 1 4 | test = score("", nthreads=1) 5 | truth = 6 | data.frame( 7 | positive = 0L, 8 | negative = 0L, 9 | score = 0L, 10 | wc = 0L 11 | ) 12 | 13 | stopifnot(identical(test, truth)) 14 | -------------------------------------------------------------------------------- /src/hashtable/maker/make2tables.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | gperf --includes --readonly-tables --pic --hash-function-name=poshash --lookup-function-name=in_pos_set --slot-name=word positive.txt > poshash.h 4 | gperf --includes --readonly-tables --pic --hash-function-name=neghash --lookup-function-name=in_neg_set --slot-name=word negative.txt > neghash.h 5 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | year <- sub("-.*", "", meta$Date) 2 | note <- sprintf("{R} package version %s", meta$Version) 3 | 4 | bibentry( 5 | bibtype = "Misc", 6 | title = "{meanr}: Sentiment Analysis Scorer", 7 | author = person("Drew", "Schmidt"), 8 | year = year, 9 | note = note, 10 | url = "https://cran.r-project.org/package=meanr", 11 | textVersion = NULL 12 | ) 13 | -------------------------------------------------------------------------------- /tests/score.r: -------------------------------------------------------------------------------- 1 | library(meanr) 2 | 3 | words = "Abundance abundant accessable. Banana apple orange. Abnormal abolish abominable." 4 | 5 | # valgrind reports false positives, so set nthreads to 1 6 | test <- score(words, nthreads=1) 7 | stopifnot(identical(test$positive, 3L)) 8 | stopifnot(identical(test$negative, 3L)) 9 | stopifnot(all.equal(test$score, 0.0)) 10 | stopifnot(identical(test$wc, 9L)) 11 | -------------------------------------------------------------------------------- /R/meanr.nthreads.r: -------------------------------------------------------------------------------- 1 | #' meanr.nthreads 2 | #' 3 | #' Returns the number of cores + hyperthreads on the system. The function 4 | #' respects the environment variable \code{OMP_NUM_THREADS}. 5 | #' 6 | #' @return 7 | #' The number of cores + hyperthreads on the system (an integer). 8 | #' 9 | #' @useDynLib meanr R_meanr_nthreads 10 | #' @export 11 | meanr.nthreads = function() 12 | { 13 | .Call(R_meanr_nthreads) 14 | } 15 | -------------------------------------------------------------------------------- /man/meanr.nthreads.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/meanr.nthreads.r 3 | \name{meanr.nthreads} 4 | \alias{meanr.nthreads} 5 | \title{meanr.nthreads} 6 | \usage{ 7 | meanr.nthreads() 8 | } 9 | \value{ 10 | The number of cores + hyperthreads on the system (an integer). 11 | } 12 | \description{ 13 | Returns the number of cores + hyperthreads on the system. The function 14 | respects the environment variable \code{OMP_NUM_THREADS}. 15 | } 16 | -------------------------------------------------------------------------------- /src/hashtable/maker/NOTES.txt: -------------------------------------------------------------------------------- 1 | positive/negative words from http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html 2 | 3 | 4 | Modifications before creating the hash table: 5 | 6 | * removed headers 7 | * pruned duplicates (all from positive.txt): 8 | - envious 9 | - enviously 10 | - enviousness 11 | * remove mal-formed strings: 12 | - naive (negative) 13 | - a+ (positive) 14 | 15 | 16 | Modifications after creating the hash table: 17 | 18 | * set in_pos/neg_set static 19 | * use (intptr_t) instead of (int)(long) 20 | -------------------------------------------------------------------------------- /src/meanr_native.c: -------------------------------------------------------------------------------- 1 | /* Automatically generated. Do not edit by hand. */ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | extern SEXP R_meanr_nthreads(void); 9 | extern SEXP R_score(SEXP s_, SEXP nthreads_); 10 | 11 | static const R_CallMethodDef CallEntries[] = { 12 | {"R_meanr_nthreads", (DL_FUNC) &R_meanr_nthreads, 0}, 13 | {"R_score", (DL_FUNC) &R_score, 2}, 14 | {NULL, NULL, 0} 15 | }; 16 | 17 | void R_init_meanr(DllInfo *dll) 18 | { 19 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 20 | R_useDynamicSymbols(dll, FALSE); 21 | } 22 | -------------------------------------------------------------------------------- /src/meanr_nthreads.c: -------------------------------------------------------------------------------- 1 | #ifdef _OPENMP 2 | #include 3 | #endif 4 | 5 | #include 6 | 7 | #define MIN(a,b) ((a)<(b)?(a):(b)) 8 | 9 | static inline int num_threads(void) 10 | { 11 | int n = 0; 12 | 13 | #ifdef _OPENMP 14 | int nth, tl; 15 | #pragma omp parallel 16 | { 17 | nth = omp_get_num_threads(); 18 | tl = omp_get_thread_limit(); 19 | } 20 | 21 | n = MIN(nth, tl); 22 | #else 23 | n = 1; 24 | #endif 25 | 26 | return n; 27 | } 28 | 29 | 30 | 31 | SEXP R_meanr_nthreads(void) 32 | { 33 | SEXP nth; 34 | newRvec(nth, 1, "int"); 35 | 36 | INT(nth) = num_threads(); 37 | 38 | unhideGC(); 39 | return nth; 40 | } 41 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | Release 0.1-6: 2 | * Minor change for CRAN. 3 | 4 | Release 0.1-5: 5 | * C function prototype changes for CRAN. 6 | 7 | Release 0.1-4: 8 | * Minor documentation fix for CRAN. 9 | 10 | Release 0.1-3: 11 | * Fixed build for clang 13. 12 | 13 | Release 0.1-2: 14 | * Fixed build issue on Windows. 15 | * Use latest RNACI. 16 | * Configure test for OpenMP. 17 | * Fixed Solaris OMP issue. 18 | * Use better nthreads lookup. 19 | * Added some improvements to documentation clarity. 20 | 21 | Release 0.1-1: 22 | * Fixed -Wextra compiler warnings. 23 | * Wrote a more coherent description. 24 | * Support for older OpenMP standards (fixes Solaris compiler error). 25 | * Fixed some edge-cases. 26 | 27 | Release 0.1-0: 28 | * Added score(). 29 | * Added multi-corpus parallelism via OpenMP. 30 | -------------------------------------------------------------------------------- /R/meanr-package.r: -------------------------------------------------------------------------------- 1 | #' meanr: Sentiment Analysis Scorer 2 | #' 3 | #' Sentiment analysis is a popular technique in text mining. Roughly 4 | #' speaking, the technique is an attempt to determine the overall emotional 5 | #' attitude of a piece of text (i.e., positive or negative). We provide a new 6 | #' implementation of a common method for computing sentiment, whereby words are 7 | #' scored as positive or negative according to a "dictionary", and then an 8 | #' sum of those scores for the document is produced. We use the 'Hu' and 'Liu' 9 | #' sentiment dictionary for determining sentiment. The scoring function is 10 | #' 'vectorized' by document, and scores for multiple documents are computed in 11 | #' parallel via 'OpenMP'. 12 | #' 13 | #' @name meanr-package 14 | #' @docType package 15 | #' @author Drew Schmidt 16 | #' @keywords Package 17 | NULL 18 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_PREREQ([2.69]) 2 | AC_INIT 3 | AC_CONFIG_SRCDIR([DESCRIPTION]) 4 | 5 | # Get C compiler from R 6 | : ${R_HOME=`R RHOME`} 7 | if test -z "${R_HOME}"; then 8 | echo "could not determine R_HOME" 9 | exit 1 10 | fi 11 | CC=`"${R_HOME}/bin/R" CMD config CC` 12 | 13 | AC_PROG_CC 14 | AC_OPENMP 15 | if test -n "${OPENMP_CFLAGS}"; then 16 | have_omp="yes" 17 | OMP_FLAGS="\$(SHLIB_OPENMP_CFLAGS)" 18 | else 19 | have_omp="no" 20 | OMP_FLAGS="" 21 | fi 22 | 23 | echo " " 24 | echo "******************* Results of meanr package configure *******************" 25 | echo " " 26 | echo "* OpenMP Report" 27 | echo "* >> Compiler support: ${have_omp}" 28 | echo "* >> CFLAGS = ${OMP_FLAGS}" 29 | echo "**************************************************************************" 30 | echo " " 31 | 32 | AC_SUBST(OMP_FLAGS) 33 | AC_CONFIG_FILES([src/Makevars]) 34 | AC_OUTPUT 35 | -------------------------------------------------------------------------------- /man/meanr-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/meanr-package.r 3 | \docType{package} 4 | \name{meanr-package} 5 | \alias{meanr-package} 6 | \title{meanr: Sentiment Analysis Scorer} 7 | \description{ 8 | Sentiment analysis is a popular technique in text mining. Roughly 9 | speaking, the technique is an attempt to determine the overall emotional 10 | attitude of a piece of text (i.e., positive or negative). We provide a new 11 | implementation of a common method for computing sentiment, whereby words are 12 | scored as positive or negative according to a "dictionary", and then an 13 | sum of those scores for the document is produced. We use the 'Hu' and 'Liu' 14 | sentiment dictionary for determining sentiment. The scoring function is 15 | 'vectorized' by document, and scores for multiple documents are computed in 16 | parallel via 'OpenMP'. 17 | } 18 | \author{ 19 | Drew Schmidt 20 | } 21 | \keyword{Package} 22 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: meanr 2 | Type: Package 3 | Title: Sentiment Analysis Scorer 4 | Version: 0.1-6 5 | Description: Sentiment analysis is a popular technique in text mining that 6 | attempts to determine the emotional state of some text. We provide a new 7 | implementation of a common method for computing sentiment, whereby words are 8 | scored as positive or negative according to a dictionary lookup. Then the 9 | sum of those scores is returned for the document. We use the 'Hu' and 'Liu' 10 | sentiment dictionary ('Hu' and 'Liu', 2004) 11 | for determining sentiment. The scoring function is 'vectorized' by document, 12 | and scores for multiple documents are computed in parallel via 'OpenMP'. 13 | License: BSD 2-clause License + file LICENSE 14 | Depends: 15 | R (>= 3.0.0) 16 | LazyLoad: yes 17 | NeedsCompilation: yes 18 | ByteCompile: yes 19 | Authors@R: c(person("Drew", "Schmidt", role=c("aut", "cre"), 20 | email="wrathematics@gmail.com")) 21 | Maintainer: Drew Schmidt 22 | URL: https://github.com/wrathematics/meanr 23 | BugReports: https://github.com/wrathematics/meanr/issues 24 | RoxygenNote: 7.2.1 25 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2016-2022, Drew Schmidt 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, 7 | this list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR 14 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 15 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO 16 | EVENT SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 17 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 18 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 19 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 20 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 21 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 22 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | -------------------------------------------------------------------------------- /man/score.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/score.r 3 | \name{score} 4 | \alias{score} 5 | \title{score} 6 | \usage{ 7 | score(s, nthreads = meanr.nthreads()) 8 | } 9 | \arguments{ 10 | \item{s}{A string or vector of strings.} 11 | 12 | \item{nthreads}{Number of threads to use. By default it will use the total number of 13 | cores + hyperthreads.} 14 | } 15 | \value{ 16 | A dataframe, consisting of columns "positive", "negative", "score", and "wc". 17 | With the exception of "score", these are counts; that is, "positive" is the 18 | number of positive sentiment words, "negative" is the number of negative 19 | sentiment words, and "wc" is the wordcount (total number of words). 20 | } 21 | \description{ 22 | Computes the sentiment score, the sum of the total number of positive and 23 | negative scored words. The function is vectorized so that it will return one 24 | row per string. The scoring function ignores (upper/lower) case and 25 | punctuation. 26 | } 27 | \details{ 28 | The scoring function uses OpenMP to process text in parallel. 29 | 30 | The function uses the Hu and Liu sentiment dictionary (same as everybody 31 | else) available here: 32 | https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html 33 | } 34 | \examples{ 35 | library(meanr) 36 | s1 = "Abundance abundant accessable." 37 | s2 = "Banana apple orange." 38 | s3 = "Abnormal abolish abominable." 39 | s = c(s1, s2, s3) 40 | 41 | # as separate 'documents' 42 | score(s, nthreads=1) 43 | 44 | # as one document 45 | score(paste0(s, collapse=" "), nthreads=1) 46 | 47 | } 48 | \references{ 49 | Hu, M., & Liu, B. (2004). Mining opinion features in customer 50 | reviews. National Conference on Artificial Intelligence. 51 | } 52 | -------------------------------------------------------------------------------- /R/score.r: -------------------------------------------------------------------------------- 1 | #' score 2 | #' 3 | #' Computes the sentiment score, the sum of the total number of positive and 4 | #' negative scored words. The function is vectorized so that it will return one 5 | #' row per string. The scoring function ignores (upper/lower) case and 6 | #' punctuation. 7 | #' 8 | #' @details 9 | #' The scoring function uses OpenMP to process text in parallel. 10 | #' 11 | #' The function uses the Hu and Liu sentiment dictionary (same as everybody 12 | #' else) available here: 13 | #' https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html 14 | #' 15 | #' @param s 16 | #' A string or vector of strings. 17 | #' @param nthreads 18 | #' Number of threads to use. By default it will use the total number of 19 | #' cores + hyperthreads. 20 | #' 21 | #' @return 22 | #' A dataframe, consisting of columns "positive", "negative", "score", and "wc". 23 | #' With the exception of "score", these are counts; that is, "positive" is the 24 | #' number of positive sentiment words, "negative" is the number of negative 25 | #' sentiment words, and "wc" is the wordcount (total number of words). 26 | #' 27 | #' @examples 28 | #' library(meanr) 29 | #' s1 = "Abundance abundant accessable." 30 | #' s2 = "Banana apple orange." 31 | #' s3 = "Abnormal abolish abominable." 32 | #' s = c(s1, s2, s3) 33 | #' 34 | #' # as separate 'documents' 35 | #' score(s, nthreads=1) 36 | #' 37 | #' # as one document 38 | #' score(paste0(s, collapse=" "), nthreads=1) 39 | #' 40 | #' @references 41 | #' Hu, M., & Liu, B. (2004). Mining opinion features in customer 42 | #' reviews. National Conference on Artificial Intelligence. 43 | #' 44 | #' @useDynLib meanr R_score 45 | #' @export 46 | score = function(s, nthreads=meanr.nthreads()) 47 | { 48 | .Call(R_score, s, nthreads) 49 | } 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # meanr 2 | 3 | * **Version:** 0.1-5 4 | * **URL**: https://github.com/wrathematics/meanr 5 | * **License:** [BSD 2-Clause](https://opensource.org/license/bsd-2-clause/) 6 | * **Author:** Drew Schmidt 7 | 8 | 9 | **meanr** is an R package performing sentiment analysis. Its main method, `score()`, computes sentiment as a simple sum of the counts of positive (+1) and negative (-1) sentiment words in a piece of text. More sophisticated techniques are available to R, for example in the **qdap** package's `polarity()` function. This package uses [the Hu and Liu sentiment dictionary](https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html), same as everybody else. 10 | 11 | **meanr** is significantly faster than everything else I tried (which was actually the motivation for its creation), but I don't claim to have tried everything. I believe the package is quite fast. However, the method is merely a dictionary lookup, so it ignores word context like in more sophisticated methods. On the other hand, the more sophisticated tools are very slow. If you have a large volume of text, I believe there is value in getting a "first glance" at the data, and **meanr** allows you to do this very quickly. 12 | 13 | 14 | 15 | ## Installation 16 | 17 | The stable version is available on CRAN: 18 | 19 | ```r 20 | install.packages("meanr") 21 | ``` 22 | 23 | The development version is maintained on GitHub: 24 | 25 | ```r 26 | remotes::install_github("wrathematics/meanr") 27 | ``` 28 | 29 | 30 | 31 | ## Example Usage 32 | 33 | I have a dataset that, for legal reasons, I can not describe, much less provide. You can think of it like a collection of tweets (they are not tweets). But take my word for it that it's real, English language text. The data is in the form of a vector of strings, which we'll call `x`. 34 | 35 | ```r 36 | x = readRDS("x.rds") 37 | 38 | length(x) 39 | ## [1] 655760 40 | 41 | sum(nchar(x)) 42 | ## [1] 162663972 43 | 44 | library(meanr) 45 | system.time(s <- score(x)) 46 | ## user system elapsed 47 | ## 1.072 0.000 0.285 48 | 49 | head(s) 50 | ## positive negative score wc 51 | ## 1 2 0 2 32 52 | ## 2 5 0 5 29 53 | ## 3 4 2 2 67 54 | ## 4 12 3 9 203 55 | ## 5 8 2 6 101 56 | ## 6 4 3 1 99 57 | ``` 58 | 59 | 60 | 61 | ## How It Works 62 | 63 | The `score()` function receives a vector of strings, and operates on each one as follows: 64 | 65 | 1. The maximum string length is found, and a buffer of that size is allocated. 66 | 2. The string is copied to the buffer. 67 | 3. All punctuation is removed. All characters are converted to lowercase. 68 | 4. Score sentiment: 69 | - Tokenize words as collections of chars separated by a space. 70 | - Check if the word is positive; if not, check if it is negative; if not, then it's assumed to be neutral. Each check is a lookup up in one of two tables of Hu and Liu's dictionaries. 71 | - If the word is in the table, get its value from the hash table (positive words have value 1, negative words -1) and update the various counts. Otherwise, the word is "neutral" (score of 0). 72 | 73 | This is all done in four passes of each string; each pass corresponds to each of the enumerated items above. The hash tables uses perfect hash functions generated by gperf. 74 | -------------------------------------------------------------------------------- /inst/sexputils/safeomp.h: -------------------------------------------------------------------------------- 1 | // tl;dr: You may use the definitions in this header file as you please, 2 | // with or without attribution. No warranty is provided. This "license" applies 3 | // ONLY to this particular file, and not the larger project in which it is 4 | // contained. If you want me to license it differently for you for some reason, 5 | // email me. 6 | 7 | // This file is free and unencumbered software released into the public domain. 8 | // 9 | // Anyone is free to copy, modify, publish, use, compile, sell, or distribute 10 | // this file, for any purpose, and by any means. 11 | // 12 | // In jurisdictions that recognize copyright laws, the author or authors of this 13 | // file dedicate any and all copyright interest in the file to the public 14 | // domain. We make this dedication for the benefit of the public at large and to 15 | // the detriment of our heirs and successors. We intend this dedication to be an 16 | // overt act of relinquishment in perpetuity of all present and future rights to 17 | // this file under copyright law. 18 | // 19 | // THE FILE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | // AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 23 | // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 | // WITH THIS FILE OR THE USE OR OTHER DEALINGS IN THIS FILE. 25 | // 26 | // For more information, please refer to http://unlicense.org/ 27 | 28 | #ifndef __SAFE_OMPUTILS_H__ 29 | #define __SAFE_OMPUTILS_H__ 30 | 31 | // ----------------------------------------------------------------------------- 32 | // Non-OMP vectorization stuff 33 | // ----------------------------------------------------------------------------- 34 | 35 | // Praying to the compiler to please ignore what it thinks are loop dependencies 36 | // and to please just go ahead and vectorize the loop already. Useful for 37 | // loops outside of omp parallel blocks. 38 | // GCC HAS TO GO LAST since so many compilers support --std=gnu99 and so on 39 | #if defined(__INTEL_COMPILER) 40 | #define PLEASE_VECTORIZE _Pragma("ivdep") 41 | #elif defined(__clang__) 42 | #define PLEASE_VECTORIZE _Pragma("clang loop vectorize(enable) interleave(enable)") 43 | #elif defined(__GNUC__) 44 | #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 9) 45 | #define PLEASE_VECTORIZE _Pragma("GCC ivdep") 46 | #else 47 | #define PLEASE_VECTORIZE 48 | #endif 49 | #else 50 | #define PLEASE_VECTORIZE 51 | #endif 52 | 53 | 54 | 55 | // ----------------------------------------------------------------------------- 56 | // OMP stuff 57 | // ----------------------------------------------------------------------------- 58 | 59 | #define OMP_MIN_SIZE 1000 60 | 61 | 62 | #ifdef _OPENMP 63 | #include 64 | #if _OPENMP >= 201307 65 | #define OMP_VER_4 66 | #elif _OPENMP >= 200805 67 | #define OMP_VER_3 68 | #endif 69 | #endif 70 | 71 | 72 | // Insert SIMD pragma if supported. This pragma _demands_ vectorization, whether 73 | // or not the compiler thinks that's a good idea. However, it will only work 74 | // inside of an omp parallel block. 75 | #ifdef OMP_VER_4 76 | #ifdef _MSC_VER // Microsoft doing it's own non-standard bullshit? I DON'T BELIEVE IT 77 | #define SAFE_SIMD _pragma(omp simd) 78 | #define SAFE_FOR_SIMD _pragma(omp for simd) 79 | #define SAFE_PARALLEL_FOR_SIMD _pragma(omp parallel for simd) 80 | #else 81 | #define SAFE_SIMD _Pragma("omp simd") 82 | #define SAFE_FOR_SIMD _Pragma("omp for simd") 83 | #define SAFE_PARALLEL_FOR_SIMD _Pragma("omp parallel for simd") 84 | #endif 85 | #else 86 | #define SAFE_SIMD PLEASE_VECTORIZE 87 | #define SAFE_FOR_SIMD PLEASE_VECTORIZE 88 | #define SAFE_PARALLEL_FOR_SIMD //TODO 89 | #endif 90 | 91 | 92 | #endif 93 | -------------------------------------------------------------------------------- /inst/sexputils/reactor.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2017 by Drew Schmidt 2 | // 3 | // Permission to use, copy, modify, and/or distribute this software for any 4 | // purpose with or without fee is hereby granted. 5 | // 6 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 7 | // REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 8 | // AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 9 | // INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 10 | // LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 11 | // OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 12 | // PERFORMANCE OF THIS SOFTWARE 13 | 14 | 15 | #ifndef __REACTOR_CTESTS__ 16 | #define __REACTOR_CTESTS__ 17 | 18 | 19 | #include 20 | #include 21 | 22 | 23 | // Tests 24 | 25 | static inline int is_na(SEXP x) 26 | { 27 | switch (TYPEOF(x)) 28 | { 29 | case LGLSXP: 30 | return LOGICAL(x)[0] == NA_LOGICAL; 31 | case INTSXP: 32 | return INTEGER(x)[0] == NA_INTEGER; 33 | case REALSXP: 34 | return ISNA(REAL(x)[0]); 35 | case STRSXP: 36 | return STRING_ELT(x, 0) == NA_STRING; 37 | 38 | default: 39 | return 0; 40 | } 41 | } 42 | 43 | static inline int is_badval(SEXP x) 44 | { 45 | switch (TYPEOF(x)) 46 | { 47 | case LGLSXP: 48 | case INTSXP: 49 | case STRSXP: 50 | return is_na(x); 51 | case REALSXP: 52 | return is_na(x) || ISNAN(REAL(x)[0]) || !R_FINITE(REAL(x)[0]); 53 | 54 | default: 55 | return 0; 56 | } 57 | } 58 | 59 | static inline int is_inty(SEXP x) 60 | { 61 | switch (TYPEOF(x)) 62 | { 63 | case INTSXP: 64 | return 1; 65 | case REALSXP: 66 | return fabs(REAL(x)[0] - (int) REAL(x)[0]) < 1e-10; 67 | 68 | default: 69 | return 0; 70 | } 71 | } 72 | 73 | static inline int is_annoying(SEXP x) 74 | { 75 | return (LENGTH(x) != 1 || is_badval(x)); 76 | } 77 | 78 | static inline int is_negative(SEXP x) 79 | { 80 | switch (TYPEOF(x)) 81 | { 82 | case INTSXP: 83 | return INTEGER(x)[0] < 0; 84 | case REALSXP: 85 | return REAL(x)[0] < 0; 86 | 87 | default: 88 | return 0; 89 | } 90 | } 91 | 92 | static inline int is_zero(SEXP x) 93 | { 94 | switch (TYPEOF(x)) 95 | { 96 | case INTSXP: 97 | return INTEGER(x)[0] == 0; 98 | case REALSXP: 99 | return REAL(x)[0] == 0.0; 100 | 101 | default: 102 | return 0; 103 | } 104 | } 105 | 106 | 107 | 108 | static inline int is_lgl(SEXP x) 109 | { 110 | return TYPEOF(x) == LGLSXP; 111 | } 112 | 113 | static inline int is_str(SEXP x) 114 | { 115 | return TYPEOF(x) == STRSXP; 116 | } 117 | 118 | static inline int is_num(SEXP x) 119 | { 120 | switch (TYPEOF(x)) 121 | { 122 | case INTSXP: 123 | case REALSXP: 124 | return 1; 125 | 126 | default: 127 | return 0; 128 | } 129 | } 130 | 131 | 132 | 133 | // 'Assertions' 134 | 135 | #define CHECK_IS_MATRIX(...) \ 136 | if (!isMatrix(REACTOR_FIRST(__VA_ARGS__))){ \ 137 | REACTOR_ERRMSG("a matrix", __VA_ARGS__);} 138 | 139 | #define CHECK_IS_LIST(...) \ 140 | if (!isVectorList(REACTOR_FIRST(__VA_ARGS__))){ \ 141 | REACTOR_ERRMSG("a list", __VA_ARGS__);} 142 | 143 | 144 | #define CHECK_IS_FLAG(...) \ 145 | if (!is_lgl(REACTOR_FIRST(__VA_ARGS__)) || is_annoying(REACTOR_FIRST(__VA_ARGS__))){ \ 146 | REACTOR_ERRMSG("a flag (TRUE or FALSE)", __VA_ARGS__);} 147 | 148 | #define CHECK_IS_STRING(...) \ 149 | if (!is_str(REACTOR_FIRST(__VA_ARGS__)) || is_annoying(REACTOR_FIRST(__VA_ARGS__))){ \ 150 | REACTOR_ERRMSG("a string (single non-NA character string)", __VA_ARGS__);} 151 | 152 | #define CHECK_IS_STRINGS(...) \ 153 | if (!is_str(REACTOR_FIRST(__VA_ARGS__)) || LENGTH(REACTOR_FIRST(__VA_ARGS__)) == 0){ \ 154 | REACTOR_ERRMSG("a vector of strings", __VA_ARGS__);} 155 | 156 | 157 | 158 | #define CHECK_IS_INT(...) \ 159 | if (!is_inty(REACTOR_FIRST(__VA_ARGS__)) || is_annoying(REACTOR_FIRST(__VA_ARGS__))){ \ 160 | REACTOR_ERRMSG("a positive integer", __VA_ARGS__);} 161 | 162 | #define CHECK_IS_NATNUM(...) \ 163 | if (!is_inty(REACTOR_FIRST(__VA_ARGS__)) || is_annoying(REACTOR_FIRST(__VA_ARGS__)) || is_negative(REACTOR_FIRST(__VA_ARGS__))){ \ 164 | REACTOR_ERRMSG("a positive integer", __VA_ARGS__);} 165 | 166 | #define CHECK_IS_POSINT(...) \ 167 | if (!is_inty(REACTOR_FIRST(__VA_ARGS__)) || is_annoying(REACTOR_FIRST(__VA_ARGS__)) || is_negative(REACTOR_FIRST(__VA_ARGS__)) || is_zero(REACTOR_FIRST(__VA_ARGS__))){ \ 168 | REACTOR_ERRMSG("a positive integer", __VA_ARGS__);} 169 | 170 | 171 | 172 | // internals, pls ignore 173 | #define REACTOR_FIRST_(first, ...) first 174 | #define REACTOR_FIRST(...) REACTOR_FIRST_(__VA_ARGS__, 0) 175 | 176 | #define __REACTOR_ERRMSG(type_explanation, x, argname, ...) error("argument '%s' must be %s", argname, type_explanation); 177 | #define _REACTOR_ERRMSG(type_explanation, x, ...) __REACTOR_ERRMSG(type_explanation, __VA_ARGS__, #x, "") 178 | #define REACTOR_ERRMSG(type_explanation, ...) _REACTOR_ERRMSG(type_explanation, __VA_ARGS__, "") 179 | 180 | 181 | #endif 182 | -------------------------------------------------------------------------------- /src/score.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include // for ispunct() 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "hashtable/poshash.h" 11 | #include "hashtable/neghash.h" 12 | 13 | #define THROW_MEMERR() error("unable to allocate memory") 14 | #define FREE(ptr) if(ptr!=NULL) free(ptr) 15 | 16 | 17 | 18 | // ---------------------------------------------------------------------------- 19 | // Hashtable interface 20 | // ---------------------------------------------------------------------------- 21 | 22 | static inline bool is_pos_sentiment(const char *word, const int wordlen) 23 | { 24 | return in_pos_set(word, wordlen) != NULL; 25 | } 26 | 27 | static inline bool is_neg_sentiment(const char *word, const int wordlen) 28 | { 29 | return in_neg_set(word, wordlen) != NULL; 30 | } 31 | 32 | static inline int8_t get_sentiment_score(const char *word, const int wordlen) 33 | { 34 | if (is_pos_sentiment(word, wordlen)) 35 | return 1; 36 | else if (is_neg_sentiment(word, wordlen)) 37 | return -1; 38 | else 39 | return 0; 40 | } 41 | 42 | 43 | 44 | // ---------------------------------------------------------------------------- 45 | // Finds the necessary size of the temporary storage 46 | // ---------------------------------------------------------------------------- 47 | 48 | #define SCHED_LEN 64 49 | 50 | static inline size_t max_strlen(SEXP s_, const int len, int nthreads) 51 | { 52 | size_t maxlen = 0; 53 | 54 | // NOTE the reduction clause is deliberately missing from omp versions < 4, 55 | // OpenMP didn't include max reductions before then. 56 | #ifdef OMP_VER_4 57 | #pragma omp parallel for simd num_threads(nthreads) schedule(static,SCHED_LEN) if(len>OMP_MIN_SIZE) reduction(max:maxlen) 58 | #else 59 | #pragma omp parallel for num_threads(nthreads) schedule(static,SCHED_LEN) if(len>OMP_MIN_SIZE) // no reduction! 60 | #endif 61 | for (int i=0; i maxlen) 70 | maxlen = tmp; 71 | } 72 | 73 | return maxlen; 74 | } 75 | 76 | 77 | 78 | // ---------------------------------------------------------------------------- 79 | // R interface 80 | // ---------------------------------------------------------------------------- 81 | 82 | SEXP R_score(SEXP s_, SEXP nthreads_) 83 | { 84 | SEXP ret, ret_names; 85 | SEXP positive, negative, scores, nwords; 86 | 87 | CHECK_IS_STRINGS(s_, "s"); 88 | CHECK_IS_POSINT(nthreads_, "nthreads"); 89 | 90 | const int len = LENGTH(s_); 91 | int nthreads = asInteger(nthreads_); 92 | 93 | newRvec(positive, len, "int"); 94 | newRvec(negative, len, "int"); 95 | newRvec(scores, len, "int"); 96 | newRvec(nwords, len, "int"); 97 | 98 | const size_t slen = max_strlen(s_, len, nthreads); 99 | 100 | int8_t check = 0; 101 | 102 | #pragma omp parallel shared(check) num_threads(nthreads) 103 | { 104 | char *s = NULL; 105 | // NOTE uncomment to simulate oom failure 106 | // if (omp_get_thread_num() != 1) 107 | s = malloc(slen * sizeof(*s)); 108 | 109 | // all threads tmp space malloc check 110 | #pragma omp atomic// update 111 | check += (s == NULL); 112 | 113 | 114 | // malloc succeeded 115 | if (!check) 116 | { 117 | #pragma omp for 118 | for (int i=0; i 0) 165 | (*pos)++; 166 | else if (score < 0) 167 | (*neg)++; 168 | 169 | j++; 170 | while (isspace(s[j])) 171 | j++; 172 | 173 | start = j; 174 | } 175 | } 176 | } 177 | } 178 | 179 | FREE(s); 180 | } 181 | 182 | // malloc failed - should be outside of parallel region for proper error handling 183 | if (check) 184 | THROW_MEMERR(); 185 | 186 | make_list_names(ret_names, 4, "positive", "negative", "score", "wc"); 187 | make_dataframe(ret, RNULL, ret_names, 4, positive, negative, scores, nwords); 188 | 189 | unhideGC(); 190 | return ret; 191 | } 192 | -------------------------------------------------------------------------------- /inst/sexputils/RNACI.h: -------------------------------------------------------------------------------- 1 | // NOTE: file generated automatically from RNACI source; do not edit by hand 2 | 3 | // Copyright (c) 2014-2017, Drew Schmidt 4 | // All rights reserved. 5 | // 6 | // Redistribution and use in source and binary forms, with or without 7 | // modification, are permitted provided that the following conditions are met: 8 | // 9 | // 1. Redistributions of source code must retain the above copyright notice, this 10 | // list of conditions and the following disclaimer. 11 | // 12 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 13 | // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 14 | // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 15 | // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 16 | // ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 17 | // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 18 | // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 19 | // ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 20 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | 23 | // Changelog: 24 | // Version 0.5.0: 25 | // * Fixed several rchk warnings. 26 | // * Add boolean/logical allocators. 27 | // * Make is_Rnull() a simple macro. 28 | // * Rewrote allocator internals. 29 | // * Changed API for make_list/dataframe and friends. 30 | // 31 | // Version 0.4.0: 32 | // * Clean up internals; better internal guarding. 33 | // * Deprecate non-double float functions. 34 | // * Create build system for non-header-only uses. 35 | // * Fixed dataframe naming bug (Christian Heckendorf). 36 | // * Fixed segfault when creating 0-len dataframes in make_dataframe(). 37 | // 38 | // Version 0.3.0: 39 | // * Fixed warnings visible with -Wall -pedantic. 40 | // * Use strnlen() over strlen(); shorten string checks in allocator. 41 | // * Simplify initializer in allocator using memset(). 42 | // 43 | // Version 0.2.0: 44 | // * Converted to header only. 45 | // 46 | // Version 0.1.0: 47 | // * Initial release. 48 | 49 | 50 | #ifndef __RNACI_H__ 51 | #define __RNACI_H__ 52 | 53 | 54 | #ifndef __RNACI_API_H_ 55 | #define __RNACI_API_H_ 56 | 57 | 58 | #include 59 | #include 60 | 61 | #include 62 | #include 63 | #include 64 | #include 65 | #include 66 | 67 | 68 | // ----------------------------------------------------------------------------- 69 | // Internals 70 | // ----------------------------------------------------------------------------- 71 | 72 | #define RNACI_VERSION 0.5.0 73 | 74 | #define RNACI_MAX(m,n) ((m)<(n) ? (n) : (m)) 75 | 76 | #define RNACI_IGNORED -1 77 | 78 | #define __RNACI_INT(x, y, ...) INTEGER(x)[y] 79 | #define __RNACI_DBL(x, y, ...) REAL(x)[y] 80 | #define __RNACI_STR(x, y, ...) ((char*)CHAR(STRING_ELT(x, y))) 81 | 82 | #define RNACI_PT(x) {PROTECT((x)); RNACI_ptct++;} 83 | 84 | #define OPTIONALARG1(a, b, c, ...) (a),(b),(c) 85 | 86 | static unsigned int RNACI_ptct = 0; 87 | 88 | 89 | static inline SEXP _make_list_names(int n, ...); 90 | static inline SEXP _make_list(SEXP R_list_names, const int n, ...); 91 | 92 | static inline void set_list_names(SEXP R_list, SEXP R_names); 93 | static inline void set_df_rownames(SEXP R_df, SEXP R_rownames); 94 | static inline void set_df_colnames(SEXP R_df, SEXP R_colnames); 95 | static inline void set_list_as_df(SEXP R_list); 96 | 97 | 98 | 99 | // ----------------------------------------------------------------------------- 100 | // Public Interface 101 | // ----------------------------------------------------------------------------- 102 | 103 | // --------- Defs --------- 104 | #define RNULL R_NilValue 105 | 106 | 107 | // --------- Accessors --------- 108 | #define INT(...) __RNACI_INT(__VA_ARGS__, 0, RNACI_IGNORED) 109 | #define DBL(...) __RNACI_DBL(__VA_ARGS__, 0, RNACI_IGNORED) 110 | #define STR(...) __RNACI_STR(__VA_ARGS__, 0, RNACI_IGNORED) 111 | 112 | #define MatINT(x,i,j) (INTEGER(x)[i + nrows(x)*j]) 113 | #define MatDBL(x,i,j) (REAL(x)[i + nrows(x)*j]) 114 | 115 | #define INTP(x) (INTEGER(x)) 116 | #define DBLP(x) (REAL(x)) 117 | 118 | 119 | // --------- gc stuff --------- 120 | #define hidefromGC(x) RNACI_PT(x); 121 | #define unhideGC() {UNPROTECT(RNACI_ptct); RNACI_ptct = 0;}; 122 | 123 | 124 | // --------- External pointers --------- 125 | #define newRptr(ptr,Rptr,fin) {RNACI_PT(Rptr = R_MakeExternalPtr(ptr, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(Rptr, fin, TRUE);} 126 | #define getRptr(ptr) R_ExternalPtrAddr(ptr); 127 | 128 | #define newRptrfreefun(FNAME,TYPE,FREEFUN) \ 129 | static inline void FNAME(SEXP ptr) \ 130 | { \ 131 | if (NULL == R_ExternalPtrAddr(ptr)) return; \ 132 | TYPE *tmp = (TYPE *) R_ExternalPtrAddr(ptr); \ 133 | FREEFUN(tmp); \ 134 | R_ClearExternalPtr(ptr); \ 135 | } \ 136 | void __ignore_me_just_here_for_semicolons(); 137 | 138 | 139 | // --------- Allocators --------- 140 | #define newRlist(x, n) {RNACI_PT((x) = __Rvecalloc(n, "vec", false));} 141 | #define newRvec(x, ...) {RNACI_PT((x) = __Rvecalloc(OPTIONALARG1(__VA_ARGS__, false, RNACI_IGNORED)));} 142 | #define newRmat(x, m, ...) {RNACI_PT((x) = __Rmatalloc(m, OPTIONALARG1(__VA_ARGS__, false, RNACI_IGNORED)));} 143 | 144 | #define setRclass(x,name) __Rsetclass(x, name); 145 | 146 | 147 | // --------- Dataframes and Lists --------- 148 | #define make_list_names(x, n, ...) {RNACI_PT((x) = _make_list_names(n, __VA_ARGS__));} 149 | #define make_list(x, list_names, n, ...) {RNACI_PT((x) = _make_list(list_names, n, __VA_ARGS__));} 150 | #define make_dataframe(x, rownames, colnames, n, ...) {RNACI_PT((x) = _make_dataframe(rownames, colnames, n, __VA_ARGS__));} 151 | 152 | 153 | // --------- Printing --------- 154 | #define Rputchar(c) Rprintf("%c", c) 155 | static inline void PRINT(SEXP x); 156 | 157 | 158 | // --------- is_ checkers --------- 159 | #define is_Rnull(x) ((x) == R_NilValue) 160 | #define is_double(x) (TYPEOF(x) == REALSXP) 161 | #define is_integer(x) (TYPEOF(x) == INTSXP) 162 | 163 | 164 | 165 | // ----------------------------------------------------------------------------- 166 | // Deprecated 167 | // ----------------------------------------------------------------------------- 168 | 169 | #define R_INIT 170 | #define R_END unhideGC() 171 | 172 | 173 | #endif 174 | 175 | 176 | 177 | //---------------------------------------------------------------- 178 | // Definitions 179 | //---------------------------------------------------------------- 180 | 181 | // ..//src/alloc.c 182 | static inline SEXP __Rvecalloc(int n, char *type, int init) 183 | { 184 | SEXP RET; 185 | 186 | if (strncmp(type, "vec", 1) == 0) 187 | { 188 | PROTECT(RET = allocVector(VECSXP, n)); 189 | } 190 | else if (strncmp(type, "int", 1) == 0) 191 | { 192 | PROTECT(RET = allocVector(INTSXP, n)); 193 | 194 | if (init) 195 | memset(INTP(RET), 0, n*sizeof(int)); 196 | } 197 | else if (strncmp(type, "double", 1) == 0) 198 | { 199 | PROTECT(RET = allocVector(REALSXP, n)); 200 | 201 | if (init) 202 | memset(DBLP(RET), 0, n*sizeof(double)); 203 | } 204 | else if (strncmp(type, "boolean", 1) == 0 || strncmp(type, "logical", 1) == 0) 205 | { 206 | PROTECT(RET = allocVector(LGLSXP, n)); 207 | 208 | if (init) 209 | memset(INTP(RET), 0, n*sizeof(int)); 210 | } 211 | else if (strncmp(type, "str", 1) == 0 || strncmp(type, "char*", 1) == 0) 212 | { 213 | PROTECT(RET = allocVector(STRSXP, n)); 214 | } 215 | else 216 | error("unknown allocation type\n"); 217 | 218 | UNPROTECT(1); 219 | return RET; 220 | } 221 | 222 | static inline SEXP __Rmatalloc(int m, int n, char *type, int init) 223 | { 224 | SEXP RET; 225 | 226 | if (strncmp(type, "vec", 1) == 0) 227 | { 228 | PROTECT(RET = allocMatrix(VECSXP, m, n)); 229 | } 230 | else if (strncmp(type, "int", 1) == 0) 231 | { 232 | PROTECT(RET = allocMatrix(INTSXP, m, n)); 233 | 234 | if (init) 235 | memset(INTP(RET), 0, m*n*sizeof(int)); 236 | } 237 | else if (strncmp(type, "double", 1) == 0) 238 | { 239 | PROTECT(RET = allocMatrix(REALSXP, m, n)); 240 | 241 | if (init) 242 | memset(DBLP(RET), 0, m*n*sizeof(double)); 243 | } 244 | else if (strncmp(type, "boolean", 1) == 0 || strncmp(type, "logical", 1) == 0) 245 | { 246 | PROTECT(RET = allocMatrix(LGLSXP, m, n)); 247 | 248 | if (init) 249 | memset(INTP(RET), 0, m*n*sizeof(int)); 250 | } 251 | else if (strncmp(type, "str", 1) == 0 || strncmp(type, "char*", 1) == 0) 252 | { 253 | PROTECT(RET = allocMatrix(STRSXP, m, n)); 254 | } 255 | else 256 | error("unknown allocation type\n"); 257 | 258 | UNPROTECT(1); 259 | return RET; 260 | } 261 | 262 | static inline SEXP __Rsetclass(SEXP x, char *name) 263 | { 264 | SEXP class; 265 | PROTECT(class = allocVector(STRSXP, 1)); 266 | SET_STRING_ELT(class, 0, mkChar(name)); 267 | classgets(x, class); 268 | UNPROTECT(1); 269 | return class; 270 | } 271 | 272 | 273 | 274 | // ..//src/misc.c 275 | static inline SEXP evalfun_stringarg(const char *const restrict fun, const char *const restrict arg) 276 | { 277 | SEXP ret, expr, fun_install, arg_str; 278 | PROTECT(fun_install = install(fun)); 279 | PROTECT(arg_str = ScalarString(mkChar(arg))); 280 | PROTECT(expr = lang2(fun_install, arg_str)); 281 | PROTECT(ret = eval(expr, R_GlobalEnv)); 282 | 283 | UNPROTECT(4); 284 | return ret; 285 | } 286 | 287 | 288 | 289 | static inline void PRINT(SEXP x) 290 | { 291 | SEXP basePackage; 292 | SEXP print_install; 293 | SEXP expr; 294 | 295 | PROTECT(basePackage = evalfun_stringarg("getNamespace", "base")); 296 | 297 | PROTECT(print_install = install("print")); 298 | PROTECT(expr = lang2(print_install, x)); 299 | eval(expr, basePackage); 300 | 301 | UNPROTECT(3); 302 | } 303 | 304 | 305 | 306 | // ..//src/structures_dataframes.c 307 | static inline SEXP make_dataframe_default_colnames(const int ncols) 308 | { 309 | int buflen; 310 | SEXP ret; 311 | 312 | if (ncols == 0) 313 | return RNULL; 314 | 315 | buflen = (int) (ceil(log10((double)ncols)) + 1.); 316 | char *buf = (char*) R_alloc(buflen, sizeof(*buf)); 317 | buf[0] = 'X'; 318 | 319 | PROTECT(ret = allocVector(VECSXP, ncols)); 320 | 321 | for (int i=0; i