├── bin
│   └── tweet-sentiment
├── src
│   ├── index.js
│   ├── predict.js
│   ├── train.js
│   ├── preprocess.js
│   ├── cli.js
│   └── getFeatures.js
├── .jshintignore
├── .travis.yml
├── Makefile
├── .npmignore
├── examples
│   └── index.js
├── .gitignore
├── test
│   └── test.js
├── LICENSE
├── data
│   └── Makefile
├── .jshintrc
├── README.md
└── package.json

/bin/tweet-sentiment:
--------------------------------------------------------------------------------
#!/usr/bin/env node

require( __dirname + '/../lib/cli.js' );
--------------------------------------------------------------------------------

/src/index.js:
--------------------------------------------------------------------------------
'use strict';

module.exports = {
    predict: require( './predict' )
};
--------------------------------------------------------------------------------

/.jshintignore:
--------------------------------------------------------------------------------
# Directories #
###############
reports/

# Node.js #
###########
/node_modules/

# Git #
#######
.git*
--------------------------------------------------------------------------------

/.travis.yml:
--------------------------------------------------------------------------------
language: node_js
node_js:
  - "0.12"
  - "0.11"
  - "0.10"
  - "iojs"
before_install:
  - sudo apt-get install unrar
after_script:
  - npm run coveralls
--------------------------------------------------------------------------------

/Makefile:
--------------------------------------------------------------------------------
MAKE ?= make

# BABEL #
BABEL ?= ./node_modules/.bin/babel

all:
	$(BABEL) -d lib/ src/

clean:
	rm -f lib/*.js

download:
	$(MAKE) -C data download

clean-data:
	$(MAKE) -C data clean

print-%:
	@echo $*=$($*)

.PHONY: all clean clean-data download print-%
--------------------------------------------------------------------------------

/.npmignore:
--------------------------------------------------------------------------------
# Git
.git*

# Utilities #
#############
.jshintrc
.jshintignore
.travis.yml
.editorconfig

# Directories #
###############
reports/
test/

# Node.js #
###########
.npmignore
/node_modules/

# Logs #
########
*.log

# CSV #
#######
*.csv
--------------------------------------------------------------------------------

/examples/index.js:
--------------------------------------------------------------------------------
'use strict';

var ts = require( '../lib/index.js' );

var tweets = [
    'New poll shows more than half of Americans feel shaky about the economy. RETWEET if you are one of them. @FoxBusiness',
    'Labour have NO credibility on the economy. We all know @Ed_Miliband has never had a proper job & doesn\'t know first thing about business',
    'GOP is already complaining Obama won\'t be around to clean up the mess. Republicans are never good at handling peace and a strong economy.',
    'Two-thirds of leading UK economists say coalition austerity had been bad for the economy',
    'Great example of how lucrative the sharing economy can be'
];

console.log( ts.predict( tweets ) );
--------------------------------------------------------------------------------

/src/predict.js:
--------------------------------------------------------------------------------
'use strict';

const _ = require( 'lodash' );
const svm = require( 'node-svm' );
const processTweet = require( './getFeatures.js' );
const path = require( 'path' );

// Restore the trained classifier from the serialized model:
var model = require( path.normalize( __dirname + '/../model/model.json' ) );
var classifier = svm.restore( model );

function predict( tweet ) {
    var testdata;
    if ( Array.isArray( tweet ) ) {
        // Turn each tweet into its feature vector and classify them one by one:
        testdata = tweet.map( x => _.values( processTweet( x ) ) );
        return testdata.map( (x) => classifier.predictSync( x ) );
    } else {
        testdata = _.values( processTweet( tweet ) );
        return classifier.predictSync( testdata );
    }
}

module.exports = exports = predict;
--------------------------------------------------------------------------------

/.gitignore:
--------------------------------------------------------------------------------
# Logs
logs
*.log

# Lib folder
lib/

# Runtime data
pids
*.pid
*.seed

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage

# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# Compiled binary addons (http://nodejs.org/api/addons.html)
build/Release

# Dependency directory
# Commenting this out is preferred by some people, see
# https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git-
node_modules

# Users Environment Variables
.lock-wscript

reports/
tests/

!data/Makefile
data/
--------------------------------------------------------------------------------

/src/train.js:
--------------------------------------------------------------------------------
'use strict';

const fs = require( 'fs' );
const svm = require( 'node-svm' );
const path = require( 'path' );

var data = require( '../model/data' );

// Pair each feature vector with its sentiment label: [ features, label ]
var dataset = data.features.map( (e, i) => [ e, data.sentiments[i] ] );

// C-SVC with a linear kernel; the array of candidate values for the cost
// parameter C lets node-svm pick the best one during training.
var clf = new svm.CSVC({
    kernelType: 'linear',
    probability: true,
    c: [ 0.005, 0.01, 0.125, 0.5, 1, 2 ]
});

clf.train( dataset )
    .progress( function( rate ) {
        console.log( rate );
    })
    .spread( (trainedModel, trainingReport) => {
        console.log( trainingReport );
        fs.writeFileSync( path.normalize( __dirname + '/../model/model.json' ), JSON.stringify( trainedModel ) );
    });
--------------------------------------------------------------------------------

/src/preprocess.js:
--------------------------------------------------------------------------------
'use strict';

const fs = require( 'fs' );
const parse = require( 'csv-parse' );
const _ = require( 'lodash' );
const path = require( 'path' );

const processTweet = require( './getFeatures' );
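// Build the training data from the labeled tweet CSV. The column layout is
// assumed from the indices used below: line[1] holds the sentiment score and
// line[3] the raw tweet text.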
var sentiments = [];
var features = [];
var str = fs.readFileSync( path.normalize( __dirname + '/../data/econTweets.csv' ) );
parse( str, { delimiter: ',' }, function( err, output ) {
    if ( err ) {
        throw err;
    }
    output.forEach( (line, index) => {
        sentiments.push( line[1] );
        features.push( _.values( processTweet( line[3] ) ) );
        console.log( index );
    });
    // Binarize the sentiment scores into the two class labels 1 and -1:
    sentiments = sentiments.map( (x) => x > 0 ? 1 : -1 );

    var o = {
        features: features,
        sentiments: sentiments
    };

    fs.writeFileSync( path.normalize( __dirname + '/../model/data.json' ), JSON.stringify( o ) );
});
--------------------------------------------------------------------------------

/test/test.js:
--------------------------------------------------------------------------------
'use strict';

var chai = require( 'chai' );
var expect = chai.expect;

var predict = require( '../lib/predict' );

describe( 'predict', function tests() {

    it( 'correctly predicts a single positive tweet', function test() {
        var result = predict( 'This is great news, I just got a job.' );
        expect( result ).to.be.equal( 1 );
    });

    it( 'correctly predicts a single negative tweet', function test() {
        var result = predict( 'The economy is terrible right now, layoffs everywhere.' );
        expect( result ).to.be.equal( -1 );
    });

    it( 'correctly predicts an array of tweets', function test() {
        var result = predict( [ 'This is great news, I just got a job.', 'The economy is terrible right now, layoffs everywhere.' ] );
        expect( result ).to.be.deep.equal( [ 1, -1 ] );
    });

});
--------------------------------------------------------------------------------

/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2015 Philipp Burckhardt

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------

/data/Makefile:
--------------------------------------------------------------------------------
download: NRC Sentiment140 BingLiu NRC_Emotion

NRC:
	wget http://www.saifmohammad.com/WebDocs/NRC-Hashtag-Sentiment-Lexicon-v0.1.zip
	unzip NRC-Hashtag-Sentiment-Lexicon-v0.1.zip
	rm NRC-Hashtag-Sentiment-Lexicon-v0.1.zip
	gunzip NRC-Hashtag-Sentiment-Lexicon-v0.1/unigrams-pmilexicon.txt.gz
	gunzip NRC-Hashtag-Sentiment-Lexicon-v0.1/bigrams-pmilexicon.txt.gz
	gunzip NRC-Hashtag-Sentiment-Lexicon-v0.1/pairs-pmilexicon.txt.gz

Sentiment140:
	wget http://www.umiacs.umd.edu/~saif/WebDocs/Sentiment140-Lexicon-v0.1.zip
	unzip Sentiment140-Lexicon-v0.1.zip
	rm Sentiment140-Lexicon-v0.1.zip
	gunzip Sentiment140-Lexicon-v0.1/unigrams-pmilexicon.txt.gz
	gunzip Sentiment140-Lexicon-v0.1/bigrams-pmilexicon.txt.gz
	gunzip Sentiment140-Lexicon-v0.1/pairs-pmilexicon.txt.gz

BingLiu:
	wget http://www.cs.uic.edu/~liub/FBS/opinion-lexicon-English.rar
	unrar x opinion-lexicon-English.rar
	rm opinion-lexicon-English.rar

NRC_Emotion:
	wget http://saifmohammad.com/Lexicons/NRC-Emotion-Lexicon-v0.92.zip
	unzip NRC-Emotion-Lexicon-v0.92.zip
	rm NRC-Emotion-Lexicon-v0.92.zip

clean:
	rm -rf ./Sentiment140-Lexicon-v0.1
	rm -rf ./__MACOSX
	rm -f negative-words.txt
	rm -f positive-words.txt
	rm -rf ./NRC-Hashtag-Sentiment-Lexicon-v0.1
	rm -rf ./NRC-Emotion-Lexicon-v0.92
	rm -f *.zip
	rm -f *.rar

.PHONY: clean download
--------------------------------------------------------------------------------

/.jshintrc:
--------------------------------------------------------------------------------
{
  "bitwise": false,
  "camelcase": false,
  "curly": true,
  "eqeqeq": true,
  "es3": false,
  "forin": true,
  "freeze": true,
  "immed": true,
  "indent": 4,
  "latedef": "nofunc",
  "newcap": true,
  "noarg": true,
  "noempty": false,
  "nonbsp": true,
  "nonew": true,
  "plusplus": false,
  "undef": true,
  "unused": true,
  "strict": true,
  "maxparams": 10,
  "maxdepth": 5,
  "maxstatements": 100,
  "maxcomplexity": false,
  "maxlen": 1000,
  "asi": false,
  "boss": false,
  "debug": false,
  "eqnull": false,
  "esnext": true,
  "evil": false,
  "expr": true,
  "funcscope": false,
  "globalstrict": false,
  "iterator": false,
  "lastsemic": false,
  "laxbreak": false,
  "laxcomma": false,
  "loopfunc": false,
  "maxerr": 1000,
  "moz": false,
  "multistr": false,
  "notypeof": false,
  "proto": false,
  "scripturl": false,
  "shadow": false,
  "sub": true,
  "supernew": false,
  "validthis": false,
  "noyield": false,
  "browser": true,
  "browserify": true,
  "couch": false,
  "devel": true,
  "dojo": false,
  "jasmine": false,
  "jquery": false,
  "mocha": true,
  "mootools": false,
  "node": true,
  "nonstandard": false,
  "prototypejs": false,
  "qunit": false,
  "quotmark": "single",
  "rhino": false,
  "shelljs": false,
  "worker": false,
  "wsh": false,
  "yui": false,
  "globals": {}
}
--------------------------------------------------------------------------------

/src/cli.js:
--------------------------------------------------------------------------------
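// CLI entry point (exposed via bin/tweet-sentiment): reads tweets from a
// file, one per line, predicts the sentiment of each, and saves the
// predictions as JSON.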
'use strict';

// load modules
const program = require( 'commander' );
const fs = require( 'fs' );
const predict = require( './predict.js' );

var predictedSentiments = [];

program
    .version( '0.1.0' );

program
    .command( 'predict <file>' )
    .description( 'predict sentiment of tweets' )
    .option( '-o, --output [value]', 'File name of generated JSON file' )
    .action( (input, options) => {
        var inputData = fs.createReadStream( input );
        readLines( inputData, makePrediction, options );
    });

program
    .parse( process.argv );

// Consume the stream chunk by chunk, invoking `func` once per complete line:
function readLines( input, func, options ) {
    var remaining = '';

    input.on( 'data', (data) => {
        remaining += data;
        var index = remaining.indexOf( '\n' );
        while ( index > -1 ) {
            var line = remaining.substring( 0, index );
            remaining = remaining.substring( index + 1 );
            func( line );
            index = remaining.indexOf( '\n' );
        }
    });

    input.on( 'end', () => {
        if ( remaining.length > 0 ) {
            func( remaining );
        }
        savePredictions( options );
    });
}

function makePrediction( text ) {
    predictedSentiments.push( predict( text ) );
}

function savePredictions( options ) {
    if ( options.output ) {
        fs.writeFileSync( options.output, JSON.stringify( predictedSentiments ) );
    } else {
        // No --output file given: print the predictions to stdout instead.
        console.log( JSON.stringify( predictedSentiments ) );
    }
}
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
[![NPM version][npm-image]][npm-url]
[![Build Status][travis-image]][travis-url]
[![Coverage Status][coveralls-image]][coveralls-url]
[![Dependencies][dependencies-image]][dependencies-url]

# tweet-sentiment

> SVM Classifier to Detect Sentiment of Tweets. The package implements the procedure described in the paper ["NRC-Canada: Building the State-of-the-Art in Sentiment Analysis of Tweets"](http://www.umiacs.umd.edu/~saif/WebDocs/sentimentMKZ.pdf) by Saif M. Mohammad, Svetlana Kiritchenko, and Xiaodan Zhu.

## Installation

Install the tool globally so that it can be invoked from any directory via the `tweet-sentiment` command:
```
npm install tweet-sentiment -g
```

## Getting Started

After installation, help on the available options can be obtained by typing

```
tweet-sentiment --help
```

## Command Line Interface

### tweet-sentiment predict [options] \<file\>

Reads tweets from `<file>`, one tweet per line, and predicts a sentiment label for each.

Option | Description
------------- | -------------
-h, --help | output usage information
-o, --output [value] | name of the generated JSON output file

[npm-image]: https://badge.fury.io/js/tweet-sentiment.svg
[npm-url]: http://badge.fury.io/js/tweet-sentiment

[travis-image]: https://travis-ci.org/Planeshifter/tweet-sentiment.svg
[travis-url]: https://travis-ci.org/Planeshifter/tweet-sentiment

[coveralls-image]: https://img.shields.io/coveralls/Planeshifter/tweet-sentiment/master.svg
[coveralls-url]: https://coveralls.io/r/Planeshifter/tweet-sentiment?branch=master

[dependencies-image]: http://img.shields.io/david/Planeshifter/tweet-sentiment.svg
[dependencies-url]: https://david-dm.org/Planeshifter/tweet-sentiment
--------------------------------------------------------------------------------

/package.json:
--------------------------------------------------------------------------------
{
  "name": "@planeshifter/tweet-sentiment",
  "version": "0.2.5",
  "description": "SVM Classifier to Detect Sentiment of Tweets",
  "main": "lib/index.js",
  "preferGlobal": true,
  "bin": {
    "tweet-sentiment": "./bin/tweet-sentiment"
  },
  "scripts": {
    "test": "./node_modules/.bin/mocha",
    "prepublish": "make all",
    "postinstall": "make download",
    "test-cov": "./node_modules/.bin/istanbul cover ./node_modules/.bin/_mocha --dir ./reports/coverage -- -R spec",
    "coveralls": "./node_modules/.bin/istanbul cover ./node_modules/.bin/_mocha --dir ./reports/coveralls/coverage --report lcovonly -- -R spec && cat ./reports/coveralls/coverage/lcov.info | ./node_modules/coveralls/bin/coveralls.js && rm -rf ./reports/coveralls"
  },
  "repository": {
    "type": "git",
    "url": "https://github.com/Planeshifter/tweet-sentiment.git"
  },
  "keywords": [
    "sentiment-analysis",
    "tweets",
    "twitter",
    "social-media",
    "NLP",
    "text-mining"
  ],
  "author": "Philipp Burckhardt",
  "license": "MIT",
  "bugs": {
    "url": "https://github.com/Planeshifter/tweet-sentiment/issues"
  },
  "homepage": "https://github.com/Planeshifter/tweet-sentiment",
  "dependencies": {
    "commander": "^2.7.1",
    "csv-parse": "^0.1.0",
    "e": "0.0.4",
    "emotional-emoticons": "0.0.1",
    "lodash": "^3.6.0",
    "node-svm": "^2.1.4",
    "plus_arrays": "^0.1.5",
    "pos": "^0.1.9",
    "ramda": "^0.13.0",
    "svm": "^0.1.1"
  },
  "devDependencies": {
    "babel": "^4.7.16",
    "chai": "^2.2.0",
    "coveralls": "^2.11.2",
    "istanbul": "^0.3.13",
    "jshint": "^2.6.3",
    "jshint-stylish": "^1.0.1",
    "mocha": "^2.2.1"
  }
}
--------------------------------------------------------------------------------

/src/getFeatures.js:
--------------------------------------------------------------------------------
'use strict';

const R = require( 'ramda' );
const _ = require( 'lodash' );
const pos = require( 'pos' );
const emotions = require( 'emotional-emoticons' );
const path = require( 'path' );
// extends Array.prototype with the contains() and max() helpers used below
require( 'plus_arrays' );

const fs = require( 'fs' );
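// Load the sentiment lexicons fetched by `make download` (see data/Makefile):
// the Bing Liu opinion lexicon, the Sentiment140 and NRC Hashtag Sentiment
// unigram lexicons, and the NRC Emotion lexicon.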
const bingLiuLexicon = {
    // drop the header preamble (first 35 lines) of each word list file
    positive: fs.readFileSync( path.normalize( __dirname + '/../data/positive-words.txt' ) ).toString().split( '\n' ).filter( (w, i) => i > 34 ),
    negative: fs.readFileSync( path.normalize( __dirname + '/../data/negative-words.txt' ) ).toString().split( '\n' ).filter( (w, i) => i > 34 )
};

const sentiment140Lexicon = fs.readFileSync( path.normalize( __dirname + '/../data/Sentiment140-Lexicon-v0.1/unigrams-pmilexicon.txt' ) )
    .toString()
    .split( '\n' )
    .map( e => e.split( '\t' ) );

const hashtagSentimentLexicon = fs.readFileSync( path.normalize( __dirname + '/../data/NRC-Hashtag-Sentiment-Lexicon-v0.1/unigrams-pmilexicon.txt' ) )
    .toString()
    .split( '\n' )
    .map( e => e.split( '\t' ) );

// keep only `positive` / `negative` entries with an association flag of 1
var emotionLexicon = fs.readFileSync( path.normalize( __dirname + '/../data/NRC-Emotion-Lexicon-v0.92/NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt' ) )
    .toString()
    .split( '\n' )
    .map( e => e.split( '\t' ) )
    .filter( e => e[1] === 'positive' || e[1] === 'negative' )
    .filter( e => e[2] === '1' );

// (currently only used by the commented-out POS features in getFeatures below)
function getPartOfSpeechCounts( text ) {
    var words = new pos.Lexer().lex( text );
    var taggedWords = new pos.Tagger().tag( words );
    var tags = taggedWords.map( (w) => w[1] );
    var counts = _.countBy( tags );
    return counts;
}

function replaceURLs( tweet ) {
    var myRegEx = /[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/g;
    return tweet.replace( myRegEx, 'someurl' );
}

function replaceUsers( tweet ) {
    var myRegEx = /@[A-Za-z0-9_]{1,15}/g;
    return tweet.replace( myRegEx, '@someuser' );
}

function getNoHashtags( tweet ) {
    var matches = tweet.match( /\#+[\w_]+[\w\'_\-]*[\w_]+/g );
    return matches ? matches.length : 0;
}

function getNoAllCaps( tweet ) {
    var matches = tweet.match( /\b[A-Z]+\b/g );
    return matches ? matches.length : 0;
}

function getEmoticons( tweet ) {
    var emoticonRegEx = /[<>]?[:;=8][\-o\*\']?[\)\]\(\[dDpP/\:\}\{@\|\\]|[\)\]\(\[dDpP/\:\}\{@\|\\][\-o\*\']?[:;=8][<>]?/g;
    return tweet.match( emoticonRegEx );
}

function getNoElongatedWords( tweet ) {
    var isElongated = /\b[A-Za-z]*([a-zA-Z])\1\1[A-Za-z]*\b/g;
    var matches = tweet.match( isElongated );
    return matches ? matches.length : 0;
}

function getNoNegations( tweet ) {
    var negationRegEx = '(?:(?:never|no|nothing|nowhere|noone|none|not|havent|hasnt|hadnt|cant|couldnt|shouldnt|wont|wouldnt|dont|doesnt|didnt|isnt|arent|aint))|n\'t';
    var punctRegEx = '[.:;!?]';
    var myRegEx = new RegExp( negationRegEx + '(.*?)' + punctRegEx, 'gm' );
    var matches = tweet.match( myRegEx );
    return matches ? matches.length : 0;
}

// append the suffix `_NEG` to every word between a negation cue and the next
// punctuation mark
function markNegatedWords( tweet ) {
    var negationRegEx = '(never|nothing|nowhere|noone|none|not|no|havent|hasnt|hadnt|cant|couldnt|shouldnt|wont|wouldnt|dont|doesnt|didnt|isnt|arent|aint|n\'t)';
    var punctRegEx = '([.:;!?])';
    var myRegEx = new RegExp( negationRegEx + '(.*?)' + punctRegEx, 'gm' );

    return tweet.replace( myRegEx, function( $0, $1, $2, $3 ) {
        $2 = $2.split( ' ' ).filter( (w, i) => i > 0 ).map( w => w + '_NEG' ).join( ' ' );
        return $1 + ' ' + $2 + $3;
    });
}
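// Each lexicon is mapped onto the same four-feature template (cf. the
// NRC-Canada system cited in the README): the count of positively scored
// tokens, the total score, the maximal score, and the score of the last
// positively scored token.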
function getBingLiuScores( tokens ) {
    var scores = tokens.map( (w) => {
        return bingLiuLexicon.positive.contains( w ) ? 1 : bingLiuLexicon.negative.contains( w ) ? -1 : 0;
    });

    var output = {
        bingLiu_greaterZero: scores.filter( w => w > 0 ).length,
        // the initial value 0 keeps reduce from throwing on empty token lists
        bingLiu_totalScore: scores.reduce( (a, b) => a + b, 0 ),
        bingLiu_maxScore: scores.max(),
        bingLiu_lastToken: scores.filter( w => w > 0 ).pop() || 0
    };

    return output;
}

function getSentiment140Scores( tokens ) {
    var scores = tokens.map( (w) => {
        var match = sentiment140Lexicon.filter( e => e[0] === w );
        var res = match.length > 0 ? match[0][1] : 0;
        return parseFloat( res );
    });

    var output = {
        sentiment140_greaterZero: scores.filter( w => w > 0 ).length,
        sentiment140_totalScore: scores.reduce( (a, b) => a + b, 0 ),
        sentiment140_maxScore: scores.max(),
        sentiment140_lastToken: scores.filter( w => w > 0 ).pop() || 0
    };

    return output;
}

function getHashtagSentimentScores( tokens ) {
    var scores = tokens.map( (w) => {
        var match = hashtagSentimentLexicon.filter( e => e[0] === w );
        var res = match.length > 0 ? match[0][1] : 0;
        return parseFloat( res );
    });

    var output = {
        hashtagSentimentLexicon_greaterZero: scores.filter( w => w > 0 ).length,
        hashtagSentimentLexicon_totalScore: scores.reduce( (a, b) => a + b, 0 ),
        hashtagSentimentLexicon_maxScore: scores.max(),
        hashtagSentimentLexicon_lastToken: scores.filter( w => w > 0 ).pop() || 0
    };

    return output;
}
function getEmotionScores( tokens ) {
    var scores = tokens.map( (w) => {
        var match = emotionLexicon.filter( e => e[0] === w );
        var res = match.length > 0 ? match[0][1] : 0;
        return res === 'positive' ? 1 : res === 'negative' ? -1 : 0;
    });

    var output = {
        nrcEmotion_greaterZero: scores.filter( w => w > 0 ).length,
        nrcEmotion_totalScore: scores.reduce( (a, b) => a + b, 0 ),
        nrcEmotion_maxScore: scores.max(),
        nrcEmotion_lastToken: scores.filter( w => w > 0 ).pop() || 0
    };

    return output;
}

function getEmoticonScores( emoticons ) {
    if ( !emoticons ) {
        return {
            emoticon_greaterZero: 0,
            emoticon_totalScore: 0,
            emoticon_maxScore: 0,
            emoticon_lastToken: 0
        };
    }

    var getEmoScore = function( icon ) {
        for ( let key in emotions ) {
            if ( emotions[key].e.contains( icon ) === true ) {
                return emotions[key].p;
            }
        }
        // unknown emoticon: score it as neutral rather than returning undefined
        return 0;
    };

    var scores = emoticons.map( (icon) => getEmoScore( icon ) );

    var output = {
        emoticon_greaterZero: scores.filter( w => w > 0 ).length,
        emoticon_totalScore: scores.reduce( (a, b) => a + b, 0 ),
        emoticon_maxScore: scores.max() || 0,
        emoticon_lastToken: scores.filter( w => w > 0 ).pop() || 0
    };

    return output;
}

function getFeatures( tweet ) {
    var preProcess = R.pipe( replaceURLs, replaceUsers );
    var processedTweet = preProcess( tweet );

    // NOTE: the negation-marked tweet is computed here but not used further;
    // all features below are derived from `processedTweet`.
    tweet = markNegatedWords( tweet );

    var features = {
        allcaps: getNoAllCaps( processedTweet ),
        hashtags: getNoHashtags( processedTweet ),
        elongated: getNoElongatedWords( processedTweet ),
        negated: getNoNegations( processedTweet )
    };

    /*
    var tags = getPartOfSpeechCounts( processedTweet );
    _.extend( features, tags );
    */

    var tokens = new pos.Lexer().lex( processedTweet );

    var bingLiuScores = getBingLiuScores( tokens );
    _.extend( features, bingLiuScores );

    var sentiment140Scores = getSentiment140Scores( tokens );
    _.extend( features, sentiment140Scores );

    var hashtagSentimentScores = getHashtagSentimentScores( tokens );
    _.extend( features, hashtagSentimentScores );

    var emotionScores = getEmotionScores( tokens );
    _.extend( features, emotionScores );

    var emoticons = getEmoticons( processedTweet );

    var emoticonScores = getEmoticonScores( emoticons );
    _.extend( features, emoticonScores );

    return features;
}

module.exports = exports = getFeatures;
--------------------------------------------------------------------------------
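For orientation, here is a minimal sketch of programmatic use, based on the `predict` API exported in `src/index.js` and the behaviour exercised in `test/test.js` (the package name follows `package.json`; the sketch assumes the package was installed, so the lexicons were downloaded and the sources compiled to `lib/`):

```
'use strict';

var ts = require( '@planeshifter/tweet-sentiment' );

// A single tweet yields a single class label: 1 (positive) or -1 (negative).
console.log( ts.predict( 'This is great news, I just got a job.' ) );
// => 1

// An array of tweets yields an array of labels, one per tweet.
console.log( ts.predict([
    'This is great news, I just got a job.',
    'The economy is terrible right now, layoffs everywhere.'
]) );
// => [ 1, -1 ]
```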