├── bin
│   └── tweet-sentiment
├── src
│   ├── index.js
│   ├── predict.js
│   ├── train.js
│   ├── preprocess.js
│   ├── cli.js
│   └── getFeatures.js
├── .jshintignore
├── .travis.yml
├── Makefile
├── .npmignore
├── examples
│   └── index.js
├── .gitignore
├── test
│   └── test.js
├── LICENSE
├── data
│   └── Makefile
├── .jshintrc
├── README.md
└── package.json
/bin/tweet-sentiment:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | require(__dirname + '/../lib/cli.js');
4 |
--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | module.exports = {
4 | predict: require( './predict' )
5 | };
6 |
--------------------------------------------------------------------------------
/.jshintignore:
--------------------------------------------------------------------------------
1 | # Directories #
2 | ###############
3 | reports/
4 |
5 | # Node.js #
6 | ###########
7 | /node_modules/
8 |
9 | # Git #
10 | #######
11 | .git*
12 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: node_js
2 | node_js:
3 | - "0.12"
4 | - "0.11"
5 | - "0.10"
6 | - "iojs"
7 | before_install:
8 | - sudo apt-get install unrar
9 | after_script:
10 | - npm run coveralls
11 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | MAKE ?= make
2 | # BABEL
3 | BABEL ?= ./node_modules/.bin/babel
4 |
5 | all:
6 | $(BABEL) -d lib/ src/
7 |
8 | clean:
9 | rm -f lib/*.js
10 |
11 | download:
12 | $(MAKE) -C data download
13 |
14 | clean-data:
15 | $(MAKE) -C data clean
16 |
17 | print-%:
18 | @echo $*=$($*)
19 |
20 | .PHONY: all clean clean-data download print-%
21 |
--------------------------------------------------------------------------------
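
The Makefile above is the build entry point: `make` transpiles the ES6 sources in src/ into the lib/ directory that bin/, examples/, and test/ actually require, and `make download` delegates to data/Makefile to fetch the sentiment lexicons. A typical first-time setup, as a sketch (the package.json postinstall hook already runs `make download` on npm install):

    npm install         # install dependencies; postinstall triggers `make download`
    make                # compile src/ to lib/ via Babel
    make print-BABEL    # debug helper: echoes BABEL=./node_modules/.bin/babel
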
/.npmignore:
--------------------------------------------------------------------------------
1 | # Git
2 | .git*
3 |
4 | # Utilities #
5 | #############
6 | .jshintrc
7 | .jshintignore
8 | .travis.yml
9 | .editorconfig
10 |
11 | # Directories #
12 | ###############
13 | reports/
14 | test/
15 |
16 | # Node.js #
17 | ###########
18 | .npmignore
19 | /node_modules/
20 |
21 | # Logs #
22 | ########
23 | *.log
24 |
25 | # CSV #
26 | #######
27 | *.csv
28 |
29 |
--------------------------------------------------------------------------------
/examples/index.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | var ts = require( '../lib/index.js');
4 |
5 | var tweets = [
6 | 'New poll shows more than half of Americans feel shaky about the economy. RETWEET if you are one of them. @FoxBusiness',
7 | 'Labour have NO credibility on the economy. We all know @Ed_Miliband has never had a proper job & doesn\'t know first thing about business',
8 | 'GOP is already complaining Obama won\'t be around to clean up the mess. Republicans are never good at handling peace and a strong economy.',
9 | 'Two-thirds of leading UK economists say coalition austerity had been bad for the economy',
10 | 'Great example of how lucrative the sharing economy can be'
11 | ];
12 |
13 | console.log( ts.predict( tweets ) );
14 |
--------------------------------------------------------------------------------
/src/predict.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | const _ = require( 'lodash' );
4 | const svm = require( 'node-svm' );
5 | const processTweet = require( './getFeatures.js' );
6 | const path = require( 'path' );
7 |
8 | var model = require( path.normalize( __dirname + '/../model/model.json' ) );
9 | var classifier = svm.restore(model);
10 |
11 | function predict( tweet ) {
12 | var testdata;
13 | if ( Array.isArray(tweet) === true) {
14 | testdata = tweet.map( x => _.values( processTweet(x) ) );
15 | return testdata.map( (x) => classifier.predictSync(x) );
16 | } else {
17 | testdata = _.values( processTweet(tweet) );
18 | return classifier.predictSync( testdata );
19 | }
20 | }
21 |
22 | module.exports = exports = predict;
23 |
--------------------------------------------------------------------------------
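
predict() accepts either a single tweet string or an array of tweets and returns the corresponding label(s). A minimal usage sketch, assuming lib/ has been built (`make all`) and the lexicons downloaded; the expected labels follow test/test.js:

    var predict = require( './lib/predict' );

    // single tweet -> single label (1 = positive, -1 = negative)
    console.log( predict( 'This is great news, I just got a job.' ) ); // => 1

    // array of tweets -> array of labels
    console.log( predict( [
        'This is great news, I just got a job.',
        'The economy is terrible right now, layoffs everywhere.'
    ] ) ); // => [ 1, -1 ]
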
/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 |
5 | # Lib folder
6 | lib/
7 |
8 | # Runtime data
9 | pids
10 | *.pid
11 | *.seed
12 |
13 | # Directory for instrumented libs generated by jscoverage/JSCover
14 | lib-cov
15 |
16 | # Coverage directory used by tools like istanbul
17 | coverage
18 |
19 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
20 | .grunt
21 |
22 | # Compiled binary addons (http://nodejs.org/api/addons.html)
23 | build/Release
24 |
25 | # Dependency directory
26 | # Commenting this out is preferred by some people, see
27 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git-
28 | node_modules
29 |
30 | # Users Environment Variables
31 | .lock-wscript
32 |
33 | reports/
34 | tests/
35 |
36 | data/*
37 | !data/Makefile
38 |
--------------------------------------------------------------------------------
/src/train.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | const fs = require( 'fs' );
4 | const svm = require( 'node-svm' );
5 | const _ = require( 'lodash' );
6 | const util = require( 'util' );
7 | const path = require( 'path' );
8 |
9 | var data = require( '../model/data' );
10 |
11 | var dataset = data.features.map( (e, i) => {
12 | var o = [];
13 | o[0] = e;
14 | o[1] = data.sentiments[i];
15 | return o;
16 | });
17 |
18 | var clf = new svm.CSVC({
19 | kernelType: 'linear',
20 | probability: true,
21 | c:[0.005, 0.01,0.125,0.5,1,2]
22 | });
23 |
24 | clf.train(dataset)
25 | .progress( function( rate ) {
26 | console.log( rate );
27 | })
28 | .spread( (trainedModel, trainingReport) => {
29 | console.log(trainingReport);
30 | fs.writeFileSync( path.normalize( __dirname + '/../model/model.json' ), JSON.stringify(trainedModel) );
31 | });
32 |
--------------------------------------------------------------------------------
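
clf.train() consumes an array of [featureVector, label] pairs; passing an array of c values makes node-svm search over them (via cross-validation) for the best C for the linear-kernel C-SVC. For illustration, one row of the dataset built above has this shape (the feature values here are invented):

    // [ numeric features from getFeatures(), binarized sentiment label ]
    var exampleRow = [ [ 1, 0, 0, 2 /* ...remaining features... */ ], 1 ];
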
/src/preprocess.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | const fs = require( 'fs' );
4 | const parse = require( 'csv-parse' );
5 | const _ = require( 'lodash' );
6 | const path = require( 'path' );
7 |
8 | const processTweet = require( './getFeatures' );
9 |
10 | var sentiments = [];
11 | var features = [];
12 | var str = fs.readFileSync( path.normalize( __dirname + '/../data/econTweets.csv' ) );
13 | parse(str, { delimiter: ',' }, function(err, output){
14 | output.forEach( (line, index) => {
15 | sentiments.push( line[1] );
16 | features.push( _.values( processTweet(line[3]) ) );
17 | console.log( index );
18 | });
19 | sentiments = sentiments.map( (x) => x > 0 ? 1 : -1);
20 |
21 | var o = {
22 | features: features,
23 | sentiments: sentiments
24 | };
25 |
26 | fs.writeFileSync( path.normalize( __dirname + '/../model/data.json' ), JSON.stringify(o) );
27 |
28 | });
29 |
--------------------------------------------------------------------------------
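
The script assumes a fixed column layout for data/econTweets.csv, which is not included in the repository: line[1] holds a raw sentiment score (binarized to ±1 above) and line[3] the tweet text. A purely hypothetical parsed row, to make the indexing concrete:

    // [ id, rawSentiment, user, text ]   <- assumed layout, not confirmed by the repo
    // [ '17', '4', 'someuser', 'Great jobs report today!' ]
    //          ^ line[1] > 0 -> label 1   ^ line[3], passed to getFeatures()
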
/test/test.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | var chai = require( 'chai' );
4 | var expect = chai.expect;
5 |
6 | var predict = require( '../lib/predict' );
7 |
8 | describe( 'predict', function tests() {
9 |
10 | it( 'correctly predicts a single positive tweet', function test() {
11 | var result = predict( 'This is great news, I just got a job.' );
12 | expect(result).to.be.equal( 1 );
13 | });
14 |
15 | it( 'correctly predicts a single negative tweet', function test() {
16 | var result = predict( 'The economy is terrible right now, layoffs everywhere.' );
17 | expect(result).to.be.equal( -1 );
18 | });
19 |
20 | it( 'correctly predicts an array of tweets', function test() {
21 | var result = predict( ['This is great news, I just got a job.', 'The economy is terrible right now, layoffs everywhere.'] );
22 | expect(result).to.be.deep.equal( [ 1, -1 ] );
23 | });
24 |
25 | });
26 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015 Philipp Burckhardt
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/data/Makefile:
--------------------------------------------------------------------------------
1 | download: NRC Sentiment140 BingLiu NRC_Emotion
2 |
3 | NRC:
4 | wget http://www.saifmohammad.com/WebDocs/NRC-Hashtag-Sentiment-Lexicon-v0.1.zip
5 | unzip NRC-Hashtag-Sentiment-Lexicon-v0.1.zip
6 | rm NRC-Hashtag-Sentiment-Lexicon-v0.1.zip
7 | gunzip NRC-Hashtag-Sentiment-Lexicon-v0.1/unigrams-pmilexicon.txt.gz
8 | gunzip NRC-Hashtag-Sentiment-Lexicon-v0.1/bigrams-pmilexicon.txt.gz
9 | gunzip NRC-Hashtag-Sentiment-Lexicon-v0.1/pairs-pmilexicon.txt.gz
10 |
11 | Sentiment140:
12 | wget http://www.umiacs.umd.edu/~saif/WebDocs/Sentiment140-Lexicon-v0.1.zip
13 | unzip Sentiment140-Lexicon-v0.1.zip
14 | rm Sentiment140-Lexicon-v0.1.zip
15 | gunzip Sentiment140-Lexicon-v0.1/unigrams-pmilexicon.txt.gz
16 | gunzip Sentiment140-Lexicon-v0.1/bigrams-pmilexicon.txt.gz
17 | gunzip Sentiment140-Lexicon-v0.1/pairs-pmilexicon.txt.gz
18 |
19 | BingLiu:
20 | wget http://www.cs.uic.edu/~liub/FBS/opinion-lexicon-English.rar
21 | unrar x opinion-lexicon-English.rar
22 | rm opinion-lexicon-English.rar
23 |
24 | NRC_Emotion:
25 | wget http://saifmohammad.com/Lexicons/NRC-Emotion-Lexicon-v0.92.zip
26 | unzip NRC-Emotion-Lexicon-v0.92.zip
27 | rm NRC-Emotion-Lexicon-v0.92.zip
28 |
29 | clean:
30 | rm -rf ./Sentiment140-Lexicon-v0.1
31 | rm -rf ./__MACOSX
32 | rm -f negative-words.txt
33 | rm -f positive-words.txt
34 | rm -rf ./NRC-Hashtag-Sentiment-Lexicon-v0.1
35 | rm -rf ./NRC-Emotion-Lexicon-v0.92
36 | rm -f *.zip
37 | rm -f *.rar
38 |
39 | .PHONY: clean download
40 |
--------------------------------------------------------------------------------
/.jshintrc:
--------------------------------------------------------------------------------
1 | {
2 | "bitwise": false,
3 | "camelcase": false,
4 | "curly": true,
5 | "eqeqeq": true,
6 | "es3": false,
7 | "forin": true,
8 | "freeze": true,
9 | "immed": true,
10 | "indent": 4,
11 | "latedef": "nofunc",
12 | "newcap": true,
13 | "noarg": true,
14 | "noempty": false,
15 | "nonbsp": true,
16 | "nonew": true,
17 | "plusplus": false,
18 | "undef": true,
19 | "unused": true,
20 | "strict": true,
21 | "maxparams": 10,
22 | "maxdepth": 5,
23 | "maxstatements": 100,
24 | "maxcomplexity": false,
25 | "maxlen": 1000,
26 | "asi": false,
27 | "boss": false,
28 | "debug": false,
29 | "eqnull": false,
30 | "esnext": true,
31 | "evil": false,
32 | "expr": true,
33 | "funcscope": false,
34 | "globalstrict": false,
35 | "iterator": false,
36 | "lastsemic": false,
37 | "laxbreak": false,
38 | "laxcomma": false,
39 | "loopfunc": false,
40 | "maxerr": 1000,
41 | "moz": false,
42 | "multistr": false,
43 | "notypeof": false,
44 | "proto": false,
45 | "scripturl": false,
46 | "shadow": false,
47 | "sub": true,
48 | "supernew": false,
49 | "validthis": false,
50 | "noyield": false,
51 | "browser": true,
52 | "browserify": true,
53 | "couch": false,
54 | "devel": true,
55 | "dojo": false,
56 | "jasmine": false,
57 | "jquery": false,
58 | "mocha": true,
59 | "mootools": false,
60 | "node": true,
61 | "nonstandard": false,
62 | "prototypejs": false,
63 | "qunit": false,
64 | "quotmark": "single",
65 | "rhino": false,
66 | "shelljs": false,
67 | "worker": false,
68 | "wsh": false,
69 | "yui": false,
70 | "globals": {}
71 | }
72 |
--------------------------------------------------------------------------------
/src/cli.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | // load modules
4 | const program = require( 'commander' );
5 | const fs = require( 'fs' );
6 | const predict = require( './predict.js');
7 |
8 | var predictedSentiments = [];
9 |
10 | program
11 | .version( require( '../package.json' ).version );
12 |
13 | program
14 | .command('predict <input>')
15 | .description('predict sentiment of tweets')
16 | .option('-o, --output [value]', 'File name of generated JSON file')
17 | .action( (input, options) => {
18 | var inputData = fs.createReadStream( input );
19 | readLines( inputData, makePrediction, options );
20 | });
21 |
22 | program
23 | .parse(process.argv);
24 |
25 | function readLines( input, func, options ) {
26 | var remaining = '';
27 |
28 | input.on( 'data', (data) => {
29 | remaining += data;
30 | var index = remaining.indexOf( '\n' );
31 | while ( index > -1 ) {
32 | var line = remaining.substring( 0, index );
33 | remaining = remaining.substring( index + 1 );
34 | if ( line ) { func( line ); } // skip empty lines
35 | index = remaining.indexOf( '\n' );
36 | }
37 | });
38 |
39 | input.on( 'end', () => {
40 | if ( remaining.length > 0 ) {
41 | func(remaining);
42 | }
43 |
44 | savePredictions( options );
45 |
46 | });
47 | }
48 |
49 | function makePrediction( text ) {
50 | predictedSentiments.push( predict( text ) );
51 | }
52 |
53 | function savePredictions( options ) {
54 | fs.writeFileSync( options.output, JSON.stringify(predictedSentiments) );
55 | }
56 |
--------------------------------------------------------------------------------
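
The CLI streams the input file, classifies each non-empty line as a tweet, and writes the collected labels as a JSON array; note that -o is effectively required, since savePredictions() passes options.output straight to fs.writeFileSync(). An end-to-end sketch with made-up file names (labels as in test/test.js):

    $ cat tweets.txt
    This is great news, I just got a job.
    The economy is terrible right now, layoffs everywhere.

    $ tweet-sentiment predict tweets.txt -o sentiments.json
    $ cat sentiments.json
    [1,-1]
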
/README.md:
--------------------------------------------------------------------------------
1 | [![NPM version][npm-image]][npm-url]
2 | [![Build Status][travis-image]][travis-url]
3 | [![Coverage Status][coveralls-image]][coveralls-url]
4 | [![Dependencies][dependencies-image]][dependencies-url]
5 |
6 | # tweet-sentiment
7 |
8 | > SVM Classifier to Detect Sentiment of Tweets. The package implements the procedure described in the paper ["NRC-Canada: Building the State-of-the-Art in
9 | Sentiment Analysis of Tweets"](http://www.umiacs.umd.edu/~saif/WebDocs/sentimentMKZ.pdf) by Saif M. Mohammad, Svetlana Kiritchenko, and Xiaodan Zhu.
10 |
11 | ## Installation
12 |
13 | The tool should be installed globally so that it can be invoked from any directory via the command `tweet-sentiment`.
14 |
15 | ```
16 | npm install tweet-sentiment -g
17 | ```
18 |
19 | ## Getting Started
20 |
21 | After installation, display the program's available commands and options by typing
22 |
23 | ```
24 | tweet-sentiment --help
25 | ```
26 |
27 | ## Command Line Interface
28 |
29 | ### tweet-sentiment predict [options] \<input\>
30 |
31 | Option | Description
32 | ------------- | -------------
33 | -h, --help | output usage information
34 | -o, --output [value] | file name of the generated JSON file
35 |
36 | [npm-image]: https://badge.fury.io/js/tweet-sentiment.svg
37 | [npm-url]: http://badge.fury.io/js/tweet-sentiment
38 |
39 | [travis-image]: https://travis-ci.org/Planeshifter/tweet-sentiment.svg
40 | [travis-url]: https://travis-ci.org/Planeshifter/tweet-sentiment
41 |
42 | [coveralls-image]: https://img.shields.io/coveralls/Planeshifter/tweet-sentiment/master.svg
43 | [coveralls-url]: https://coveralls.io/r/Planeshifter/tweet-sentiment?branch=master
44 |
45 | [dependencies-image]: http://img.shields.io/david/Planeshifter/tweet-sentiment.svg
46 | [dependencies-url]: https://david-dm.org/Planeshifter/tweet-sentiment
47 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "@planeshifter/tweet-sentiment",
3 | "version": "0.2.5",
4 | "description": "SVM Classifier to Detect Sentiment of Tweets",
5 | "main": "lib/index.js",
6 | "preferGlobal": "true",
7 | "bin": {
8 | "tweet-sentiment": "./bin/tweet-sentiment"
9 | },
10 | "scripts": {
11 | "test": "./node_modules/.bin/mocha",
12 | "prepublish": "make all",
13 | "postinstall": "make download",
14 | "test-cov": "./node_modules/.bin/istanbul cover ./node_modules/.bin/_mocha --dir ./reports/coverage -- -R spec",
15 | "coveralls": "./node_modules/.bin/istanbul cover ./node_modules/.bin/_mocha --dir ./reports/coveralls/coverage --report lcovonly -- -R spec && cat ./reports/coveralls/coverage/lcov.info | ./node_modules/coveralls/bin/coveralls.js && rm -rf ./reports/coveralls"
16 | },
17 | "repository": {
18 | "type": "git",
19 | "url": "https://github.com/Planeshifter/tweet-sentiment.git"
20 | },
21 | "keywords": [
22 | "sentiment-analysis",
23 | "tweets",
24 | "twitter",
25 | "social-media",
26 | "NLP",
27 | "text-mining"
28 | ],
29 | "author": "Philipp Burckhardt",
30 | "license": "MIT",
31 | "bugs": {
32 | "url": "https://github.com/Planeshifter/tweet-sentiment/issues"
33 | },
34 | "homepage": "https://github.com/Planeshifter/tweet-sentiment",
35 | "dependencies": {
36 | "commander": "^2.7.1",
37 | "csv-parse": "^0.1.0",
38 | "e": "0.0.4",
39 | "emotional-emoticons": "0.0.1",
40 | "lodash": "^3.6.0",
41 | "node-svm": "^2.1.4",
42 | "plus_arrays": "^0.1.5",
43 | "pos": "^0.1.9",
44 | "ramda": "^0.13.0",
45 | "svm": "^0.1.1"
46 | },
47 | "devDependencies": {
48 | "babel": "^4.7.16",
49 | "chai": "^2.2.0",
50 | "coveralls": "^2.11.2",
51 | "istanbul": "^0.3.13",
52 | "jshint": "^2.6.3",
53 | "jshint-stylish": "^1.0.1",
54 | "mocha": "^2.2.1"
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/src/getFeatures.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | const R = require( 'ramda' );
4 | const _ = require( 'lodash' );
5 | const pos = require( 'pos' );
6 | const emotions = require( 'emotional-emoticons' );
7 | const path = require( 'path' );
8 | require( 'plus_arrays' ); // extends Array.prototype with helpers used below (contains(), max())
9 |
10 | const fs = require( 'fs' );
11 |
12 | const bingLiuLexicon = {
13 | positive: fs.readFileSync( path.normalize( __dirname + '/../data/positive-words.txt' ) ).toString().split( '\n' ).filter( (w, i) => i > 34), // skip the lexicon files' ~35-line header preamble
14 | negative: fs.readFileSync( path.normalize( __dirname + '/../data/negative-words.txt') ).toString().split( '\n' ).filter( (w, i) => i > 34)
15 | };
16 |
17 | const sentiment140Lexicon = fs.readFileSync( path.normalize( __dirname + '/../data/Sentiment140-Lexicon-v0.1/unigrams-pmilexicon.txt' ) )
18 | .toString()
19 | .split( '\n' )
20 | .map( e => e.split( '\t' ));
21 |
22 | const hashtagSentimentLexicon = fs.readFileSync( path.normalize( __dirname + '/../data/NRC-Hashtag-Sentiment-Lexicon-v0.1/unigrams-pmilexicon.txt' ) )
23 | .toString()
24 | .split( '\n' )
25 | .map( e => e.split( '\t' ));
26 |
27 | var emotionLexicon = fs.readFileSync( path.normalize( __dirname + '/../data/NRC-Emotion-Lexicon-v0.92/NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt' ) )
28 | .toString()
29 | .split( '\n' )
30 | .map( e => e.split( '\t' ))
31 | .filter( e => e[1] === 'positive' || e[1] === 'negative' ).filter( e => e[2] === '1' );
32 |
33 | function getPartOfSpeechCounts( text ) {
34 | var words = new pos.Lexer().lex( text );
35 | var taggedWords = new pos.Tagger().tag(words);
36 | var tags = taggedWords.map( (w) => w[1] );
37 | var counts = _.countBy(tags);
38 | return counts;
39 | }
40 |
41 | function replaceURLs( tweet ) {
42 | var myRegEx = /[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/g;
43 | return tweet.replace( myRegEx, 'someurl');
44 | }
45 |
46 | function replaceUsers( tweet ) {
47 | var myRegEx = /@[A-Za-z0-9_]{1,15}/g;
48 | return tweet.replace( myRegEx, '@someuser');
49 | }
50 |
51 | function getNoHashtags( tweet ) {
52 | var matches = tweet.match(/\#+[\w_]+[\w\'_\-]*[\w_]+/g);
53 | return matches ? matches.length : 0;
54 | }
55 |
56 |
57 | function getNoAllCaps( tweet ) {
58 | var matches = tweet.match(/\b[A-Z]+\b/g);
59 | return matches ? matches.length : 0;
60 | }
61 |
62 | function getEmoticons( tweet ) {
63 | var emoticonRegEx = /[<>]?[:;=8][\-o\*\']?[\)\]\(\[dDpP/\:\}\{@\|\\]|[\)\]\(\[dDpP/\:\}\{@\|\\][\-o\*\']?[:;=8][<>]?/g;
64 | return tweet.match( emoticonRegEx );
65 | }
66 |
67 | function getNoElongatedWords( tweet ) {
68 | var isElongated = /\b[A-Za-z]*([a-zA-Z])\1\1[A-Za-z]*\b/g;
69 | var matches = tweet.match( isElongated );
70 | return matches ? matches.length : 0;
71 | }
72 |
73 | function getNoNegations( tweet ) {
74 | var negationRegEx = '(?:never|no|nothing|nowhere|noone|none|not|havent|hasnt|hadnt|cant|couldnt|shouldnt|wont|wouldnt|dont|doesnt|didnt|isnt|arent|aint|n\'t)';
75 | var punctRegEx = '[.:;!?]';
76 | var myRegEx = new RegExp(negationRegEx + '(.*?)' + punctRegEx, 'gm');
77 | var matches = tweet.match( myRegEx );
78 | return matches ? matches.length : 0;
79 | }
80 |
81 | function markNegatedWords( tweet ) {
82 |
83 | var negationRegEx = '(never|nothing|nowhere|noone|none|not|no|havent|hasnt|hadnt|cant|couldnt|shouldnt|wont|wouldnt|dont|doesnt|didnt|isnt|arent|aint|n\'t)';
84 | var punctRegEx = '([.:;!?])';
85 | var myRegEx = new RegExp(negationRegEx + '(.*?)' + punctRegEx, 'gm');
86 |
87 | return tweet.replace( myRegEx, function( $0, $1, $2, $3){
88 | $2 = $2.split(' ').filter( (w, i) => i > 0 ).map( w => w + '_NEG' ).join(' ');
89 | return( $1 + ' ' + $2 + $3);
90 | });
91 | }
92 |
93 | function getBingLiuScores( tokens ) {
94 |
95 | var scores = tokens.map( (w) => {
96 | return bingLiuLexicon.positive.contains(w) ? 1 : bingLiuLexicon.negative.contains(w) ? -1 : 0;
97 | });
98 |
99 | var output = {
100 | bingLiu_greaterZero: scores.filter( w => w > 0).length,
101 | bingLiu_totalScore: scores.reduce( (a, b) => a + b, 0 ),
102 | bingLiu_maxScore: scores.max(),
103 | bingLiu_lastToken: scores.filter( w => w > 0).pop() || 0
104 | };
105 |
106 | return output;
107 |
108 | }
109 |
110 | function getSentiment140Scores( tokens ) {
111 |
112 | var scores = tokens.map( (w) => {
113 | var match = sentiment140Lexicon.filter( e => e[0] === w);
114 | var res = match.length > 0 ? match[0][1] : 0;
115 | return parseFloat(res);
116 | });
117 |
118 | var output = {
119 | sentiment140_greaterZero: scores.filter( w => w > 0).length,
120 | sentiment140_totalScore: scores.reduce( (a, b) => a + b, 0 ),
121 | sentiment140_maxScore: scores.max(),
122 | sentiment140_lastToken: scores.filter( w => w > 0).pop() || 0
123 | };
124 |
125 | return output;
126 | }
127 |
128 | function getHashtagSentimentScores( tokens ) {
129 |
130 | var scores = tokens.map( (w) => {
131 | var match = hashtagSentimentLexicon.filter( e => e[0] === w);
132 | var res = match.length > 0 ? match[0][1] : 0;
133 | return parseFloat(res);
134 | });
135 |
136 | var output = {
137 | hashtagSentimentLexicon_greaterZero: scores.filter( w => w > 0).length,
138 | hashtagSentimentLexicon_totalScore: scores.reduce( (a, b) => a + b, 0 ),
139 | hashtagSentimentLexicon_maxScore: scores.max(),
140 | hashtagSentimentLexicon_lastToken: scores.filter( w => w > 0).pop() || 0
141 | };
142 |
143 | return output;
144 | }
145 |
146 | function getEmotionScores( tokens ) {
147 |
148 | var scores = tokens.map( (w) => {
149 | var match = emotionLexicon.filter( e => e[0] === w);
150 | var res = match.length > 0 ? match[0][1] : 0;
151 | return res === 'positive' ? 1 : res === 'negative' ? -1 : 0;
152 | });
153 |
154 | var output = {
155 | nrcEmotion_greaterZero: scores.filter( w => w > 0).length,
156 | nrcEmotion_totalScore: scores.reduce( (a, b) => a + b, 0 ),
157 | nrcEmotion_maxScore: scores.max(),
158 | nrcEmotion_lastToken: scores.filter( w => w > 0).pop() || 0
159 | };
160 |
161 | return output;
162 | }
163 |
164 | function getEmoticonScores ( emoticons ) {
165 |
166 | if ( !emoticons ) {
167 | return {
168 | emoticon_greaterZero: 0,
169 | emoticon_totalScore: 0,
170 | emoticon_maxScore: 0,
171 | emoticon_lastToken: 0
172 | };
173 | }
174 |
175 | var getEmoScore = function( icon ) {
176 | for ( let key in emotions ) {
177 | if ( emotions[key].e.contains( icon ) === true ) {
178 | return emotions[key].p;
179 | }
180 | }
181 | return 0; // default score when the emoticon is not in the lexicon
182 | };
183 | var scores = emoticons.map( (icon) => getEmoScore(icon) );
184 |
185 | var output = {
186 | emoticon_greaterZero: scores.filter( w => w > 0).length,
187 | emoticon_totalScore: scores.reduce( (a, b) => a + b, 0 ),
188 | emoticon_maxScore: scores.max() || 0,
189 | emoticon_lastToken: scores.filter( w => w > 0).pop() || 0
190 | };
191 |
192 | return output;
193 |
194 | }
195 |
196 |
197 | function getFeatures( tweet ) {
198 |
199 | var preProcess = R.pipe( replaceURLs, replaceUsers );
200 | var processedTweet = preProcess(tweet);
201 |
202 | tweet = markNegatedWords(tweet);
203 |
204 | var features = {
205 | allcaps: getNoAllCaps( processedTweet ),
206 | hashtags: getNoHashtags( processedTweet ),
207 | elongated: getNoElongatedWords( processedTweet ),
208 | negated: getNoNegations( processedTweet )
209 | };
210 |
211 | /*
212 | var tags = getPartOfSpeechCounts( processedTweet );
213 | _.extend(features, tags);
214 | */
215 |
216 | var tokens = new pos.Lexer().lex( processedTweet );
217 |
218 | var bingLiuScores = getBingLiuScores( tokens );
219 | _.extend(features, bingLiuScores);
220 |
221 | var sentiment140Scores = getSentiment140Scores( tokens );
222 | _.extend(features, sentiment140Scores);
223 |
224 | var hashtagSentimentScores = getHashtagSentimentScores( tokens );
225 | _.extend(features, hashtagSentimentScores);
226 |
227 | var emotionScores = getEmotionScores( tokens );
228 | _.extend(features, emotionScores);
229 |
230 | var emoticons = getEmoticons( processedTweet);
231 |
232 | var emoticonScores = getEmoticonScores( emoticons );
233 | _.extend(features, emoticonScores);
234 |
235 | return features;
236 | }
237 |
238 | module.exports = exports = getFeatures;
239 |
--------------------------------------------------------------------------------
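
getFeatures() returns a flat object of numeric features; preprocess.js and predict.js both vectorize it with _.values(), and the two stay aligned because they share this module and the keys are always inserted in the same order. A quick inspection sketch, assuming lib/ has been built and the lexicons downloaded:

    var getFeatures = require( './lib/getFeatures' );

    var features = getFeatures( 'GREAT news!!! #jobs :)' );
    console.log( Object.keys( features ) );
    // => [ 'allcaps', 'hashtags', 'elongated', 'negated',
    //      'bingLiu_greaterZero', 'bingLiu_totalScore', 'bingLiu_maxScore', 'bingLiu_lastToken',
    //      'sentiment140_greaterZero', ..., 'emoticon_lastToken' ]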