├── bin
│   └── tweet-sentiment
├── src
│   ├── index.js
│   ├── predict.js
│   ├── train.js
│   ├── preprocess.js
│   ├── cli.js
│   └── getFeatures.js
├── .jshintignore
├── .travis.yml
├── Makefile
├── .npmignore
├── examples
│   └── index.js
├── .gitignore
├── test
│   └── test.js
├── LICENSE
├── data
│   └── Makefile
├── .jshintrc
├── README.md
└── package.json

/bin/tweet-sentiment:
--------------------------------------------------------------------------------
#!/usr/bin/env node

require( __dirname + '/../lib/cli.js' );
--------------------------------------------------------------------------------

/src/index.js:
--------------------------------------------------------------------------------
'use strict';

module.exports = {
    predict: require( './predict' )
};
--------------------------------------------------------------------------------

/.jshintignore:
--------------------------------------------------------------------------------
# Directories #
###############
reports/

# Node.js #
###########
/node_modules/

# Git #
#######
.git*
--------------------------------------------------------------------------------

/.travis.yml:
--------------------------------------------------------------------------------
language: node_js
node_js:
  - "0.12"
  - "0.11"
  - "0.10"
  - "iojs"
before_install:
  - sudo apt-get install unrar
after_script:
  - npm run coveralls
--------------------------------------------------------------------------------

/Makefile:
--------------------------------------------------------------------------------
MAKE ?= make

# BABEL #
BABEL ?= ./node_modules/.bin/babel

all:
	$(BABEL) -d lib/ src/

clean:
	rm -f lib/*.js

download:
	$(MAKE) -C data download

clean-data:
	$(MAKE) -C data clean

print-%:
	@echo $*=$($*)

.PHONY: all clean clean-data download print-%
--------------------------------------------------------------------------------

/.npmignore:
--------------------------------------------------------------------------------
# Git
.git*

# Utilities #
#############
.jshintrc
.jshintignore
.travis.yml
.editorconfig

# Directories #
###############
reports/
test/

# Node.js #
###########
.npmignore
/node_modules/

# Logs #
########
*.log

# CSV #
#######
*.csv
--------------------------------------------------------------------------------

/examples/index.js:
--------------------------------------------------------------------------------
'use strict';

var ts = require( '../lib/index.js' );

var tweets = [
    'New poll shows more than half of Americans feel shaky about the economy. RETWEET if you are one of them. @FoxBusiness',
    'Labour have NO credibility on the economy. We all know @Ed_Miliband has never had a proper job & doesn\'t know first thing about business',
    'GOP is already complaining Obama won\'t be around to clean up the mess. Republicans are never good at handling peace and a strong economy.',
    'Two-thirds of leading UK economists say coalition austerity had been bad for the economy',
    'Great example of how lucrative the sharing economy can be'
];

console.log( ts.predict( tweets ) );
--------------------------------------------------------------------------------

/src/predict.js:
--------------------------------------------------------------------------------
'use strict';

const _ = require( 'lodash' );
const svm = require( 'node-svm' );
const processTweet = require( './getFeatures.js' );
const path = require( 'path' );

// Restore the trained classifier from the serialized model:
var model = require( path.normalize( __dirname + '/../model/model.json' ) );
var classifier = svm.restore( model );

function predict( tweet ) {
    var testdata;
    if ( Array.isArray( tweet ) ) {
        // Turn each tweet into its feature vector and classify them one by one:
        testdata = tweet.map( x => _.values( processTweet( x ) ) );
        return testdata.map( (x) => classifier.predictSync( x ) );
    } else {
        testdata = _.values( processTweet( tweet ) );
        return classifier.predictSync( testdata );
    }
}

module.exports = exports = predict;
--------------------------------------------------------------------------------

/.gitignore:
--------------------------------------------------------------------------------
# Logs
logs
*.log

# Lib folder
lib/

# Runtime data
pids
*.pid
*.seed

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage

# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# Compiled binary addons (http://nodejs.org/api/addons.html)
build/Release

# Dependency directory
# Commenting this out is preferred by some people, see
# https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git-
node_modules

# Users Environment Variables
.lock-wscript

reports/
tests/

!data/Makefile
data/
--------------------------------------------------------------------------------

/src/train.js:
--------------------------------------------------------------------------------
'use strict';

const fs = require( 'fs' );
const svm = require( 'node-svm' );
const path = require( 'path' );

var data = require( '../model/data' );

// Pair each feature vector with its sentiment label: [ features, label ]
var dataset = data.features.map( (e, i) => [ e, data.sentiments[i] ] );

// C-SVC with a linear kernel; the array of candidate values for the cost
// parameter C lets node-svm pick the best one during training.
var clf = new svm.CSVC({
    kernelType: 'linear',
    probability: true,
    c: [ 0.005, 0.01, 0.125, 0.5, 1, 2 ]
});

clf.train( dataset )
    .progress( function( rate ) {
        console.log( rate );
    })
    .spread( (trainedModel, trainingReport) => {
        console.log( trainingReport );
        fs.writeFileSync( path.normalize( __dirname + '/../model/model.json' ), JSON.stringify( trainedModel ) );
    });
--------------------------------------------------------------------------------

/src/preprocess.js:
--------------------------------------------------------------------------------
'use strict';

const fs = require( 'fs' );
const parse = require( 'csv-parse' );
const _ = require( 'lodash' );
const path = require( 'path' );

const processTweet = require( './getFeatures' );
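// Build the training data from the labeled tweet CSV. The column layout is
// assumed from the indices used below: line[1] holds the sentiment score and
// line[3] the raw tweet text.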
var sentiments = [];
var features = [];
var str = fs.readFileSync( path.normalize( __dirname + '/../data/econTweets.csv' ) );
parse( str, { delimiter: ',' }, function( err, output ) {
    if ( err ) {
        throw err;
    }
    output.forEach( (line, index) => {
        sentiments.push( line[1] );
        features.push( _.values( processTweet( line[3] ) ) );
        console.log( index );
    });
    // Binarize the sentiment scores into the two class labels 1 and -1:
    sentiments = sentiments.map( (x) => x > 0 ? 1 : -1 );

    var o = {
        features: features,
        sentiments: sentiments
    };

    fs.writeFileSync( path.normalize( __dirname + '/../model/data.json' ), JSON.stringify( o ) );
});
--------------------------------------------------------------------------------

/test/test.js:
--------------------------------------------------------------------------------
'use strict';

var chai = require( 'chai' );
var expect = chai.expect;

var predict = require( '../lib/predict' );

describe( 'predict', function tests() {

    it( 'correctly predicts a single positive tweet', function test() {
        var result = predict( 'This is great news, I just got a job.' );
        expect( result ).to.be.equal( 1 );
    });

    it( 'correctly predicts a single negative tweet', function test() {
        var result = predict( 'The economy is terrible right now, layoffs everywhere.' );
        expect( result ).to.be.equal( -1 );
    });

    it( 'correctly predicts an array of tweets', function test() {
        var result = predict( [ 'This is great news, I just got a job.', 'The economy is terrible right now, layoffs everywhere.' ] );
        expect( result ).to.be.deep.equal( [ 1, -1 ] );
    });

});
--------------------------------------------------------------------------------

/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2015 Philipp Burckhardt

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------

/data/Makefile:
--------------------------------------------------------------------------------
download: NRC Sentiment140 BingLiu NRC_Emotion

NRC:
	wget http://www.saifmohammad.com/WebDocs/NRC-Hashtag-Sentiment-Lexicon-v0.1.zip
	unzip NRC-Hashtag-Sentiment-Lexicon-v0.1.zip
	rm NRC-Hashtag-Sentiment-Lexicon-v0.1.zip
	gunzip NRC-Hashtag-Sentiment-Lexicon-v0.1/unigrams-pmilexicon.txt.gz
	gunzip NRC-Hashtag-Sentiment-Lexicon-v0.1/bigrams-pmilexicon.txt.gz
	gunzip NRC-Hashtag-Sentiment-Lexicon-v0.1/pairs-pmilexicon.txt.gz

Sentiment140:
	wget http://www.umiacs.umd.edu/~saif/WebDocs/Sentiment140-Lexicon-v0.1.zip
	unzip Sentiment140-Lexicon-v0.1.zip
	rm Sentiment140-Lexicon-v0.1.zip
	gunzip Sentiment140-Lexicon-v0.1/unigrams-pmilexicon.txt.gz
	gunzip Sentiment140-Lexicon-v0.1/bigrams-pmilexicon.txt.gz
	gunzip Sentiment140-Lexicon-v0.1/pairs-pmilexicon.txt.gz

BingLiu:
	wget http://www.cs.uic.edu/~liub/FBS/opinion-lexicon-English.rar
	unrar x opinion-lexicon-English.rar
	rm opinion-lexicon-English.rar

NRC_Emotion:
	wget http://saifmohammad.com/Lexicons/NRC-Emotion-Lexicon-v0.92.zip
	unzip NRC-Emotion-Lexicon-v0.92.zip
	rm NRC-Emotion-Lexicon-v0.92.zip

clean:
	rm -rf ./Sentiment140-Lexicon-v0.1
	rm -rf ./__MACOSX
	rm -f negative-words.txt
	rm -f positive-words.txt
	rm -rf ./NRC-Hashtag-Sentiment-Lexicon-v0.1
	rm -rf ./NRC-Emotion-Lexicon-v0.92
	rm -f *.zip
	rm -f *.rar

.PHONY: clean download
--------------------------------------------------------------------------------

/.jshintrc:
--------------------------------------------------------------------------------
{
  "bitwise": false,
  "camelcase": false,
  "curly": true,
  "eqeqeq": true,
  "es3": false,
  "forin": true,
  "freeze": true,
  "immed": true,
  "indent": 4,
  "latedef": "nofunc",
  "newcap": true,
  "noarg": true,
  "noempty": false,
  "nonbsp": true,
  "nonew": true,
  "plusplus": false,
  "undef": true,
  "unused": true,
  "strict": true,
  "maxparams": 10,
  "maxdepth": 5,
  "maxstatements": 100,
  "maxcomplexity": false,
  "maxlen": 1000,
  "asi": false,
  "boss": false,
  "debug": false,
  "eqnull": false,
  "esnext": true,
  "evil": false,
  "expr": true,
  "funcscope": false,
  "globalstrict": false,
  "iterator": false,
  "lastsemic": false,
  "laxbreak": false,
  "laxcomma": false,
  "loopfunc": false,
  "maxerr": 1000,
  "moz": false,
  "multistr": false,
  "notypeof": false,
  "proto": false,
  "scripturl": false,
  "shadow": false,
  "sub": true,
  "supernew": false,
  "validthis": false,
  "noyield": false,
  "browser": true,
  "browserify": true,
  "couch": false,
  "devel": true,
  "dojo": false,
  "jasmine": false,
  "jquery": false,
  "mocha": true,
  "mootools": false,
  "node": true,
  "nonstandard": false,
  "prototypejs": false,
  "qunit": false,
  "quotmark": "single",
  "rhino": false,
  "shelljs": false,
  "worker": false,
  "wsh": false,
  "yui": false,
  "globals": {}
}
--------------------------------------------------------------------------------

/src/cli.js:
--------------------------------------------------------------------------------
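// CLI entry point (exposed via bin/tweet-sentiment): reads tweets from a
// file, one per line, predicts the sentiment of each, and saves the
// predictions as JSON.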
'use strict';

// load modules
const program = require( 'commander' );
const fs = require( 'fs' );
const predict = require( './predict.js' );

var predictedSentiments = [];

program
    .version( '0.1.0' );

program
    .command( 'predict <file>' )
    .description( 'predict sentiment of tweets' )
    .option( '-o, --output [value]', 'File name of generated JSON file' )
    .action( (input, options) => {
        var inputData = fs.createReadStream( input );
        readLines( inputData, makePrediction, options );
    });

program
    .parse( process.argv );

// Consume the stream chunk by chunk, invoking `func` once per complete line:
function readLines( input, func, options ) {
    var remaining = '';

    input.on( 'data', (data) => {
        remaining += data;
        var index = remaining.indexOf( '\n' );
        while ( index > -1 ) {
            var line = remaining.substring( 0, index );
            remaining = remaining.substring( index + 1 );
            func( line );
            index = remaining.indexOf( '\n' );
        }
    });

    input.on( 'end', () => {
        if ( remaining.length > 0 ) {
            func( remaining );
        }
        savePredictions( options );
    });
}

function makePrediction( text ) {
    predictedSentiments.push( predict( text ) );
}

function savePredictions( options ) {
    if ( options.output ) {
        fs.writeFileSync( options.output, JSON.stringify( predictedSentiments ) );
    } else {
        // No --output file given: print the predictions to stdout instead.
        console.log( JSON.stringify( predictedSentiments ) );
    }
}
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
[![NPM version][npm-image]][npm-url]
[![Build Status][travis-image]][travis-url]
[![Coverage Status][coveralls-image]][coveralls-url]
[![Dependencies][dependencies-image]][dependencies-url]

# tweet-sentiment

> SVM Classifier to Detect Sentiment of Tweets. The package implements the procedure described in the paper ["NRC-Canada: Building the State-of-the-Art in Sentiment Analysis of Tweets"](http://www.umiacs.umd.edu/~saif/WebDocs/sentimentMKZ.pdf) by Saif M. Mohammad, Svetlana Kiritchenko, and Xiaodan Zhu.

## Installation

Install the tool globally so that it can be invoked from any directory via the `tweet-sentiment` command:
```
npm install tweet-sentiment -g
```

## Getting Started

After installation, help on the available options can be obtained by typing

```
tweet-sentiment --help
```

## Command Line Interface

### tweet-sentiment predict [options] \<file\>

Reads tweets from `<file>`, one tweet per line, and predicts a sentiment label for each.

Option | Description
------------- | -------------
-h, --help | output usage information
-o, --output [value] | name of the generated JSON output file

[npm-image]: https://badge.fury.io/js/tweet-sentiment.svg
[npm-url]: http://badge.fury.io/js/tweet-sentiment

[travis-image]: https://travis-ci.org/Planeshifter/tweet-sentiment.svg
[travis-url]: https://travis-ci.org/Planeshifter/tweet-sentiment

[coveralls-image]: https://img.shields.io/coveralls/Planeshifter/tweet-sentiment/master.svg
[coveralls-url]: https://coveralls.io/r/Planeshifter/tweet-sentiment?branch=master

[dependencies-image]: http://img.shields.io/david/Planeshifter/tweet-sentiment.svg
[dependencies-url]: https://david-dm.org/Planeshifter/tweet-sentiment
--------------------------------------------------------------------------------

/package.json:
--------------------------------------------------------------------------------
{
  "name": "@planeshifter/tweet-sentiment",
  "version": "0.2.5",
  "description": "SVM Classifier to Detect Sentiment of Tweets",
  "main": "lib/index.js",
  "preferGlobal": true,
  "bin": {
    "tweet-sentiment": "./bin/tweet-sentiment"
  },
  "scripts": {
    "test": "./node_modules/.bin/mocha",
    "prepublish": "make all",
    "postinstall": "make download",
    "test-cov": "./node_modules/.bin/istanbul cover ./node_modules/.bin/_mocha --dir ./reports/coverage -- -R spec",
    "coveralls": "./node_modules/.bin/istanbul cover ./node_modules/.bin/_mocha --dir ./reports/coveralls/coverage --report lcovonly -- -R spec && cat ./reports/coveralls/coverage/lcov.info | ./node_modules/coveralls/bin/coveralls.js && rm -rf ./reports/coveralls"
  },
  "repository": {
    "type": "git",
    "url": "https://github.com/Planeshifter/tweet-sentiment.git"
  },
  "keywords": [
    "sentiment-analysis",
    "tweets",
    "twitter",
    "social-media",
    "NLP",
    "text-mining"
  ],
  "author": "Philipp Burckhardt",
  "license": "MIT",
  "bugs": {
    "url": "https://github.com/Planeshifter/tweet-sentiment/issues"
  },
  "homepage": "https://github.com/Planeshifter/tweet-sentiment",
  "dependencies": {
    "commander": "^2.7.1",
    "csv-parse": "^0.1.0",
    "e": "0.0.4",
    "emotional-emoticons": "0.0.1",
    "lodash": "^3.6.0",
    "node-svm": "^2.1.4",
    "plus_arrays": "^0.1.5",
    "pos": "^0.1.9",
    "ramda": "^0.13.0",
    "svm": "^0.1.1"
  },
  "devDependencies": {
    "babel": "^4.7.16",
    "chai": "^2.2.0",
    "coveralls": "^2.11.2",
    "istanbul": "^0.3.13",
    "jshint": "^2.6.3",
    "jshint-stylish": "^1.0.1",
    "mocha": "^2.2.1"
  }
}
--------------------------------------------------------------------------------

/src/getFeatures.js:
--------------------------------------------------------------------------------
'use strict';

const R = require( 'ramda' );
const _ = require( 'lodash' );
const pos = require( 'pos' );
const emotions = require( 'emotional-emoticons' );
const path = require( 'path' );
// extends Array.prototype with the contains() and max() helpers used below
require( 'plus_arrays' );

const fs = require( 'fs' );
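// Load the sentiment lexicons fetched by `make download` (see data/Makefile):
// the Bing Liu opinion lexicon, the Sentiment140 and NRC Hashtag Sentiment
// unigram lexicons, and the NRC Emotion lexicon.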
const bingLiuLexicon = {
    // drop the header preamble (first 35 lines) of each word list file
    positive: fs.readFileSync( path.normalize( __dirname + '/../data/positive-words.txt' ) ).toString().split( '\n' ).filter( (w, i) => i > 34 ),
    negative: fs.readFileSync( path.normalize( __dirname + '/../data/negative-words.txt' ) ).toString().split( '\n' ).filter( (w, i) => i > 34 )
};

const sentiment140Lexicon = fs.readFileSync( path.normalize( __dirname + '/../data/Sentiment140-Lexicon-v0.1/unigrams-pmilexicon.txt' ) )
    .toString()
    .split( '\n' )
    .map( e => e.split( '\t' ) );

const hashtagSentimentLexicon = fs.readFileSync( path.normalize( __dirname + '/../data/NRC-Hashtag-Sentiment-Lexicon-v0.1/unigrams-pmilexicon.txt' ) )
    .toString()
    .split( '\n' )
    .map( e => e.split( '\t' ) );

// keep only `positive` / `negative` entries with an association flag of 1
var emotionLexicon = fs.readFileSync( path.normalize( __dirname + '/../data/NRC-Emotion-Lexicon-v0.92/NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt' ) )
    .toString()
    .split( '\n' )
    .map( e => e.split( '\t' ) )
    .filter( e => e[1] === 'positive' || e[1] === 'negative' )
    .filter( e => e[2] === '1' );

// (currently only used by the commented-out POS features in getFeatures below)
function getPartOfSpeechCounts( text ) {
    var words = new pos.Lexer().lex( text );
    var taggedWords = new pos.Tagger().tag( words );
    var tags = taggedWords.map( (w) => w[1] );
    var counts = _.countBy( tags );
    return counts;
}

function replaceURLs( tweet ) {
    var myRegEx = /[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/g;
    return tweet.replace( myRegEx, 'someurl' );
}

function replaceUsers( tweet ) {
    var myRegEx = /@[A-Za-z0-9_]{1,15}/g;
    return tweet.replace( myRegEx, '@someuser' );
}

function getNoHashtags( tweet ) {
    var matches = tweet.match( /\#+[\w_]+[\w\'_\-]*[\w_]+/g );
    return matches ? matches.length : 0;
}

function getNoAllCaps( tweet ) {
    var matches = tweet.match( /\b[A-Z]+\b/g );
    return matches ? matches.length : 0;
}

function getEmoticons( tweet ) {
    var emoticonRegEx = /[<>]?[:;=8][\-o\*\']?[\)\]\(\[dDpP/\:\}\{@\|\\]|[\)\]\(\[dDpP/\:\}\{@\|\\][\-o\*\']?[:;=8][<>]?/g;
    return tweet.match( emoticonRegEx );
}

function getNoElongatedWords( tweet ) {
    var isElongated = /\b[A-Za-z]*([a-zA-Z])\1\1[A-Za-z]*\b/g;
    var matches = tweet.match( isElongated );
    return matches ? matches.length : 0;
}

function getNoNegations( tweet ) {
    var negationRegEx = '(?:(?:never|no|nothing|nowhere|noone|none|not|havent|hasnt|hadnt|cant|couldnt|shouldnt|wont|wouldnt|dont|doesnt|didnt|isnt|arent|aint))|n\'t';
    var punctRegEx = '[.:;!?]';
    var myRegEx = new RegExp( negationRegEx + '(.*?)' + punctRegEx, 'gm' );
    var matches = tweet.match( myRegEx );
    return matches ? matches.length : 0;
}

// append the suffix `_NEG` to every word between a negation cue and the next
// punctuation mark
function markNegatedWords( tweet ) {
    var negationRegEx = '(never|nothing|nowhere|noone|none|not|no|havent|hasnt|hadnt|cant|couldnt|shouldnt|wont|wouldnt|dont|doesnt|didnt|isnt|arent|aint|n\'t)';
    var punctRegEx = '([.:;!?])';
    var myRegEx = new RegExp( negationRegEx + '(.*?)' + punctRegEx, 'gm' );

    return tweet.replace( myRegEx, function( $0, $1, $2, $3 ) {
        $2 = $2.split( ' ' ).filter( (w, i) => i > 0 ).map( w => w + '_NEG' ).join( ' ' );
        return $1 + ' ' + $2 + $3;
    });
}
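// Each lexicon is mapped onto the same four-feature template (cf. the
// NRC-Canada system cited in the README): the count of positively scored
// tokens, the total score, the maximal score, and the score of the last
// positively scored token.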
function getBingLiuScores( tokens ) {
    var scores = tokens.map( (w) => {
        return bingLiuLexicon.positive.contains( w ) ? 1 : bingLiuLexicon.negative.contains( w ) ? -1 : 0;
    });

    var output = {
        bingLiu_greaterZero: scores.filter( w => w > 0 ).length,
        // the initial value 0 keeps reduce from throwing on empty token lists
        bingLiu_totalScore: scores.reduce( (a, b) => a + b, 0 ),
        bingLiu_maxScore: scores.max(),
        bingLiu_lastToken: scores.filter( w => w > 0 ).pop() || 0
    };

    return output;
}

function getSentiment140Scores( tokens ) {
    var scores = tokens.map( (w) => {
        var match = sentiment140Lexicon.filter( e => e[0] === w );
        var res = match.length > 0 ? match[0][1] : 0;
        return parseFloat( res );
    });

    var output = {
        sentiment140_greaterZero: scores.filter( w => w > 0 ).length,
        sentiment140_totalScore: scores.reduce( (a, b) => a + b, 0 ),
        sentiment140_maxScore: scores.max(),
        sentiment140_lastToken: scores.filter( w => w > 0 ).pop() || 0
    };

    return output;
}

function getHashtagSentimentScores( tokens ) {
    var scores = tokens.map( (w) => {
        var match = hashtagSentimentLexicon.filter( e => e[0] === w );
        var res = match.length > 0 ? match[0][1] : 0;
        return parseFloat( res );
    });

    var output = {
        hashtagSentimentLexicon_greaterZero: scores.filter( w => w > 0 ).length,
        hashtagSentimentLexicon_totalScore: scores.reduce( (a, b) => a + b, 0 ),
        hashtagSentimentLexicon_maxScore: scores.max(),
        hashtagSentimentLexicon_lastToken: scores.filter( w => w > 0 ).pop() || 0
    };

    return output;
}
function getEmotionScores( tokens ) {
    var scores = tokens.map( (w) => {
        var match = emotionLexicon.filter( e => e[0] === w );
        var res = match.length > 0 ? match[0][1] : 0;
        return res === 'positive' ? 1 : res === 'negative' ? -1 : 0;
    });

    var output = {
        nrcEmotion_greaterZero: scores.filter( w => w > 0 ).length,
        nrcEmotion_totalScore: scores.reduce( (a, b) => a + b, 0 ),
        nrcEmotion_maxScore: scores.max(),
        nrcEmotion_lastToken: scores.filter( w => w > 0 ).pop() || 0
    };

    return output;
}

function getEmoticonScores( emoticons ) {
    if ( !emoticons ) {
        return {
            emoticon_greaterZero: 0,
            emoticon_totalScore: 0,
            emoticon_maxScore: 0,
            emoticon_lastToken: 0
        };
    }

    var getEmoScore = function( icon ) {
        for ( let key in emotions ) {
            if ( emotions[key].e.contains( icon ) === true ) {
                return emotions[key].p;
            }
        }
        // unknown emoticon: score it as neutral rather than returning undefined
        return 0;
    };

    var scores = emoticons.map( (icon) => getEmoScore( icon ) );

    var output = {
        emoticon_greaterZero: scores.filter( w => w > 0 ).length,
        emoticon_totalScore: scores.reduce( (a, b) => a + b, 0 ),
        emoticon_maxScore: scores.max() || 0,
        emoticon_lastToken: scores.filter( w => w > 0 ).pop() || 0
    };

    return output;
}

function getFeatures( tweet ) {
    var preProcess = R.pipe( replaceURLs, replaceUsers );
    var processedTweet = preProcess( tweet );

    // NOTE: the negation-marked tweet is computed here but not used further;
    // all features below are derived from `processedTweet`.
    tweet = markNegatedWords( tweet );

    var features = {
        allcaps: getNoAllCaps( processedTweet ),
        hashtags: getNoHashtags( processedTweet ),
        elongated: getNoElongatedWords( processedTweet ),
        negated: getNoNegations( processedTweet )
    };

    /*
    var tags = getPartOfSpeechCounts( processedTweet );
    _.extend( features, tags );
    */

    var tokens = new pos.Lexer().lex( processedTweet );

    var bingLiuScores = getBingLiuScores( tokens );
    _.extend( features, bingLiuScores );

    var sentiment140Scores = getSentiment140Scores( tokens );
    _.extend( features, sentiment140Scores );

    var hashtagSentimentScores = getHashtagSentimentScores( tokens );
    _.extend( features, hashtagSentimentScores );

    var emotionScores = getEmotionScores( tokens );
    _.extend( features, emotionScores );

    var emoticons = getEmoticons( processedTweet );

    var emoticonScores = getEmoticonScores( emoticons );
    _.extend( features, emoticonScores );

    return features;
}

module.exports = exports = getFeatures;
--------------------------------------------------------------------------------
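For orientation, here is a minimal sketch of programmatic use, based on the `predict` API exported in `src/index.js` and the behaviour exercised in `test/test.js` (the package name follows `package.json`; the sketch assumes the package was installed, so the lexicons were downloaded and the sources compiled to `lib/`):

```
'use strict';

var ts = require( '@planeshifter/tweet-sentiment' );

// A single tweet yields a single class label: 1 (positive) or -1 (negative).
console.log( ts.predict( 'This is great news, I just got a job.' ) );
// => 1

// An array of tweets yields an array of labels, one per tweet.
console.log( ts.predict([
    'This is great news, I just got a job.',
    'The economy is terrible right now, layoffs everywhere.'
]) );
// => [ 1, -1 ]
```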