├── .gitignore ├── data ├── negative │ ├── 1.txt │ ├── 2.txt │ ├── 3.txt │ ├── 4.txt │ └── 5.txt └── positive │ ├── 1.txt │ ├── 2.txt │ ├── 3.txt │ ├── 4.txt │ └── 5.txt ├── lib ├── classifier.js ├── corpus.js ├── document.js └── helpers.js ├── main.js ├── package.json └── readme.md /.gitignore: -------------------------------------------------------------------------------- 1 | lib-cov 2 | *.seed 3 | *.log 4 | *.csv 5 | *.dat 6 | *.out 7 | *.pid 8 | *.gz 9 | .DS_Store 10 | Thumbs.db 11 | 12 | pids 13 | logs 14 | results 15 | 16 | npm-debug.log 17 | node_modules/ -------------------------------------------------------------------------------- /lib/classifier.js: -------------------------------------------------------------------------------- 1 | var Document = require('./document'); 2 | var helpers = require('./helpers'); 3 | var _ = require('underscore'); 4 | 5 | var Classifier = function(positiveCorpus, negativeCorpus) { 6 | this.positiveCorpus = positiveCorpus; 7 | this.negativeCorpus = negativeCorpus; 8 | 9 | this.totalProbability = 0; 10 | this.inverseTotalProbability = 0; 11 | this.tolerance = 0.05; 12 | }; 13 | 14 | Classifier.prototype.classify = function(text) { 15 | var self = this; 16 | var stopWords = helpers.stopWords(); 17 | 18 | new Document(text).eachWord(function(word) { 19 | if (_.include(stopWords, word)) return; 20 | 21 | var positiveMatches = self.positiveCorpus.tokenCount(word); 22 | var negativeMatches = self.negativeCorpus.tokenCount(word); 23 | 24 | var probability = self.calculateProbability(positiveMatches, self.positiveCorpus.totalTokens, negativeMatches, self.negativeCorpus.totalTokens); 25 | self.recordProbability(probability); 26 | }); 27 | 28 | var finalProbability = this.combineProbabilities(); 29 | 30 | return { 31 | sentiment: this.computeSentiment(finalProbability), 32 | probability: finalProbability 33 | }; 34 | }; 35 | 36 | Classifier.prototype.calculateProbability = function(positiveMatches, positiveTotal, negativeMatches, negativeTotal) { 37 | var unknownWordStrength = 1.0; 38 | var unknownWordProbability = 0.5; 39 | 40 | var total = positiveMatches + negativeMatches; 41 | var positiveRatio = positiveMatches / parseFloat(positiveTotal); 42 | var negativeRatio = negativeMatches / parseFloat(negativeTotal); 43 | 44 | var probability = positiveRatio / (positiveRatio + negativeRatio); 45 | 46 | return ((unknownWordStrength * unknownWordProbability) + (total * probability)) / (unknownWordStrength + total); 47 | }; 48 | 49 | Classifier.prototype.recordProbability = function(probability) { 50 | if (isNaN(probability)) return; 51 | 52 | this.totalProbability = (this.totalProbability === 0) ? probability : this.totalProbability * probability; 53 | this.inverseTotalProbability = (this.inverseTotalProbability === 0) ? (1 - probability) : this.inverseTotalProbability * (1 - probability); 54 | }; 55 | 56 | Classifier.prototype.combineProbabilities = function() { 57 | if (this.totalProbability === 0) return 0.5; 58 | 59 | return this.totalProbability / (this.totalProbability + this.inverseTotalProbability); 60 | }; 61 | 62 | Classifier.prototype.computeSentiment = function(probability) { 63 | if (probability <= (0.5 - this.tolerance)) return 'negative'; 64 | if (probability >= (0.5 + this.tolerance)) return 'positive'; 65 | return 'neutral'; 66 | }; 67 | 68 | module.exports = Classifier; -------------------------------------------------------------------------------- /lib/corpus.js: -------------------------------------------------------------------------------- 1 | var Document = require('./document'); 2 | var path = require('path'); 3 | var fs = require('fs'); 4 | var _ = require('underscore'); 5 | 6 | var Corpus = function() { 7 | this.tokens = {}; 8 | this.totalTokens = 0; 9 | }; 10 | 11 | Corpus.prototype.add = function(document) { 12 | var self = this; 13 | 14 | document.eachWord(function(word) { 15 | self.tokens[word] = (self.tokens[word] || 0) + 1; 16 | }); 17 | }; 18 | 19 | Corpus.prototype.loadFromDirectory = function(directory) { 20 | var self = this; 21 | 22 | _.each(fs.readdirSync(directory), function(file) { 23 | var fileData = fs.readFileSync(path.join(directory, file), 'utf-8'); 24 | _.each(fileData.split('\n'), function(line) { 25 | self.add(new Document(line)); 26 | }); 27 | }); 28 | 29 | this.totalTokens = countTotalEntries(this.tokens); 30 | }; 31 | 32 | Corpus.prototype.tokenCount = function(word) { 33 | return this.tokens[word] || 0; 34 | }; 35 | 36 | var countTotalEntries = function(tokens) { 37 | var total = 0; 38 | 39 | _.each(Object.keys(tokens), function(word) { 40 | total += tokens[word]; 41 | }); 42 | 43 | return total; 44 | }; 45 | 46 | module.exports = Corpus; -------------------------------------------------------------------------------- /lib/document.js: -------------------------------------------------------------------------------- 1 | var helpers = require('./helpers'); 2 | var _ = require('underscore'); 3 | 4 | var Document = function(content) { 5 | var words = helpers.getWords(content); 6 | 7 | this.eachWord = function(callback) { 8 | _.map(words, callback); 9 | }; 10 | }; 11 | 12 | module.exports = Document; -------------------------------------------------------------------------------- /lib/helpers.js: -------------------------------------------------------------------------------- 1 | var _ = require('underscore'); 2 | 3 | var helpers = { 4 | stopWords: function() { 5 | return 'a,able,about,across,after,all,almost,also,am,among,an,and,any,are,as,at,be,because,been,but,by,can,cannot,could,dear,did,do,does,either,else,ever,every,for,from,get,got,had,has,have,he,her,hers,him,his,how,however,i,if,in,into,is,it,its,just,least,let,like,likely,may,me,might,most,must,my,neither,no,nor,not,of,off,often,on,only,or,other,our,own,rather,really,said,say,says,she,should,since,so,some,than,that,the,their,them,then,there,these,they,this,tis,to,too,totally,twas,us,wants,was,we,were,what,when,where,which,while,who,whom,why,will,with,would,yet,you,your'.split(','); 6 | }, 7 | getWords: function(input) { 8 | var filtered = input.toString().replace(/[^0-9a-zA-Z ]/g, ''); 9 | 10 | var words = _.map(filtered.split(' '), function(word) { 11 | return word.trim().toLowerCase(); 12 | }); 13 | 14 | return _.filter(words, function(word) { 15 | return word.length > 0; 16 | }); 17 | } 18 | }; 19 | 20 | module.exports = helpers; -------------------------------------------------------------------------------- /main.js: -------------------------------------------------------------------------------- 1 | var Corpus = require('./lib/corpus'); 2 | var Document = require('./lib/document'); 3 | var SentimentClassifier = require('./lib/classifier'); 4 | 5 | var Classifier = function(positivePath, negativePath) { 6 | var positiveCorpus = new Corpus(); 7 | var negativeCorpus = new Corpus(); 8 | 9 | if (!positivePath) positivePath = __dirname + '/data/positive'; 10 | if (!negativePath) negativePath = __dirname + '/data/negative'; 11 | 12 | positiveCorpus.loadFromDirectory(positivePath); 13 | negativeCorpus.loadFromDirectory(negativePath); 14 | 15 | return { 16 | classify: function(text) { 17 | return new SentimentClassifier(positiveCorpus, negativeCorpus).classify(text); 18 | } 19 | }; 20 | }; 21 | 22 | module.exports = Classifier; 23 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "node-sentiment", 3 | "version": "0.0.1", 4 | "main": "main", 5 | "author": "Martin Rue ", 6 | "description": "A simple naive Bayes classifier for textual sentiment analysis", 7 | "dependencies": { 8 | "underscore": "~1.3.3" 9 | } 10 | } -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # node-sentiment 2 | 3 | A simple naive Bayes classifier for textual sentiment analysis in [node.js](http://nodejs.org). 4 | 5 | ## Install 6 | 7 | ``` 8 | npm install https://github.com/martinrue/node-sentiment/tarball/master 9 | ``` 10 | 11 | ## Usage 12 | 13 | ``` js 14 | var SentimentClassifier = require('node-sentiment'); 15 | var classifier = new SentimentClassifier; 16 | 17 | classifier.classify('it is very sunny today'); 18 | // { sentiment: 'positive', probability: 0.8101596181696481 } 19 | ``` 20 | 21 | ## Data 22 | 23 | node-sentiment contains a default set of positive and negative data enough to seed the algorithm and produce decent results. The constructor function can optionally be passed custom paths to directories containing files with your own data. See [main.js](https://github.com/martinrue/node-sentiment/blob/master/main.js) if you need more details. --------------------------------------------------------------------------------