├── .gitignore ├── LICENSE ├── test ├── fixtures │ ├── classifier-with-limit.json │ ├── classifier-without-limit.json │ └── classifier-limit.json ├── base.js └── limit.js ├── generate.js ├── package.json ├── README.md └── src └── naivebayes.js /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | 6 | # Runtime data 7 | pids 8 | *.pid 9 | *.seed 10 | 11 | # Directory for instrumented libs generated by jscoverage/JSCover 12 | lib-cov 13 | 14 | # Coverage directory used by tools like istanbul 15 | coverage 16 | 17 | # nyc test coverage 18 | .nyc_output 19 | 20 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 21 | .grunt 22 | 23 | # node-waf configuration 24 | .lock-wscript 25 | 26 | # Compiled binary addons (http://nodejs.org/api/addons.html) 27 | build/Release 28 | 29 | # Dependency directories 30 | node_modules 31 | jspm_packages 32 | 33 | # Optional npm cache directory 34 | .npm 35 | 36 | # Optional REPL history 37 | .node_repl_history 38 | 39 | test/fixtures/ham 40 | test/fixtures/spam 41 | test.js 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Surmon 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /test/fixtures/classifier-with-limit.json: -------------------------------------------------------------------------------- 1 | {"categories":["positive","negative","foul"],"docCount":{"positive":5,"negative":5,"foul":6},"totalDocuments":16,"vocabulary":["","","This","","amazing","is","Get","Go","to","hell","awesome","movie","Yeah","Oh","boy","Sweet","this","is","incredibly","perfect","great","Do","one","thing","at","a","time","and","do","well","Never","forget","to","say","thanks","Believe","in","yourself","terrible","crappy","thing","Dang","Stinks","ugh","bad","annoying","No","why","dumb","Are","you","serious","sucks","I","don","t","want","to","be","here","out","Beat","it","lost","You","SOB","(son","of","a)","","SOG","(son","of","Gun)","","Damn","you",""],"wordCount":{"positive":35,"negative":30,"foul":29},"wordFrequencyCount":{"positive":{"":5,"amazing":2,"awesome":1,"movie":1,"Yeah":1,"Oh":1,"boy":1,"Sweet":1,"this":1,"is":1,"incredibly":1,"perfect":1,"great":1,"Do":1,"one":1,"thing":1,"at":1,"a":1,"time":1,"and":1,"do":1,"well":1,"Never":1,"forget":1,"to":1,"say":1,"thanks":1,"Believe":1,"in":1,"yourself":1},"negative":{"":3,"This":3,"is":2,"terrible":1,"crappy":1,"thing":1,"Dang":1,"Stinks":1,"ugh":1,"bad":1,"annoying":1,"No":1,"why":1,"dumb":1,"Are":1,"you":1,"serious":1,"sucks":1,"I":1,"don":1,"t":1,"want":1,"to":1,"be":1,"here":1},"foul":{"":6,"Get":2,"Go":2,"to":2,"hell":2,"out":1,
"Beat":1,"it":1,"lost":1,"You":1,"SOB":1,"(son":2,"of":2,"a)":1,"SOG":1,"Gun)":1,"Damn":1,"you":1}},"options":{"vocabularyLimit":80}} 2 | -------------------------------------------------------------------------------- /test/fixtures/classifier-without-limit.json: -------------------------------------------------------------------------------- 1 | {"categories":["positive","negative","foul"],"docCount":{"positive":5,"negative":5,"foul":6},"totalDocuments":16,"vocabulary":["amazing","awesome","movie","Yeah","Oh","boy","","Sweet","this","is","incredibly","amazing","perfect","great","","Do","one","thing","at","a","time","and","do","well","","Never","forget","to","say","thanks","","Believe","in","yourself","","terrible","crappy","thing","Dang","Stinks","","ugh","bad","This","is","annoying","","No","why","This","is","dumb","Are","you","serious","This","sucks","","I","don","t","want","to","be","here","Get","out","Beat","it","lost","","Go","to","hell","the","devil","","Oh","hell","s","bells","","You","SOB","(son","of","a)","","SOG","(son","of","Gun)","","Damn","you",""],"wordCount":{"positive":35,"negative":30,"foul":34},"wordFrequencyCount":{"positive":{"amazing":2,"awesome":1,"movie":1,"Yeah":1,"Oh":1,"boy":1,"":5,"Sweet":1,"this":1,"is":1,"incredibly":1,"perfect":1,"great":1,"Do":1,"one":1,"thing":1,"at":1,"a":1,"time":1,"and":1,"do":1,"well":1,"Never":1,"forget":1,"to":1,"say":1,"thanks":1,"Believe":1,"in":1,"yourself":1},"negative":{"terrible":1,"crappy":1,"thing":1,"Dang":1,"Stinks":1,"":3,"ugh":1,"bad":1,"This":3,"is":2,"annoying":1,"No":1,"why":1,"dumb":1,"Are":1,"you":1,"serious":1,"sucks":1,"I":1,"don":1,"t":1,"want":1,"to":1,"be":1,"here":1},"foul":{"Get":2,"out":1,"Beat":1,"it":1,"lost":1,"":6,"Go":2,"to":2,"hell":2,"the":1,"devil":1,"Oh":1,"s":1,"bells":1,"You":1,"SOB":1,"(son":2,"of":2,"a)":1,"SOG":1,"Gun)":1,"Damn":1,"you":1}}} 2 | -------------------------------------------------------------------------------- /generate.js: 
-------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const path = require('path'); 3 | const { promisify } = require('util'); 4 | const { readDirDeep } = require('read-dir-deep'); 5 | const { simpleParser } = require('mailparser'); 6 | 7 | const NaiveBayes = require('.'); 8 | const classifier = new NaiveBayes({ vocabularyLimit: 300, stopwords: true }); 9 | 10 | const readFile = promisify(fs.readFile); 11 | const writeFile = promisify(fs.writeFile); 12 | 13 | const FIXTURES_PATH = path.join(process.cwd(), 'test', 'fixtures'); 14 | const HAM_PATH = path.join(FIXTURES_PATH, 'ham'); 15 | const SPAM_PATH = path.join(FIXTURES_PATH, 'spam'); 16 | 17 | async function getEmailFromSource(filepath) { 18 | const sourceFiles = await readDirDeep(filepath); 19 | const files = await Promise.all(sourceFiles.map((file) => readFile(file))); 20 | const emails = await Promise.all(files.map((file) => simpleParser(file))); 21 | const texts = emails.map((email) => email.text); 22 | return texts; 23 | } 24 | 25 | (async () => { 26 | const hamEmails = await getEmailFromSource(HAM_PATH); 27 | console.log('got ham emails'); 28 | const spamEmails = await getEmailFromSource(SPAM_PATH); 29 | console.log('got spam emails'); 30 | 31 | for (const text of hamEmails) { 32 | if (text) { 33 | classifier.learn(text.trim(), 'ham'); 34 | } 35 | } 36 | 37 | for (const text of spamEmails) { 38 | if (text) { 39 | classifier.learn(text.trim(), 'spam'); 40 | } 41 | } 42 | 43 | await writeFile( 44 | path.join(FIXTURES_PATH, 'classifier-limit.json'), 45 | classifier.toJson() 46 | ); 47 | })(); 48 | -------------------------------------------------------------------------------- /test/base.js: -------------------------------------------------------------------------------- 1 | const test = require('ava'); 2 | const NaiveBayes = require('../src/naivebayes.js'); 3 | const classifier = new NaiveBayes(); 4 | 5 | function decode(text) { 6 | return 
Buffer.from(text, 'base64').toString(); 7 | } 8 | 9 | test('naivebayes', (t) => { 10 | classifier.learn('amazing, awesome movie!! Yeah!! Oh boy.', 'positive'); 11 | classifier.learn( 12 | 'Sweet, this is incredibly, amazing, perfect, great!!', 13 | 'positive' 14 | ); 15 | classifier.learn('Do one thing at a time, and do well.', 'positive'); 16 | classifier.learn('Never forget to say “thanks”.', 'positive'); 17 | classifier.learn('Believe in yourself.', 'positive'); 18 | 19 | classifier.learn('terrible, crappy thing. Dang. Stinks!!', 'negative'); 20 | classifier.learn('ugh, bad. This is annoying.', 'negative'); 21 | classifier.learn('crud, this sucks', 'negative'); 22 | classifier.learn('awful, no way', 'negative'); 23 | 24 | classifier.learn(decode('R2V0IG91dCAhQmVhdCBpdCEgR2V0IGxvc3Qh'), 'foul'); 25 | classifier.learn(decode('R28gdG8gaGVsbCEgR28gdG8gdGhlIGRldmlsIQ=='), 'foul'); 26 | classifier.learn(decode('T2gsIGhlbGwncyBiZWxscyE='), 'foul'); 27 | classifier.learn(decode('WW91IFNPQiAoc29uIG9mIGEpIQ=='), 'foul'); 28 | classifier.learn(decode('U09HIChzb24gb2YgR3VuKSE='), 'foul'); 29 | classifier.learn(decode('RGFtbiB5b3Uh'), 'foul'); 30 | 31 | const classifierJson = classifier.toJson(); 32 | const classifierJsonObject = classifier.toJsonObject(); 33 | t.is(typeof classifierJson, 'string'); 34 | t.is(typeof classifierJsonObject, 'object'); 35 | t.deepEqual(classifierJsonObject.categories, [ 36 | 'positive', 37 | 'negative', 38 | 'foul' 39 | ]); 40 | }); 41 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@ladjs/naivebayes", 3 | "description": "Naive Bayes Classifier for JavaScript.", 4 | "version": "0.0.4", 5 | "author": { 6 | "name": "Shaun Warman", 7 | "url": "https://github.com/ladjs" 8 | }, 9 | "bugs": { 10 | "url": "https://github.com/ladjs/naivebayes/issues", 11 | "email": "shaunwarman1@gmail.com" 12 | }, 13 | 
"commitlint": { 14 | "extends": [ 15 | "@commitlint/config-conventional" 16 | ] 17 | }, 18 | "contributors": [ 19 | "Surmon (http://surmon.me/)", 20 | "Shaun Warman (https://shaunwarman.com/)" 21 | ], 22 | "dependencies": { 23 | "debug": "^4.1.1", 24 | "stopword": "^1.0.1" 25 | }, 26 | "devDependencies": { 27 | "@commitlint/cli": "latest", 28 | "@commitlint/config-conventional": "latest", 29 | "ava": "latest", 30 | "codecov": "latest", 31 | "cross-env": "latest", 32 | "eslint": "latest", 33 | "eslint-config-xo-lass": "latest", 34 | "fixpack": "latest", 35 | "husky": "latest", 36 | "lint-staged": "latest", 37 | "mailparser": "^2.7.7", 38 | "nyc": "latest", 39 | "read-dir-deep": "^7.0.1", 40 | "remark-cli": "latest", 41 | "remark-preset-github": "latest", 42 | "xo": "latest" 43 | }, 44 | "files": [ 45 | "src" 46 | ], 47 | "husky": { 48 | "hooks": { 49 | "pre-commit": "lint-staged && npm test", 50 | "commit-msg": "commitlint -E HUSKY_GIT_PARAMS" 51 | } 52 | }, 53 | "license": "MIT", 54 | "lint-staged": { 55 | "*.js": [ 56 | "xo --fix", 57 | "git add" 58 | ], 59 | "*.md": [ 60 | "remark . -qfo", 61 | "git add" 62 | ], 63 | "package.json": [ 64 | "fixpack", 65 | "git add" 66 | ] 67 | }, 68 | "main": "src/naivebayes.js", 69 | "prettier": { 70 | "singleQuote": true, 71 | "bracketSpacing": true, 72 | "trailingComma": "none" 73 | }, 74 | "private": false, 75 | "publishConfig": { 76 | "access": "public" 77 | }, 78 | "remarkConfig": { 79 | "plugins": [ 80 | "preset-github" 81 | ] 82 | }, 83 | "repository": { 84 | "type": "git", 85 | "url": "https://github.com/ladjs/naivebayes" 86 | }, 87 | "scripts": { 88 | "ava": "cross-env NODE_ENV=test ava", 89 | "coverage": "nyc report --reporter=text-lcov > coverage.lcov && codecov", 90 | "lint": "xo && remark . 
-qfo", 91 | "lint:fix": "xo --fix", 92 | "nyc": "cross-env NODE_ENV=test nyc ava", 93 | "test": "yarn run lint && yarn run ava", 94 | "test-coverage": "yarn run lint && yarn run nyc" 95 | }, 96 | "xo": { 97 | "prettier": true, 98 | "space": true, 99 | "extends": [ 100 | "xo-lass" 101 | ] 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /test/limit.js: -------------------------------------------------------------------------------- 1 | const test = require('ava'); 2 | const NaiveBayes = require('../src/naivebayes.js'); 3 | 4 | function decode(text) { 5 | return Buffer.from(text, 'base64').toString(); 6 | } 7 | 8 | test('naivebayes with limit', (t) => { 9 | const classifier = new NaiveBayes({ vocabularyLimit: 10 }); 10 | 11 | classifier.learn('amazing, awesome movie!! Yeah!! Oh boy.', 'positive'); 12 | classifier.learn( 13 | 'Sweet, this is incredibly, amazing, perfect, great!!', 14 | 'positive' 15 | ); 16 | classifier.learn('Do one thing at a time, and do well.', 'positive'); 17 | classifier.learn('Never forget to say “thanks”.', 'positive'); 18 | classifier.learn('Believe in yourself.', 'positive'); 19 | 20 | classifier.learn('terrible, crappy thing. Dang. Stinks!!', 'negative'); 21 | classifier.learn('ugh, bad. This is annoying.', 'negative'); 22 | classifier.learn('No, why. This is dumb', 'negative'); 23 | classifier.learn('Are you serious? 
This sucks!', 'negative'); 24 | classifier.learn("I don't want to be here", 'negative'); 25 | 26 | classifier.learn(decode('R2V0IG91dCAhQmVhdCBpdCEgR2V0IGxvc3Qh'), 'foul'); 27 | classifier.learn(decode('R28gdG8gaGVsbCEgR28gdG8gdGhlIGRldmlsIQ=='), 'foul'); 28 | classifier.learn(decode('T2gsIGhlbGwncyBiZWxscyE='), 'foul'); 29 | classifier.learn(decode('WW91IFNPQiAoc29uIG9mIGEpIQ=='), 'foul'); 30 | classifier.learn(decode('U09HIChzb24gb2YgR3VuKSE='), 'foul'); 31 | classifier.learn(decode('RGFtbiB5b3Uh'), 'foul'); 32 | 33 | const pFoul = classifier.categorize(decode('R2V0IGxvc3QgeW91IFNPQg==')); 34 | t.is(pFoul, 'foul'); 35 | 36 | const pNegative = classifier.categorize('Oh no that is crappy'); 37 | t.is(pNegative, 'negative'); 38 | 39 | const pPositive = classifier.categorize('Sweet that was awesome'); 40 | t.is(pPositive, 'positive'); 41 | 42 | const classifierJson = classifier.toJson(); 43 | const classifierJsonObject = classifier.toJsonObject(); 44 | t.is(typeof classifierJson, 'string'); 45 | t.is(typeof classifierJsonObject, 'object'); 46 | t.deepEqual(classifierJsonObject.categories, [ 47 | 'positive', 48 | 'negative', 49 | 'foul' 50 | ]); 51 | }); 52 | 53 | test('naivebayes from json with implicit limit', (t) => { 54 | const json = require('./fixtures/classifier-with-limit'); 55 | const classifier = NaiveBayes.fromJson(json); 56 | t.is(classifier.vocabularyLimit, 80); 57 | 58 | const pFoul = classifier.categorize(decode('WW91IGdldCBvdXQh')); 59 | t.is(pFoul, 'foul'); 60 | 61 | const pNegative = classifier.categorize('Oh no that is crappy'); 62 | t.is(pNegative, 'negative'); 63 | 64 | const pPositive = classifier.categorize('Sweet that was awesome'); 65 | t.is(pPositive, 'positive'); 66 | 67 | const state = classifier.toJsonObject(); 68 | 69 | t.true(state.vocabulary.length <= 80); 70 | }); 71 | 72 | test('naivebayes from json with explicit limit', (t) => { 73 | const json = require('./fixtures/classifier-with-limit'); 74 | const classifier = 
NaiveBayes.fromJson(json, 80); 75 | t.is(classifier.vocabularyLimit, 80); 76 | 77 | classifier.learn(decode('WW91IGdldCBvdXQh'), 'foul'); 78 | 79 | const pFoul = classifier.categorize(decode('WW91IGdldCBvdXQh')); 80 | t.is(pFoul, 'foul'); 81 | 82 | const pNegative = classifier.categorize('Oh no that is crappy'); 83 | t.is(pNegative, 'negative'); 84 | 85 | const pPositive = classifier.categorize('Sweet that was awesome'); 86 | t.is(pPositive, 'positive'); 87 | 88 | const state = classifier.toJsonObject(); 89 | 90 | t.true(state.vocabulary.length <= 80); 91 | }); 92 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [**@ladjs/naivebayes**](https://github.com/ladjs/naivebayes) 2 | 3 | [![build status](https://img.shields.io/travis/com/shaunwarman/naivebayes.svg)](https://travis-ci.com/shaunwarman/naivebayes) 4 | [![code coverage](https://img.shields.io/codecov/c/github/shaunwarman/naivebayes.svg)](https://codecov.io/gh/shaunwarman/naivebayes) 5 | [![code style](https://img.shields.io/badge/code_style-XO-5ed9c7.svg)](https://github.com/sindresorhus/xo) 6 | [![styled with prettier](https://img.shields.io/badge/styled_with-prettier-ff69b4.svg)](https://github.com/prettier/prettier) 7 | [![made with lass](https://img.shields.io/badge/made_with-lass-95CC28.svg)](https://lass.js.org) 8 | [![npm downloads](https://img.shields.io/npm/dt/@ladjs/naivebayes.svg)](https://npm.im/@ladjs/naivebayes) 9 | 10 | > A ladjs naivebayes package forked from surmon-china/naivebayes 11 | 12 | 13 | ## Table of Contents 14 | 15 | * [What can I use this for](#what-can-i-use-this-for) 16 | * [Install](#install) 17 | * [npm](#npm) 18 | * [yarn](#yarn) 19 | * [Usage](#usage) 20 | * [API](#api) 21 | * [Class](#class) 22 | * [Learn](#learn) 23 | * [Probabilities](#probabilities) 24 | * [Categorize](#categorize) 25 | * [ToJson](#tojson) 26 | * [ToJsonObject](#tojsonobject) 
27 | * [FromJson](#fromjson) 28 | * [Debug](#debug) 29 | * [Contributors](#contributors) 30 | 31 | 32 | ## What can I use this for 33 | 34 | Naive-Bayes classifier for JavaScript. 35 | 36 | `naivebayes` takes a document (piece of text), and tells you what category that document belongs to. 37 | 38 | You can use this for categorizing any text content into any arbitrary set of **categories**. For example: 39 | 40 | * Is an email **spam**, or **not spam** ? 41 | * Is a news article about **technology**, **politics**, or **sports** ? 42 | * Is a piece of text expressing **positive** emotions, or **negative** emotions? 43 | 44 | 45 | ## Install 46 | 47 | ### npm 48 | 49 | ```sh 50 | npm install @ladjs/naivebayes 51 | ``` 52 | 53 | ### yarn 54 | 55 | ```sh 56 | yarn add @ladjs/naivebayes 57 | ``` 58 | 59 | 60 | ## Usage 61 | 62 | ```javascript 63 | const NaiveBayes = require('@ladjs/naivebayes') 64 | 65 | const classifier = new NaiveBayes() 66 | 67 | // teach it positive phrases 68 | classifier.learn('amazing, awesome movie!! Yeah!! Oh boy.', 'positive') 69 | classifier.learn('Sweet, this is incredibly, amazing, perfect, great!!', 'positive') 70 | 71 | // teach it a negative phrase 72 | classifier.learn('terrible, cruddy thing. Damn. Sucks!!', 'negative') 73 | 74 | // now ask it to categorize a document it has never seen before 75 | classifier.categorize('awesome, cool, amazing!! Yay.') 76 | // => 'positive' 77 | 78 | // serialize the classifier's state as a JSON string. 79 | const stateJson = classifier.toJson() 80 | 81 | // load the classifier back from its JSON representation. 
82 | const revivedClassifier = NaiveBayes.fromJson(stateJson) 83 | 84 | ``` 85 | 86 | ```javascript 87 | const NaiveBayes = require('@ladjs/naivebayes') 88 | 89 | const Segment = require('segment') 90 | const segment = new Segment() 91 | 92 | segment.useDefault() 93 | 94 | const classifier = new NaiveBayes({ 95 | 96 | tokenizer(sentence) { 97 | 98 | const sanitized = sentence.replace(/[^(a-zA-Z\u4e00-\u9fa50-9_)+\s]/g, ' ') 99 | 100 | return segment.doSegment(sanitized, { simple: true }) 101 | } 102 | }) 103 | ``` 104 | 105 | 106 | ## API 107 | 108 | ### Class 109 | 110 | ```javascript 111 | const classifier = new NaiveBayes([options]) 112 | ``` 113 | 114 | Returns an instance of a Naive-Bayes Classifier. 115 | 116 | #### Options 117 | 118 | * `tokenizer(text)` - (type: `function`) - Configure your own tokenizer. 119 | * `vocabularyLimit` - (type: `number` default: 0) - Reference a max word count where `0` is the default, meaning no limit. 120 | * `stopwords` - (type: `boolean` default: false) - To remove [stopwords](https://en.wikipedia.org/wiki/Stop_words) from text 121 | 122 | Eg. 123 | 124 | ```javascript 125 | const classifier = new NaiveBayes({ 126 | tokenizer(text) { 127 | return text.split(' ') 128 | } 129 | }) 130 | ``` 131 | 132 | ### Learn 133 | 134 | ```javascript 135 | classifier.learn(text, category) 136 | ``` 137 | 138 | Teach your classifier what `category` the `text` belongs to. The more you teach your classifier, the more reliable it becomes. It will use what it has learned to identify new documents that it hasn't seen before. 139 | 140 | ### Probabilities 141 | 142 | ```javascript 143 | classifier.probabilities(text) 144 | ``` 145 | 146 | Returns an array of `{ category, probability }` objects with probability calculated for each category. Its judgement is based on what you have taught it with `.learn()`. 
147 | 148 | ### Categorize 149 | 150 | ```javascript 151 | classifier.categorize(text, [probability]) 152 | ``` 153 | 154 | Returns the `category` it thinks `text` belongs to. Its judgement is based on what you have taught it with `.learn()`. 155 | 156 | ### ToJson 157 | 158 | ```javascript 159 | classifier.toJson() 160 | ``` 161 | 162 | Returns the JSON representation of a classifier. This is the same as `JSON.stringify(classifier.toJsonObject())`. 163 | 164 | ### ToJsonObject 165 | 166 | ```javascript 167 | classifier.toJsonObject() 168 | ``` 169 | 170 | Returns a JSON-friendly representation of the classifier as an `object`. 171 | 172 | ### FromJson 173 | 174 | ```javascript 175 | const classifier = NaiveBayes.fromJson(jsonObject) 176 | ``` 177 | 178 | Returns a classifier instance from the JSON representation. Use this with the JSON representation obtained from `classifier.toJson()`. 179 | 180 | ### Debug 181 | 182 | To run `naivebayes` in debug mode simply set `DEBUG=naivebayes` when running your script. 
183 | 184 | 185 | ## Contributors 186 | 187 | | Name | Website | 188 | | ---------------- | -------------------------- | 189 | | **Surmon** | | 190 | | **Shaun Warman** | | 191 | -------------------------------------------------------------------------------- /test/fixtures/classifier-limit.json: -------------------------------------------------------------------------------- 1 | {"categories":["ham","spam"],"docCount":{"ham":2551,"spam":468},"totalDocuments":3019,"vocabulary":["0","1","2","3","4","5","7","10","12","18","20","21","22","25","26","27","28","29","30","51","99","2002","the","to","of","and","a","I","in","is","that","it","for","s","http","com","on","you","with","be","t","have","this","are","as","The","not","net","from","www","at","was","or","by","but","can","an","has","they","all","list","will","lists","my","if","use","we","about","","there","more","so","one","would","just","their","do","which","like","get","your","out","up","some","This","listinfo","what","time","who","It","people","he","|","m","been","me","freshrpms","than","no","mailing","said","Date","them","users","_______________________________________________","new","any","List","had","In","mailman","into","rpm","way","If","html","ve","could","RPM","email","You","because","https","were","his","first","its","our","spam","d","how","To","even","talk","mail","But","using","message","where","most","redhat","ll","line","click","see","linux","years","From","XML","exmh","many","Linux","still","C","problem","+","And","very","ie","We","being","That","apt","find","data","us","after","Exmh","fork","information","own","files","these","world","might","better","without","different","technology","ALB","last","while","DataPower","i","long","news","kernel","code","server","software","Bush","01","Aug","He","United","States","companies","e","ilug","Mr","yahoo","World","high","company","both","unseen","Yahoo","must","version","send","devel","workers","~","doing","Now","unsubscribe","source","against","Red","group","wi
thin","business","\\","Hat","root","her","Groups","network","Chris","works","him","web","i386","info","political","]","ever","F","msgs","l","subject","global","Wed","industry","August","seen","Java","says","terms","market","security","k","Capital","man","One","Your","America","processing","application","size","war","hardware","p","Powell","co","docs","Sun","Kelly","President","freedom","million","president","sent","trade","forteana","ximian","ftp","egroups","Sponsor","TM","Networks","7gSolB","Free","uk","CD","French","configure","Dave","Venrock","DVDs","Join","af6_decore","cpp","George","echo","British","Pacific","pub","+s","pt6YBB","0","1","2","3","4","5","7","8","10","12","14","18","23","24","25","30","31","50","66","71","72","80","81","100","500","the","to","of","and","you","a","in","for","your","com","is","I","this","www","[http","that","or","with","are","on","be","from","will","have","it","as","http","s","our","not","000","The","by","We","can","This","You","an","email","my","money","If","FREE","mail","e","out","more","net","all","A","do","people","was","we","To","t","gif]","TO","one","information","THE","list","at","has","YOU","get","only","receive","up","","make","if","images","me","please","send","OF","AND","just","E","address","business","name","over","de","any","asp","so","00","been","free","how","YOUR","Your","wish","ie","new","No","S","no","linux","they","who","jpg]","below","made","Click","day","their","use","his","each","received","removed","FOR","For","what","than","THIS","Free","program","NOW","c","but","government","which","work","mails","IN","server","milf","EnenKio","IS","It","link","ad_key","htm]","REPORT","subject","future","Report","days","orders","life","New","Kingdom","had","ext","index","report","theadmanager","first","Get","looking","COM","very","ve","jeweldive","emails","In","1]","total","sites","html]","domain","ilug","U","et","insurance","fromyou2","response","weeks","cgi","addresses","nasty","Guide","en","html","admanmail","United","Isla
nds","How","p","php","JM","NETNOTEINC","And","o","trading","under","des","fund","cfm","family","Q","IT","internet","Marshall","States","l","adclick","Email","Phone","bindex","world","Group","ws","its","Life","Hermios","last","THAT","»","les","big","CD","state","T","State","MY","King","la","sending","Atoll","WILL","As","REMOVE","à","ebonylust4free","Legal","sle","le","interested","Eneen","Kio","lose","B","freak","Membership","Web","_","prizeinthebag","Mr","ad","Wake","Contains","friendfinder","rights","pk007","nsi","e89","banners","Drive","mailer","Computer","hanmail","Have","Warranty","Island","amber","Offer","natural","Marshallese","islands","His","GUIDE","India","Start","1)","aff","_0","gif][http","black","Guaranteed","AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"],"wordCount":{"ham":300,"spam":300},"wordFrequencyCount":{"ham":{"0":2547,"1":1460,"2":1201,"3":536,"4":579,"5":592,"7":433,"10":802,"12":196,"18":346,"20":255,"21":152,"22":204,"25":186,"26":165,"27":153,"28":194,"29":126,"30":175,"51":177,"99":231,"2002":1912,"the":20973,"to":13281,"of":10721,"and":10550,"a":10175,"I":7428,"in":6859,"is":6395,"that":6101,"it":5116,"for":4619,"s":3964,"http":3861,"com":3723,"on":3460,"you":3251,"with":3099,"be":2872,"t":2807,"have":2676,"this":2629,"are":2497,"as":2345,"The":2326,"not":2282,"net":2221,"from":2193,"www":2156,"at":2113,"was":1992,"or":1972,"by":1971,"but":1880,"can":1805,"an":1719,"has":1515,"they":1468,"all":1456,"list":1418,"will":1372,"lists":1367,"my":1365,"if":1289,"use":1272,"we":1245,"about":1242,"":1207,"there":1181,"more":1173,"so":1164,"one":1094,"would":1093,"just":1090,"their":1060,"do":1059,"which":1052,"like":1033,"get":1028,"your":1023,"out":1013,"up":990,"some":948,"This":944,"listinfo":931,"what":931,"time":921,"who":893,"It":875,"people":841,"he":839,"|":836,"m":835,"been":831,"me":816,"freshrpms":803,"than":794,"no":793,"mailing":792,"said":788,"Date":770,"them":764,"users":741,"__________________________
_____________________":730,"new":725,"any":683,"List":665,"had":664,"In":653,"mailman":649,"into":644,"rpm":635,"way":635,"If":625,"html":617,"ve":615,"could":608,"RPM":600,"email":597,"You":596,"because":586,"https":571,"were":565,"his":565,"first":551,"its":536,"our":525,"spam":523,"d":518,"how":512,"To":502,"even":498,"talk":491,"mail":491,"But":490,"using":490,"message":484,"where":470,"most":463,"redhat":462,"ll":453,"line":450,"click":439,"see":435,"linux":422,"years":415,"From":412,"XML":407,"exmh":406,"many":404,"Linux":403,"still":401,"C":399,"problem":396,"+":394,"And":393,"very":390,"ie":379,"We":373,"being":372,"That":369,"apt":366,"find":366,"data":365,"us":361,"after":353,"Exmh":335,"fork":333,"information":322,"own":316,"files":315,"these":313,"world":306,"might":301,"better":291,"without":288,"different":283,"technology":281,"ALB":270,"last":270,"while":267,"DataPower":266,"i":264,"long":264,"news":262,"kernel":261,"code":261,"server":260,"software":253,"Bush":252,"01":252,"Aug":251,"He":245,"United":243,"States":242,"companies":242,"e":234,"ilug":227,"Mr":225,"yahoo":225,"World":222,"high":221,"company":218,"both":216,"unseen":211,"Yahoo":211,"must":211,"version":210,"send":209,"devel":208,"workers":206,"~":206,"doing":205,"Now":203,"unsubscribe":198,"source":197,"against":197,"Red":196,"group":196,"within":193,"business":191,"\\":189,"Hat":186,"root":184,"her":184,"Groups":183,"network":178,"Chris":177,"works":174,"him":174,"web":174,"i386":173,"info":167,"political":164,"]":163,"ever":161,"F":159,"msgs":158,"l":158,"subject":158,"global":156,"Wed":152,"industry":148,"August":148,"seen":146,"Java":142,"says":142,"terms":137,"market":136,"security":135,"k":132,"Capital":130,"man":130,"One":125,"Your":121,"America":121,"processing":119,"application":119,"size":117,"war":117,"hardware":114,"p":114,"Powell":112,"co":112,"docs":110,"Sun":107,"Kelly":106,"President":105,"freedom":102,"million":100,"president":100,"sent":100,"trade":98,"forteana":97,"ximi
an":96,"ftp":93,"egroups":92,"Sponsor":89,"TM":89,"Networks":89,"7gSolB":87,"Free":87,"uk":87,"CD":85,"French":85,"configure":84,"Dave":84,"Venrock":78,"DVDs":73,"Join":67,"af6_decore":66,"cpp":66,"George":63,"echo":61,"British":60,"Pacific":58,"pub":57,"+s":51,"pt6YBB":51},"spam":{"0":92,"1":437,"2":346,"3":222,"4":180,"5":379,"7":59,"8":89,"10":176,"12":66,"14":54,"18":53,"23":35,"24":65,"25":89,"30":281,"31":270,"50":117,"66":52,"71":69,"72":36,"80":64,"81":106,"100":184,"500":102,"the":4705,"to":4043,"of":3209,"and":3208,"you":2573,"a":2020,"in":1825,"for":1653,"your":1443,"com":1402,"is":1361,"I":1184,"this":1108,"www":1067,"[http":1050,"that":967,"or":840,"with":830,"are":810,"on":791,"be":777,"from":762,"will":737,"have":723,"it":715,"as":686,"http":669,"s":602,"our":593,"not":561,"000":559,"The":550,"by":545,"We":450,"can":446,"This":433,"You":432,"an":424,"email":413,"my":410,"money":394,"If":376,"FREE":375,"mail":375,"e":370,"out":363,"more":349,"net":344,"all":342,"A":340,"do":323,"people":321,"was":317,"we":315,"To":313,"t":311,"gif]":291,"TO":290,"one":285,"information":284,"THE":282,"list":280,"at":279,"has":270,"YOU":269,"get":260,"only":245,"receive":235,"up":232,"":231,"make":229,"if":228,"images":227,"me":222,"please":221,"send":218,"OF":217,"AND":216,"just":211,"E":206,"address":202,"business":199,"name":198,"over":196,"de":194,"any":193,"asp":192,"so":189,"00":187,"been":178,"free":178,"how":176,"YOUR":169,"Your":167,"wish":163,"ie":163,"new":163,"No":161,"S":160,"no":159,"linux":157,"they":155,"who":154,"jpg]":153,"below":153,"made":148,"Click":147,"day":141,"their":140,"use":138,"his":137,"each":134,"received":132,"removed":132,"FOR":131,"For":129,"what":129,"than":129,"THIS":128,"Free":128,"program":128,"NOW":125,"c":125,"but":125,"government":124,"which":122,"work":119,"mails":117,"IN":117,"server":111,"milf":110,"EnenKio":110,"IS":109,"It":109,"link":108,"ad_key":104,"htm]":104,"REPORT":103,"subject":103,"future":102,"Report":100,"days":100,
"orders":99,"life":96,"New":94,"Kingdom":93,"had":93,"ext":93,"index":93,"report":91,"theadmanager":89,"first":87,"Get":87,"looking":87,"COM":86,"very":84,"ve":84,"jeweldive":83,"emails":82,"In":82,"1]":81,"total":80,"sites":79,"html]":79,"domain":78,"ilug":78,"U":77,"et":76,"insurance":76,"fromyou2":74,"response":74,"weeks":73,"cgi":73,"addresses":73,"nasty":72,"Guide":72,"en":70,"html":70,"admanmail":70,"United":70,"Islands":68,"How":68,"p":67,"php":66,"JM":66,"NETNOTEINC":66,"And":66,"o":65,"trading":64,"under":63,"des":62,"fund":62,"cfm":61,"family":61,"Q":61,"IT":61,"internet":61,"Marshall":60,"States":60,"l":59,"adclick":58,"Email":58,"Phone":58,"bindex":57,"world":57,"Group":57,"ws":56,"its":55,"Life":54,"Hermios":53,"last":53,"THAT":53,"»":52,"les":52,"big":52,"CD":52,"state":52,"T":50,"State":49,"MY":49,"King":48,"la":48,"sending":48,"Atoll":46,"WILL":46,"As":46,"REMOVE":45,"à":44,"ebonylust4free":43,"Legal":43,"sle":42,"le":42,"interested":42,"Eneen":41,"Kio":41,"lose":41,"B":41,"freak":40,"Membership":40,"Web":40,"_":40,"prizeinthebag":39,"Mr":39,"ad":38,"Wake":37,"Contains":37,"friendfinder":37,"rights":37,"pk007":36,"nsi":36,"e89":36,"banners":36,"Drive":36,"mailer":35,"Computer":35,"hanmail":34,"Have":34,"Warranty":33,"Island":33,"amber":32,"Offer":32,"natural":32,"Marshallese":31,"islands":31,"His":31,"GUIDE":31,"India":30,"Start":29,"1)":29,"aff":29,"_0":29,"gif][http":29,"black":28,"Guaranteed":28,"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA":28}},"options":{"vocabularyLimit":300}} -------------------------------------------------------------------------------- /src/naivebayes.js: -------------------------------------------------------------------------------- 1 | const debug = require('debug')('naivebayes'); 2 | const stopword = require('stopword'); 3 | 4 | /** 5 | * keys we use to serialize a classifier's state 6 | */ 7 | const STATE_KEYS = [ 8 | 'categories', 9 | 'docCount', 10 | 'totalDocuments', 11 | 
'vocabulary', 12 | 'wordCount', 13 | 'wordFrequencyCount', 14 | 'options' 15 | ]; 16 | 17 | /** 18 | * Given an input string, tokenize it into an array of word tokens. 19 | * This is the default tokenization function used if user does not provide one in `options`. 20 | * 21 | * @param {String} text 22 | * @return {Array} 23 | */ 24 | const defaultTokenizer = (text) => { 25 | const rgxPunctuation = /[^(A-Яa-я\u4E00-\u9FA5\w)+\s]/g; 26 | 27 | return text 28 | .replace(rgxPunctuation, ' ') 29 | .replace(/[\u4E00-\u9FA5]/g, (word) => `${word} `) 30 | .split(/\s+/); 31 | }; 32 | 33 | /** 34 | * Naive-Bayes Classifier 35 | * 36 | * This is a naive-bayes classifier that uses Laplace Smoothing. 37 | * 38 | */ 39 | class NaiveBayes { 40 | constructor(options = {}) { 41 | // set options object 42 | this.options = {}; 43 | if (typeof options !== 'undefined') { 44 | if (!options || typeof options !== 'object' || Array.isArray(options)) { 45 | throw new TypeError( 46 | 'NaiveBayes got invalid `options`: `' + 47 | options + 48 | '`. Pass in an object.' 
49 | ); 50 | } 51 | 52 | this.options = options; 53 | } 54 | 55 | this.tokenizer = this.options.tokenizer || defaultTokenizer; 56 | 57 | this.vocabulary = []; 58 | 59 | // max vocabulary size based on word frequency, default is no limit 60 | this.vocabularyLimit = this.options.vocabularyLimit || 0; 61 | 62 | // number of documents we have learned from 63 | this.totalDocuments = 0; 64 | 65 | // document frequency table for each of our categories 66 | this.docCount = {}; 67 | 68 | // filter stopwords from vocabulary 69 | this.stopwords = options.stopwords || false; 70 | 71 | // for each category, how many words total were mapped to it 72 | this.wordCount = {}; 73 | 74 | // word frequency table for each category 75 | this.wordFrequencyCount = {}; 76 | 77 | // hashmap of our category names 78 | this.categories = []; 79 | 80 | debug('init %O', this); 81 | } 82 | 83 | /** 84 | * Initialize each of our data structure entries for this new category 85 | * 86 | * @param {String} categoryName 87 | */ 88 | initializeCategory(categoryName) { 89 | if (!this.categories.includes(categoryName)) { 90 | this.docCount[categoryName] = 0; 91 | this.wordCount[categoryName] = 0; 92 | this.wordFrequencyCount[categoryName] = {}; 93 | this.categories.push(categoryName); 94 | } 95 | 96 | return this; 97 | } 98 | 99 | /** 100 | * train our naive-bayes classifier by telling it what `category` 101 | * the `text` corresponds to. 
102 | * 103 | * @param {String} text 104 | * @param {String} class 105 | */ 106 | learn(text, category) { 107 | debug({ text, category }); 108 | // initialize category data structures if we've never seen this category 109 | this.initializeCategory(category); 110 | 111 | // update our count of how many documents mapped to this category 112 | this.docCount[category]++; 113 | 114 | // update the total number of documents we have learned from 115 | this.totalDocuments++; 116 | 117 | // normalize the text into a word array 118 | let tokens = this.tokenizer(text); 119 | 120 | if (this.stopwords) { 121 | tokens = stopword.removeStopwords(tokens); 122 | } 123 | 124 | // get a frequency count for each token in the text 125 | const frequencyTable = this.frequencyTable(tokens); 126 | 127 | /* 128 | * Update our vocabulary and our word frequency count for this category 129 | */ 130 | Object.keys(frequencyTable).forEach((token) => { 131 | // add this word to our vocabulary if not already existing 132 | if (!this.vocabulary[token]) { 133 | this.vocabulary.push(token); 134 | } 135 | 136 | const frequencyInText = frequencyTable[token]; 137 | 138 | if (!this.wordFrequencyCount[category]) 139 | this.wordFrequencyCount[category] = {}; 140 | 141 | // update the frequency information for this word in this category 142 | if (this.wordFrequencyCount[category][token]) { 143 | this.wordFrequencyCount[category][token] += frequencyInText; 144 | } else { 145 | this.wordFrequencyCount[category][token] = frequencyInText; 146 | } 147 | 148 | // update the count of all words we have seen mapped to this category 149 | this.wordCount[category] += frequencyInText; 150 | }); 151 | 152 | if (!this.vocabularyLimit || this.vocabulary.length <= this.vocabularyLimit) 153 | return this; 154 | 155 | const newFrequencyCount = {}; 156 | for (const category in this.wordFrequencyCount) { 157 | if (Object.hasOwnProperty.call(this.wordFrequencyCount, category)) { 158 | const frequencyTable = 
this.wordFrequencyCount[category]; 159 | const words = Object.keys(frequencyTable); 160 | if (words.length <= this.vocabularyLimit) { 161 | newFrequencyCount[category] = this.wordFrequencyCount[category]; 162 | continue; 163 | } 164 | 165 | // sort words by highest frequency 166 | const frequentWords = words.sort( 167 | (a, b) => frequencyTable[b] - frequencyTable[a] 168 | ); 169 | 170 | debug({ frequentWords }); 171 | 172 | // build up new structure until vocab limit reached 173 | for (let count = 0; count < this.vocabularyLimit; count++) { 174 | const word = frequentWords[count]; 175 | if (!newFrequencyCount[category]) { 176 | newFrequencyCount[category] = {}; 177 | } 178 | 179 | newFrequencyCount[category][word] = this.wordFrequencyCount[category][ 180 | word 181 | ]; 182 | } 183 | } 184 | } 185 | 186 | this.wordFrequencyCount = newFrequencyCount; 187 | 188 | this.vocabulary = []; 189 | this.wordCount = {}; 190 | const categories = Object.keys(this.wordFrequencyCount); 191 | for (const category of categories) { 192 | const words = Object.keys(this.wordFrequencyCount[category]); 193 | this.wordCount[category] = words.length; 194 | this.vocabulary = [...this.vocabulary, ...words]; 195 | } 196 | 197 | return this; 198 | } 199 | 200 | /** 201 | * Determine what category `text` belongs to. 202 | * 203 | * @param {String} text 204 | * @param {Boolean} probability 205 | * @return {String} category 206 | */ 207 | categorize(text, probability) { 208 | const category = probability 209 | ? this.probabilities(text)[0] 210 | : this.probabilities(text)[0].category; 211 | debug('categorize: %O', { text, category, probability }); 212 | return category; 213 | } 214 | 215 | /** 216 | * Determine category probabilities for `text`. 217 | * 218 | * @param {String} text 219 | * @return {Array} probabilities 220 | */ 221 | probabilities(text) { 222 | // [W1,W2,W3,W4,Wn...] 
223 | const tokens = this.tokenizer(text); 224 | const frequencyTable = this.frequencyTable(tokens); 225 | 226 | // P(W1|C) * P(W2|C) ... P(Wn|C) * P(C) 227 | // iterate thru our categories to calculate the probability for this text 228 | return this.categories 229 | .map((category) => { 230 | // start by calculating the overall probability of this category 231 | // => out of all documents we've ever looked at, how many were 232 | // mapped to this category 233 | const categoryProbability = 234 | this.docCount[category] / this.vocabularyLimit 235 | ? this.wordCount[category] 236 | : this.totalDocuments; 237 | 238 | // take the log to avoid underflow 239 | let logProbability = Math.log(categoryProbability); 240 | 241 | // now determine P( w | c ) for each word `w` in the text 242 | Object.keys(frequencyTable).forEach((token) => { 243 | const frequencyInText = frequencyTable[token]; 244 | const tokenProbability = this.tokenProbability(token, category); 245 | 246 | // determine the log of the P( w | c ) for this word 247 | logProbability += frequencyInText * Math.log(tokenProbability); 248 | }); 249 | 250 | debug('probabilities: %O', { category, logProbability }); 251 | 252 | return { 253 | category, 254 | probability: logProbability 255 | }; 256 | }) 257 | .sort((previous, next) => next.probability - previous.probability); 258 | } 259 | 260 | /** 261 | * Calculate probability that a `token` belongs to a `category` 262 | * 263 | * @param {String} token 264 | * @param {String} category 265 | * @return {Number} probability 266 | */ 267 | tokenProbability(token, category) { 268 | if (!this.wordFrequencyCount[category]) 269 | this.wordFrequencyCount[category] = {}; 270 | 271 | const wordFrequencyCount = this.wordFrequencyCount[category][token] || 0; 272 | 273 | const wordCount = this.wordCount[category]; 274 | 275 | // P(W|C) 276 | return (wordFrequencyCount + 1) / (wordCount + this.vocabulary.length); 277 | } 278 | 279 | /** 280 | * Build a frequency hashmap where 281 | 
* - the keys are the entries in `tokens` 282 | * - the values are the frequency of each entry in `tokens` 283 | * 284 | * @param {Array} tokens Normalized word array 285 | * @return {Object} 286 | */ 287 | frequencyTable(tokens) { 288 | const frequencyTable = Object.create(null); 289 | for (const token of tokens) { 290 | if (frequencyTable[token]) { 291 | frequencyTable[token]++; 292 | } else { 293 | frequencyTable[token] = 1; 294 | } 295 | } 296 | 297 | return frequencyTable; 298 | } 299 | 300 | /** 301 | * Dump the classifier's state as a JSON string. 302 | * @param {Boolean} Optionally format the serialized JSON output for easier human consumption 303 | * @return {String} Representation of the classifier. 304 | */ 305 | toJson(prettyPrint) { 306 | const prettyPrintSpaces = prettyPrint ? 2 : 0; 307 | return JSON.stringify(this.toJsonObject(), null, prettyPrintSpaces); 308 | } 309 | 310 | toJsonObject() { 311 | const state = {}; 312 | for (const key of STATE_KEYS) { 313 | state[key] = this[key]; 314 | } 315 | 316 | return state; 317 | } 318 | 319 | /** 320 | * Initializes a NaiveBayes instance from a JSON state representation. 321 | * Use this with classifier.toJson(). 
322 | * 323 | * @param {String} jsonStr state representation obtained by classifier.toJson() 324 | * @return {NaiveBayes} Classifier 325 | */ 326 | static fromJson(json, limit) { 327 | if (typeof json === 'string') { 328 | try { 329 | json = JSON.parse(json); 330 | } catch { 331 | throw new Error('Naivebayes.fromJson expects a valid JSON string.'); 332 | } 333 | } 334 | 335 | if (json.options && limit) { 336 | json.options.vocabularyLimit = limit || 0; 337 | } 338 | 339 | // init a new classifier 340 | const classifier = new NaiveBayes(json.options); 341 | 342 | // override the classifier's state 343 | STATE_KEYS.forEach((key) => { 344 | if (json[key] === undefined) { 345 | throw new Error( 346 | `NaiveBayes.fromJson: JSON string is missing an expected property: '${key}'.` 347 | ); 348 | } else { 349 | classifier[key] = json[key]; 350 | } 351 | }); 352 | 353 | return classifier; 354 | } 355 | 356 | static getStateKeys() { 357 | return STATE_KEYS; 358 | } 359 | } 360 | 361 | module.exports = NaiveBayes; 362 | --------------------------------------------------------------------------------