├── .gitignore ├── LICENSE ├── README.md ├── lib └── classificator.js ├── package-lock.json ├── package.json ├── test └── classificator.js └── yarn.lock /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/node,linux,webstorm 3 | 4 | ### Linux ### 5 | *~ 6 | 7 | # temporary files which can be created if a process still has a handle open of a deleted file 8 | .fuse_hidden* 9 | 10 | # KDE directory preferences 11 | .directory 12 | 13 | # Linux trash folder which might appear on any partition or disk 14 | .Trash-* 15 | 16 | # .nfs files are created when an open file is removed but is still being accessed 17 | .nfs* 18 | 19 | ### Node ### 20 | # Logs 21 | logs 22 | *.log 23 | npm-debug.log* 24 | yarn-debug.log* 25 | yarn-error.log* 26 | 27 | # Runtime data 28 | pids 29 | *.pid 30 | *.seed 31 | *.pid.lock 32 | 33 | # Directory for instrumented libs generated by jscoverage/JSCover 34 | lib-cov 35 | 36 | # Coverage directory used by tools like istanbul 37 | coverage 38 | 39 | # nyc test coverage 40 | .nyc_output 41 | 42 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 43 | .grunt 44 | 45 | # Bower dependency directory (https://bower.io/) 46 | bower_components 47 | 48 | # node-waf configuration 49 | .lock-wscript 50 | 51 | # Compiled binary addons (http://nodejs.org/api/addons.html) 52 | build/Release 53 | 54 | # Dependency directories 55 | node_modules/ 56 | jspm_packages/ 57 | 58 | # Typescript v1 declaration files 59 | typings/ 60 | 61 | # Optional npm cache directory 62 | .npm 63 | 64 | # Optional eslint cache 65 | .eslintcache 66 | 67 | # Optional REPL history 68 | .node_repl_history 69 | 70 | # Output of 'npm pack' 71 | *.tgz 72 | 73 | # Yarn Integrity file 74 | .yarn-integrity 75 | 76 | # dotenv environment variables file 77 | .env 78 | 79 | 80 | ### WebStorm ### 81 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, 
PyCharm, CLion, Android Studio and Webstorm 82 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 83 | 84 | # User-specific stuff: 85 | .idea/**/workspace.xml 86 | .idea/**/tasks.xml 87 | .idea/dictionaries 88 | 89 | # Sensitive or high-churn files: 90 | .idea/**/dataSources/ 91 | .idea/**/dataSources.ids 92 | .idea/**/dataSources.xml 93 | .idea/**/dataSources.local.xml 94 | .idea/**/sqlDataSources.xml 95 | .idea/**/dynamic.xml 96 | .idea/**/uiDesigner.xml 97 | 98 | # Gradle: 99 | .idea/**/gradle.xml 100 | .idea/**/libraries 101 | 102 | # CMake 103 | cmake-build-debug/ 104 | 105 | # Mongo Explorer plugin: 106 | .idea/**/mongoSettings.xml 107 | 108 | ## File-based project format: 109 | *.iws 110 | 111 | ## Plugin-specific files: 112 | 113 | # IntelliJ 114 | /out/ 115 | 116 | # mpeltonen/sbt-idea plugin 117 | .idea_modules/ 118 | 119 | # JIRA plugin 120 | atlassian-ide-plugin.xml 121 | 122 | # Cursive Clojure plugin 123 | .idea/replstate.xml 124 | 125 | # Crashlytics plugin (for Android Studio and IntelliJ) 126 | com_crashlytics_export_strings.xml 127 | crashlytics.properties 128 | crashlytics-build.properties 129 | fabric.properties 130 | 131 | ### WebStorm Patch ### 132 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 133 | 134 | # *.iml 135 | # modules.xml 136 | # .idea/misc.xml 137 | # *.ipr 138 | 139 | # Sonarlint plugin 140 | .idea/sonarlint 141 | 142 | # End of https://www.gitignore.io/api/node,linux,webstorm 143 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Wozacosta 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, 
copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # classificator 2 | 3 | [![NPM Licence shield](https://img.shields.io/github/license/Wozacosta/classificator.svg)](https://github.com/Wozacosta/classificator/blob/master/LICENSE) 4 | [![NPM release version shield](https://img.shields.io/npm/v/classificator.svg)](https://www.npmjs.com/package/classificator) 5 | 6 | Naive Bayes classifier for node.js 7 | 8 | `bayes` takes a document (piece of text), and tells you what category that document belongs to. 9 | 10 | 11 | ## What can I use this for? 12 | 13 | You can use this for categorizing any text content into any arbitrary set of **categories**. For example: 14 | 15 | - is an email **spam**, or **not spam** ? 16 | - is a news article about **technology**, **politics**, or **sports** ? 17 | - is a piece of text expressing **positive** emotions, or **negative** emotions? 
18 | 19 | More here: https://en.wikipedia.org/wiki/Naive_Bayes_classifier 20 | 21 | 22 | ## Installing 23 | 24 | Recommended: Node v6.0.0 + 25 | 26 | ``` 27 | npm install --save classificator 28 | ``` 29 | 30 | 31 | ## Usage 32 | 33 | ``` 34 | const bayes = require('classificator') 35 | const classifier = bayes() 36 | ``` 37 | 38 | ### Teach your classifier 39 | 40 | ``` 41 | classifier.learn('amazing, awesome movie! Had a good time', 'positive') 42 | classifier.learn('Buy my free viagra pill and get rich!', 'spam') 43 | classifier.learn('I really hate dust and annoying cats', 'negative') 44 | classifier.learn('LOL this sucks so hard', 'troll') 45 | ``` 46 | 47 | ### Make your classifier unlearn 48 | 49 | ``` 50 | classifier.learn('i hate mornings', 'positive'); 51 | // uh oh, that was a mistake. Time to unlearn 52 | classifier.unlearn('i hate mornings', 'positive'); 53 | ``` 54 | 55 | ### Remove a category 56 | 57 | ``` 58 | classifier.removeCategory('troll'); 59 | ``` 60 | 61 | ### categorization 62 | 63 | ``` 64 | classifier.categorize("I've always hated Martians"); 65 | // => { 66 | likelihoods: [ 67 | { 68 | category: 'negative', 69 | logLikelihood: -17.241944258040537, 70 | logProba: -0.6196197927020783, 71 | proba: 0.538149006882628 72 | }, { 73 | category: 'positive', 74 | logLikelihood: -17.93509143860048, 75 | logProba: -1.312766973262022, 76 | proba: 0.26907450344131445 77 | }, { 78 | category: 'spam', 79 | logLikelihood: -18.26854831109384, 80 | logProba: -1.646223845755383, 81 | proba: 0.19277648967605832 } 82 | ], 83 | predictedCategory: 'negative' 84 | } 85 | ``` 86 | 87 | ### serialize the classifier's state as a JSON string. 88 | 89 | `let stateJson = classifier.toJson()` 90 | 91 | ### load the classifier back from its JSON representation. 
92 | 93 | `let revivedClassifier = bayes.fromJson(stateJson)` 94 | 95 | note: `stateJson` can either be a JSON string (obtained from `classifier.toJson()`), or an object 96 | 97 | 98 | -------- 99 | 100 | 101 | ## API 102 | 103 | ### `let classifier = bayes([options])` 104 | 105 | Returns an instance of a Naive-Bayes Classifier. 106 | 107 | Pass in an optional `options` object to configure the instance. 108 | 109 | If you specify a `tokenizer` function in `options`, it will be used as the instance's tokenizer. It receives a (string) `text` argument - this is the string value that is passed in by you when you call `.learn()` or `.categorize()`. It must return an array of tokens. The default tokenizer removes punctuation and splits on spaces. 110 | 111 | Eg. 112 | 113 | ``` 114 | let classifier = bayes({ 115 | tokenizer: function (text) { return text.split(' ') } 116 | }) 117 | ``` 118 | 119 | You can specify the `alpha` parameter of the [additive smoothing operation](https://en.wikipedia.org/wiki/Additive_smoothing). 120 | This is an integer. 121 | The default value is 1. 122 | 123 | You can also specify the `fitPrior` parameter. 124 | Defines how the [prior probability](https://en.wikipedia.org/wiki/Prior_probability) is calculated. 125 | If set to `false`, the classifier will use a uniform prior rather than a learnt one. 126 | The default value is `true`. 127 | 128 | ### `classifier.learn(text, category)` 129 | 130 | Teach your classifier what `category` should be associated with a `text` string (the text is split into word tokens by the tokenizer). 131 | 132 | ### `classifier.unlearn(text, category)` 133 | 134 | The classifier will unlearn the `text` that was associated with `category`. 135 | 136 | ### `classifier.removeCategory(category)` 137 | 138 | The category is removed and the classifier data are updated accordingly. 
139 | 140 | ### `classifier.categorize(text)` 141 | 142 | *Parameters* 143 | 144 | `text {String}` 145 | 146 | *Returns* 147 | 148 | `{Object}` An object with the `predictedCategory` and an array of the categories 149 | ordered by likelihood (most likely first). 150 | 151 | ``` 152 | { 153 | likelihoods : [ 154 | ... 155 | { 156 | category: 'positive', 157 | logLikelihood: -17.93509143860048, 158 | logProba: -1.312766973262022, 159 | proba: 0.26907450344131445 160 | }, 161 | ... 162 | ], 163 | predictedCategory : 'negative' //--> the main category bayes thinks text 164 | belongs to. As a string 165 | } 166 | ``` 167 | 168 | ### `classifier.toJson()` 169 | 170 | Returns the JSON representation of a classifier. 171 | 172 | ### `let classifier = bayes.fromJson(jsonStr)` 173 | 174 | Returns a classifier instance from the JSON representation. Use this with the JSON representation obtained from `classifier.toJson()` 175 | -------------------------------------------------------------------------------- /lib/classificator.js: -------------------------------------------------------------------------------- 1 | const Decimal = require('decimal.js').default; // handles arbitrary-precision arithmetics. 2 | 3 | /* 4 | Expose our naive-bayes generator function 5 | */ 6 | 7 | module.exports = function(options) { 8 | return new Naivebayes(options); 9 | }; 10 | 11 | // keys we use to serialize a classifier's state 12 | const STATE_KEYS = (module.exports.STATE_KEYS = [ 13 | 'categories', 14 | 'docCount', 15 | 'totalDocuments', 16 | 'vocabulary', 17 | 'vocabularySize', 18 | 'wordCount', 19 | 'wordFrequencyCount', 20 | 'options', 21 | ]); 22 | const DEFAULT_ALPHA = 1; 23 | const DEFAULT_FIT_PRIOR = true; 24 | 25 | /** 26 | * Initializes a NaiveBayes instance from a JSON state representation. 27 | * Use this with classifier.toJson(). 
/**
 * Initializes a NaiveBayes instance from a JSON state representation.
 * Use this with classifier.toJson().
 *
 * The state may be given either as the JSON string produced by
 * `classifier.toJson()` or as the already-parsed object. Every key listed in
 * `STATE_KEYS` must be present; each one is copied onto the new classifier.
 *
 * Note: functions do not survive `JSON.stringify`, so a custom `tokenizer`
 * is only restored when the state is passed as an object whose
 * `options.tokenizer` is still a function; otherwise the default tokenizer
 * is used.
 *
 * @param {String|Object} jsonStrOrObject state representation obtained by classifier.toJson()
 * @return {NaiveBayes} Classifier
 * @throws {Error} when the argument is not a JSON string / object, when it
 *   parses to something that is not an object (e.g. `null`), or when one of
 *   the `STATE_KEYS` properties is missing
 */
module.exports.fromJson = (jsonStrOrObject) => {
  let parameters;

  try {
    switch (typeof jsonStrOrObject) {
      case 'string':
        parameters = JSON.parse(jsonStrOrObject);
        break;

      case 'object':
        parameters = jsonStrOrObject;
        break;

      default:
        throw new Error('');
    }

    // `typeof null === 'object'`, and JSON.parse can legally return null or a
    // primitive ('null', '5', '"x"'). Reject those here so the caller gets
    // the documented error instead of a TypeError on `parameters.options`.
    if (parameters === null || typeof parameters !== 'object') {
      throw new Error('state must be a non-null object');
    }
  } catch (e) {
    console.error(e);
    throw new Error('Naivebays.fromJson expects a valid JSON string or an object.');
  }

  // init a new classifier (derives tokenizer / alpha / fitPrior from options)
  const classifier = new Naivebayes(parameters.options);

  // override the classifier's state
  STATE_KEYS.forEach((k) => {
    if (typeof parameters[k] === 'undefined') {
      throw new Error(
        `Naivebayes.fromJson: JSON string is missing an expected property: [${k}].`
      );
    }
    classifier[k] = parameters[k];
  });

  return classifier;
};

/**
 * Given an input string, tokenize it into an array of word tokens.
 * This is the default tokenization function used if user does not provide one in `options`.
 *
 * Every character that is not matched by the character class below is
 * replaced by a space, then the text is split on runs of whitespace.
 *
 * NOTE(review): the character class literally contains `(`, `)` and `+`, so
 * those three characters are kept rather than stripped; and because the
 * split is on `\s+`, text that starts or ends with whitespace or a stripped
 * character yields an empty-string token. Both behaviours are preserved here
 * because changing them would alter the tokens (and therefore the
 * probabilities) of already-trained classifiers.
 *
 * @param {String} text
 * @return {Array} tokens, in order of appearance
 */
const defaultTokenizer = (text) => {
  // remove punctuation from text - replace anything outside the allowed set with a space
  const rgxPunctuation = /[^(a-zA-ZA-Яa-я0-9_)+\s]/g;

  const sanitized = text.replace(rgxPunctuation, ' ');

  return sanitized.split(/\s+/);
};
/**
 * Naive-Bayes Classifier
 *
 * This is a naive-bayes classifier that uses additive (Laplace) smoothing.
 *
 * Takes an (optional) options object containing:
 *   - `tokenizer` => custom tokenization function, `(text) => Array` of tokens
 *   - `alpha`     => additive smoothing parameter; any falsy value
 *                    (including 0) falls back to the module default
 *   - `fitPrior`  => when `false`, a uniform prior is used instead of the
 *                    learnt per-category document ratio
 *
 * @param {Object} [options]
 * @throws {TypeError} when `options` is provided but is not a non-array object
 */
function Naivebayes(options) {
  // set options object
  this.options = {};
  if (typeof options !== 'undefined') {
    if (!options || typeof options !== 'object' || Array.isArray(options)) {
      throw new TypeError(
        `NaiveBayes got invalid 'options': ${options}'. Pass in an object.`
      );
    }
    this.options = options;
  }

  this.tokenizer = this.options.tokenizer || defaultTokenizer;
  this.alpha = this.options.alpha || DEFAULT_ALPHA;
  this.fitPrior = this.options.fitPrior === undefined ? DEFAULT_FIT_PRIOR : this.options.fitPrior;

  // vocabulary: token => number of learned documents that contain it
  this.vocabulary = {};
  // number of tokens whose vocabulary count is > 0
  this.vocabularySize = 0;

  // number of documents we have learned from
  this.totalDocuments = 0;

  // document frequency table: category => number of documents mapped to it
  this.docCount = {};

  // category => total number of words mapped to it
  this.wordCount = {};

  // word frequency table: category => (token => occurrences in that category)
  this.wordFrequencyCount = {};

  // hashmap of our category names
  this.categories = {};
}

/*
  Internal helpers.

  The state tables are plain objects (they must round-trip through JSON), so
  tokens or category names such as "constructor", "toString" or "__proto__"
  would otherwise be resolved through Object.prototype. All reads therefore
  check own properties only, and all writes define an own data property.
*/

// true when `key` is an own property of `table`
const hasOwnKey = (table, key) => Object.prototype.hasOwnProperty.call(table, key);

// own value stored under `key`, or undefined (never an inherited member)
const readOwn = (table, key) => (hasOwnKey(table, key) ? table[key] : undefined);

// store `value` under `key` as an own, enumerable property (safe for "__proto__")
const writeOwn = (table, key, value) => {
  Object.defineProperty(table, key, {
    value,
    writable: true,
    enumerable: true,
    configurable: true,
  });
};

// add `delta` to the counter stored under `key` (missing counts as 0); returns the new value
const addToCount = (table, key, delta) => {
  const updated = (readOwn(table, key) || 0) + delta;
  writeOwn(table, key, updated);
  return updated;
};

// After one document (or a whole category) containing `token` was removed:
// drop the token from the vocabulary when no category holds it any more,
// otherwise lower its document count without letting it reach zero.
const releaseVocabularyToken = (classifier, token) => {
  const documentFrequency = readOwn(classifier.vocabulary, token);
  if (!(documentFrequency > 0)) return;

  const stillUsed = Object.keys(classifier.wordFrequencyCount).some(
    (category) => readOwn(classifier.wordFrequencyCount[category], token) > 0
  );

  if (stillUsed) {
    writeOwn(classifier.vocabulary, token, Math.max(1, documentFrequency - 1));
  } else {
    writeOwn(classifier.vocabulary, token, 0);
    classifier.vocabularySize--;
  }
};

/**
 * Initialize each of our data structure entries for this new category.
 * Does nothing when the category is already registered.
 *
 * @param {String} categoryName
 * @return {Naivebayes} this, for chaining
 */
Naivebayes.prototype.initializeCategory = function(categoryName) {
  if (!readOwn(this.categories, categoryName)) {
    writeOwn(this.docCount, categoryName, 0);
    writeOwn(this.wordCount, categoryName, 0);
    writeOwn(this.wordFrequencyCount, categoryName, {});
    writeOwn(this.categories, categoryName, true);
  }
  return this;
};

/**
 * Properly remove a category, unlearning all words that were associated to it.
 * Unknown categories are ignored.
 *
 * A token that no remaining category contains is removed from the vocabulary
 * (count set to 0, `vocabularySize` decremented) regardless of how many
 * documents of the removed category contained it.
 *
 * @param {String} categoryName
 * @return {Naivebayes} this, for chaining
 */
Naivebayes.prototype.removeCategory = function(categoryName) {
  if (!readOwn(this.categories, categoryName)) {
    return this;
  }

  // update the total number of documents we have learned from
  this.totalDocuments -= readOwn(this.docCount, categoryName) || 0;

  const removedTokens = Object.keys(readOwn(this.wordFrequencyCount, categoryName) || {});

  delete this.docCount[categoryName];
  delete this.wordCount[categoryName];
  delete this.wordFrequencyCount[categoryName];
  delete this.categories[categoryName];

  // must run after the deletes so the removed category no longer counts as a user
  removedTokens.forEach((token) => releaseVocabularyToken(this, token));

  return this;
};

/**
 * train our naive-bayes classifier by telling it what `category`
 * the `text` corresponds to.
 *
 * @param {String} text
 * @param {String} category Category to learn as being text
 * @return {Naivebayes} this, for chaining
 */
Naivebayes.prototype.learn = function(text, category) {
  // initialize category data structures if we've never seen this category
  this.initializeCategory(category);

  // update our count of how many documents mapped to this category
  addToCount(this.docCount, category, 1);

  // update the total number of documents we have learned from
  this.totalDocuments++;

  // normalize the text into a word array and count each distinct token
  const frequencyTable = this.frequencyTable(this.tokenizer(text));
  const categoryFrequencies = readOwn(this.wordFrequencyCount, category);

  Object.keys(frequencyTable).forEach((token) => {
    const frequencyInText = frequencyTable[token];

    // the vocabulary counts documents, so each distinct token adds exactly 1
    const documentFrequency = readOwn(this.vocabulary, token);
    if (documentFrequency > 0) {
      writeOwn(this.vocabulary, token, documentFrequency + 1);
    } else {
      writeOwn(this.vocabulary, token, 1);
      this.vocabularySize++;
    }

    // update the frequency information for this word in this category
    addToCount(categoryFrequencies, token, frequencyInText);

    // update the count of all words we have seen mapped to this category
    addToCount(this.wordCount, category, frequencyInText);
  });

  return this;
};

/**
 * untrain our naive-bayes classifier by telling it what `category`
 * the `text` to remove corresponds to.
 *
 * This mirrors `learn`: every distinct token lowers the vocabulary document
 * count by one. Tokens the category never learned are skipped, and counts are
 * never driven below zero. When the last document of a category is unlearned
 * the category is removed entirely, so it can be learned again from scratch.
 * Unknown categories (or categories without documents) are left untouched.
 *
 * @param {String} text
 * @param {String} category Category to unlearn as being text
 * @return {Naivebayes} this, for chaining
 */
Naivebayes.prototype.unlearn = function(text, category) {
  if (!readOwn(this.categories, category) || !(readOwn(this.docCount, category) > 0)) {
    return this;
  }

  const frequencyTable = this.frequencyTable(this.tokenizer(text));
  const categoryFrequencies = readOwn(this.wordFrequencyCount, category) || {};

  Object.keys(frequencyTable).forEach((token) => {
    const learnedFrequency = readOwn(categoryFrequencies, token) || 0;
    if (learnedFrequency <= 0) return;

    // never remove more occurrences than the category actually holds
    const removed = Math.min(frequencyTable[token], learnedFrequency);

    if (removed === learnedFrequency) {
      delete categoryFrequencies[token];
    } else {
      writeOwn(categoryFrequencies, token, learnedFrequency - removed);
    }

    // update the count of all words we have seen mapped to this category
    addToCount(this.wordCount, category, -removed);

    releaseVocabularyToken(this, token);
  });

  // update the total number of documents we have learned from
  this.totalDocuments--;

  // docCount is lowered first, so removeCategory does not subtract the
  // document from totalDocuments a second time
  if (addToCount(this.docCount, category, -1) <= 0) {
    this.removeCategory(category);
  }

  return this;
};

/**
 * Determine what category `text` belongs to.
 *
 * Each entry of `likelihoods` has the shape
 * `{ category, logLikelihood, logProba, proba }` (all numbers except
 * `category`), sorted with the most probable category first.
 * When no category has been learned yet the result is
 * `{ likelihoods: [], predictedCategory: null }`.
 *
 * @param {String} text
 *
 * @return {Object} The predicted category, and the likelihoods stats.
 */
Naivebayes.prototype.categorize = function(text) {
  const frequencyTable = this.frequencyTable(this.tokenizer(text));
  const categories = Object.keys(this.categories);

  if (categories.length === 0) {
    return { likelihoods: [], predictedCategory: null };
  }

  const likelihoods = categories.map((category) => {
    // prior: out of all documents we've ever looked at, how many were
    // mapped to this category (or a constant when fitPrior is off)
    const categoryLikelihood = this.fitPrior
      ? readOwn(this.docCount, category) / this.totalDocuments
      : 1;

    // work in log space to avoid underflow
    let logLikelihood = Decimal(categoryLikelihood).naturalLogarithm();

    // now determine P( w | c ) for each known word `w` in the text
    Object.keys(frequencyTable).forEach((token) => {
      if (!(readOwn(this.vocabulary, token) > 0)) return;

      const termFrequencyInText = frequencyTable[token];
      const logTokenProbability = Decimal(
        this.tokenProbability(token, category)
      ).naturalLogarithm();

      // the product is intentionally computed as a plain number, exactly as
      // before, so existing numeric results do not shift
      logLikelihood = logLikelihood.plus(termFrequencyInText * logTokenProbability);
    });

    if (logLikelihood.toNumber() === Number.NEGATIVE_INFINITY) {
      console.warn(`[Classificator] category ${category} had -Infinity odds`);
    }

    return { category, logLikelihood };
  });

  // log( sum_c exp(logLikelihood_c) ), used to normalise into probabilities
  const logProbX = likelihoods
    .reduce(
      (sum, likelihood) => sum.plus(Decimal.exp(likelihood.logLikelihood)),
      new Decimal(0)
    )
    .naturalLogarithm();

  likelihoods.forEach((likelihood) => {
    const logProba = likelihood.logLikelihood.minus(logProbX);
    likelihood.logProba = logProba.toNumber();
    likelihood.proba = logProba.naturalExponential().toNumber();
    likelihood.logLikelihood = likelihood.logLikelihood.toNumber();
  });

  // sort to have first element with biggest probability
  likelihoods.sort((a, b) => b.proba - a.proba);

  return {
    likelihoods,
    predictedCategory: likelihoods[0].category,
  };
};

/**
 * Calculate probability that a `token` belongs to a `category`,
 * using additive smoothing with `this.alpha`.
 * A category without recorded words is treated as empty.
 *
 * @param {String} token
 * @param {String} category
 * @return {Number} probability
 */
Naivebayes.prototype.tokenProbability = function(token, category) {
  // how many times this word has occurred in documents mapped to this category
  const categoryFrequencies = readOwn(this.wordFrequencyCount, category) || {};
  const wordFrequencyCount = readOwn(categoryFrequencies, token) || 0;

  // what is the count of all words that have ever been mapped to this category
  const wordCount = readOwn(this.wordCount, category) || 0;

  // additive (Laplace when alpha is 1) smoothing equation
  return (wordFrequencyCount + this.alpha) / (wordCount + this.alpha * this.vocabularySize);
};

/**
 * Build a frequency hashmap where
 * - the keys are the entries in `tokens`
 * - the values are the frequency of each entry in `tokens`
 *
 * The returned object has no prototype, so any token is a safe key.
 *
 * @param {Array} tokens Normalized word array
 * @return {Object}
 */
Naivebayes.prototype.frequencyTable = function(tokens) {
  const frequencyTable = Object.create(null);

  tokens.forEach((token) => {
    frequencyTable[token] = (frequencyTable[token] || 0) + 1;
  });

  return frequencyTable;
};

/**
 * Dump the classifier's state as a JSON string.
 * Only the properties listed in `STATE_KEYS` are included; function-valued
 * options (such as a custom tokenizer) are dropped by `JSON.stringify`.
 *
 * @return {String} Representation of the classifier.
 */
Naivebayes.prototype.toJson = function() {
  const state = {};

  STATE_KEYS.forEach((key) => {
    state[key] = this[key];
  });

  return JSON.stringify(state);
};
"https://registry.npmjs.org/ansi-regex/-/ansi-regex-3.0.0.tgz", 22 | "integrity": "sha1-7QMXwyIGT3lGbAKWa922Bas32Zg=", 23 | "dev": true 24 | }, 25 | "ansi-styles": { 26 | "version": "4.3.0", 27 | "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", 28 | "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", 29 | "dev": true, 30 | "requires": { 31 | "color-convert": "^2.0.1" 32 | } 33 | }, 34 | "anymatch": { 35 | "version": "3.1.2", 36 | "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.2.tgz", 37 | "integrity": "sha512-P43ePfOAIupkguHUycrc4qJ9kz8ZiuOUijaETwX7THt0Y/GNK7v0aa8rY816xWjZ7rJdA5XdMcpVFTKMq+RvWg==", 38 | "dev": true, 39 | "requires": { 40 | "normalize-path": "^3.0.0", 41 | "picomatch": "^2.0.4" 42 | } 43 | }, 44 | "argparse": { 45 | "version": "2.0.1", 46 | "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", 47 | "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", 48 | "dev": true 49 | }, 50 | "balanced-match": { 51 | "version": "1.0.2", 52 | "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", 53 | "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", 54 | "dev": true 55 | }, 56 | "binary-extensions": { 57 | "version": "2.2.0", 58 | "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz", 59 | "integrity": "sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==", 60 | "dev": true 61 | }, 62 | "brace-expansion": { 63 | "version": "1.1.11", 64 | "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", 65 | "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", 66 | "dev": true, 67 | "requires": { 68 | "balanced-match": "^1.0.0", 69 | 
"concat-map": "0.0.1" 70 | } 71 | }, 72 | "braces": { 73 | "version": "3.0.2", 74 | "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", 75 | "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", 76 | "dev": true, 77 | "requires": { 78 | "fill-range": "^7.0.1" 79 | } 80 | }, 81 | "browser-stdout": { 82 | "version": "1.3.1", 83 | "resolved": "https://registry.npmjs.org/browser-stdout/-/browser-stdout-1.3.1.tgz", 84 | "integrity": "sha512-qhAVI1+Av2X7qelOfAIYwXONood6XlZE/fXaBSmW/T5SzLAmCgzi+eiWE7fUvbHaeNBQH13UftjpXxsfLkMpgw==", 85 | "dev": true 86 | }, 87 | "camelcase": { 88 | "version": "6.2.0", 89 | "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.2.0.tgz", 90 | "integrity": "sha512-c7wVvbw3f37nuobQNtgsgG9POC9qMbNuMQmTCqZv23b6MIz0fcYpBiOlv9gEN/hdLdnZTDQhg6e9Dq5M1vKvfg==", 91 | "dev": true 92 | }, 93 | "chalk": { 94 | "version": "4.1.1", 95 | "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.1.tgz", 96 | "integrity": "sha512-diHzdDKxcU+bAsUboHLPEDQiw0qEe0qd7SYUn3HgcFlWgbDcfLGswOHYeGrHKzG9z6UYf01d9VFMfZxPM1xZSg==", 97 | "dev": true, 98 | "requires": { 99 | "ansi-styles": "^4.1.0", 100 | "supports-color": "^7.1.0" 101 | }, 102 | "dependencies": { 103 | "supports-color": { 104 | "version": "7.2.0", 105 | "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", 106 | "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", 107 | "dev": true, 108 | "requires": { 109 | "has-flag": "^4.0.0" 110 | } 111 | } 112 | } 113 | }, 114 | "chokidar": { 115 | "version": "3.5.2", 116 | "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.5.2.tgz", 117 | "integrity": "sha512-ekGhOnNVPgT77r4K/U3GDhu+FQ2S8TnK/s2KbIGXi0SZWuwkZ2QNyfWdZW+TVfn84DpEP7rLeCt2UI6bJ8GwbQ==", 118 | "dev": true, 119 | "requires": { 120 | "anymatch": "~3.1.2", 121 | "braces": "~3.0.2", 122 | "fsevents": "~2.3.2", 123 | 
"glob-parent": "~5.1.2", 124 | "is-binary-path": "~2.1.0", 125 | "is-glob": "~4.0.1", 126 | "normalize-path": "~3.0.0", 127 | "readdirp": "~3.6.0" 128 | } 129 | }, 130 | "cliui": { 131 | "version": "7.0.4", 132 | "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", 133 | "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", 134 | "dev": true, 135 | "requires": { 136 | "string-width": "^4.2.0", 137 | "strip-ansi": "^6.0.0", 138 | "wrap-ansi": "^7.0.0" 139 | }, 140 | "dependencies": { 141 | "ansi-regex": { 142 | "version": "5.0.0", 143 | "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.0.tgz", 144 | "integrity": "sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==", 145 | "dev": true 146 | }, 147 | "is-fullwidth-code-point": { 148 | "version": "3.0.0", 149 | "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", 150 | "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", 151 | "dev": true 152 | }, 153 | "string-width": { 154 | "version": "4.2.2", 155 | "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.2.tgz", 156 | "integrity": "sha512-XBJbT3N4JhVumXE0eoLU9DCjcaF92KLNqTmFCnG1pf8duUxFGwtP6AD6nkjw9a3IdiRtL3E2w3JDiE/xi3vOeA==", 157 | "dev": true, 158 | "requires": { 159 | "emoji-regex": "^8.0.0", 160 | "is-fullwidth-code-point": "^3.0.0", 161 | "strip-ansi": "^6.0.0" 162 | } 163 | }, 164 | "strip-ansi": { 165 | "version": "6.0.0", 166 | "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.0.tgz", 167 | "integrity": "sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==", 168 | "dev": true, 169 | "requires": { 170 | "ansi-regex": "^5.0.0" 171 | } 172 | } 173 | } 174 | }, 175 | "color-convert": { 176 | "version": "2.0.1", 177 | "resolved": 
"https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", 178 | "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", 179 | "dev": true, 180 | "requires": { 181 | "color-name": "~1.1.4" 182 | } 183 | }, 184 | "color-name": { 185 | "version": "1.1.4", 186 | "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", 187 | "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", 188 | "dev": true 189 | }, 190 | "concat-map": { 191 | "version": "0.0.1", 192 | "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", 193 | "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=", 194 | "dev": true 195 | }, 196 | "debug": { 197 | "version": "4.3.1", 198 | "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.1.tgz", 199 | "integrity": "sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==", 200 | "dev": true, 201 | "requires": { 202 | "ms": "2.1.2" 203 | }, 204 | "dependencies": { 205 | "ms": { 206 | "version": "2.1.2", 207 | "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", 208 | "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", 209 | "dev": true 210 | } 211 | } 212 | }, 213 | "decamelize": { 214 | "version": "4.0.0", 215 | "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-4.0.0.tgz", 216 | "integrity": "sha512-9iE1PgSik9HeIIw2JO94IidnE3eBoQrFJ3w7sFuzSX4DpmZ3v5sZpUiV5Swcf6mQEF+Y0ru8Neo+p+nyh2J+hQ==", 217 | "dev": true 218 | }, 219 | "decimal.js": { 220 | "version": "10.0.1", 221 | "resolved": "https://registry.npmjs.org/decimal.js/-/decimal.js-10.0.1.tgz", 222 | "integrity": "sha512-vklWB5C4Cj423xnaOtsUmAv0/7GqlXIgDv2ZKDyR64OV3OSzGHNx2mk4p/1EKnB5s70k73cIOOEcG9YzF0q4Lw==" 223 | }, 224 | "diff": { 225 | "version": "5.0.0", 226 | "resolved": "https://registry.npmjs.org/diff/-/diff-5.0.0.tgz", 
227 | "integrity": "sha512-/VTCrvm5Z0JGty/BWHljh+BAiw3IK+2j87NGMu8Nwc/f48WoDAC395uomO9ZD117ZOBaHmkX1oyLvkVM/aIT3w==", 228 | "dev": true 229 | }, 230 | "emoji-regex": { 231 | "version": "8.0.0", 232 | "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", 233 | "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", 234 | "dev": true 235 | }, 236 | "escalade": { 237 | "version": "3.1.1", 238 | "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", 239 | "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", 240 | "dev": true 241 | }, 242 | "escape-string-regexp": { 243 | "version": "4.0.0", 244 | "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", 245 | "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==", 246 | "dev": true 247 | }, 248 | "fill-range": { 249 | "version": "7.0.1", 250 | "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", 251 | "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", 252 | "dev": true, 253 | "requires": { 254 | "to-regex-range": "^5.0.1" 255 | } 256 | }, 257 | "find-up": { 258 | "version": "5.0.0", 259 | "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz", 260 | "integrity": "sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==", 261 | "dev": true, 262 | "requires": { 263 | "locate-path": "^6.0.0", 264 | "path-exists": "^4.0.0" 265 | } 266 | }, 267 | "flat": { 268 | "version": "5.0.2", 269 | "resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz", 270 | "integrity": "sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==", 271 | "dev": true 272 | }, 273 | "fs.realpath": { 274 | "version": "1.0.0", 275 
| "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", 276 | "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=", 277 | "dev": true 278 | }, 279 | "fsevents": { 280 | "version": "2.3.2", 281 | "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", 282 | "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", 283 | "dev": true, 284 | "optional": true 285 | }, 286 | "get-caller-file": { 287 | "version": "2.0.5", 288 | "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", 289 | "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", 290 | "dev": true 291 | }, 292 | "glob": { 293 | "version": "7.1.7", 294 | "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.7.tgz", 295 | "integrity": "sha512-OvD9ENzPLbegENnYP5UUfJIirTg4+XwMWGaQfQTY0JenxNvvIKP3U3/tAQSPIu/lHxXYSZmpXlUHeqAIdKzBLQ==", 296 | "dev": true, 297 | "requires": { 298 | "fs.realpath": "^1.0.0", 299 | "inflight": "^1.0.4", 300 | "inherits": "2", 301 | "minimatch": "^3.0.4", 302 | "once": "^1.3.0", 303 | "path-is-absolute": "^1.0.0" 304 | } 305 | }, 306 | "glob-parent": { 307 | "version": "5.1.2", 308 | "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", 309 | "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", 310 | "dev": true, 311 | "requires": { 312 | "is-glob": "^4.0.1" 313 | } 314 | }, 315 | "growl": { 316 | "version": "1.10.5", 317 | "resolved": "https://registry.npmjs.org/growl/-/growl-1.10.5.tgz", 318 | "integrity": "sha512-qBr4OuELkhPenW6goKVXiv47US3clb3/IbuWF9KNKEijAy9oeHxU9IgzjvJhHkUzhaj7rOUD7+YGWqUjLp5oSA==", 319 | "dev": true 320 | }, 321 | "has-flag": { 322 | "version": "4.0.0", 323 | "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", 324 | "integrity": 
"sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", 325 | "dev": true 326 | }, 327 | "he": { 328 | "version": "1.2.0", 329 | "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz", 330 | "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", 331 | "dev": true 332 | }, 333 | "inflight": { 334 | "version": "1.0.6", 335 | "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", 336 | "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", 337 | "dev": true, 338 | "requires": { 339 | "once": "^1.3.0", 340 | "wrappy": "1" 341 | } 342 | }, 343 | "inherits": { 344 | "version": "2.0.4", 345 | "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", 346 | "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", 347 | "dev": true 348 | }, 349 | "is-binary-path": { 350 | "version": "2.1.0", 351 | "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", 352 | "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", 353 | "dev": true, 354 | "requires": { 355 | "binary-extensions": "^2.0.0" 356 | } 357 | }, 358 | "is-extglob": { 359 | "version": "2.1.1", 360 | "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", 361 | "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", 362 | "dev": true 363 | }, 364 | "is-fullwidth-code-point": { 365 | "version": "2.0.0", 366 | "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-2.0.0.tgz", 367 | "integrity": "sha1-o7MKXE8ZkYMWeqq5O+764937ZU8=", 368 | "dev": true 369 | }, 370 | "is-glob": { 371 | "version": "4.0.1", 372 | "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.1.tgz", 373 | "integrity": "sha512-5G0tKtBTFImOqDnLB2hG6Bp2qcKEFduo4tZu9MT/H6NQv/ghhy30o55ufafxJ/LdH79LLs2Kfrn85TLKyA7BUg==", 374 | "dev": 
true, 375 | "requires": { 376 | "is-extglob": "^2.1.1" 377 | } 378 | }, 379 | "is-number": { 380 | "version": "7.0.0", 381 | "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", 382 | "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", 383 | "dev": true 384 | }, 385 | "is-plain-obj": { 386 | "version": "2.1.0", 387 | "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-2.1.0.tgz", 388 | "integrity": "sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA==", 389 | "dev": true 390 | }, 391 | "is-unicode-supported": { 392 | "version": "0.1.0", 393 | "resolved": "https://registry.npmjs.org/is-unicode-supported/-/is-unicode-supported-0.1.0.tgz", 394 | "integrity": "sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw==", 395 | "dev": true 396 | }, 397 | "isexe": { 398 | "version": "2.0.0", 399 | "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", 400 | "integrity": "sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=", 401 | "dev": true 402 | }, 403 | "js-yaml": { 404 | "version": "4.1.0", 405 | "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", 406 | "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", 407 | "dev": true, 408 | "requires": { 409 | "argparse": "^2.0.1" 410 | } 411 | }, 412 | "locate-path": { 413 | "version": "6.0.0", 414 | "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", 415 | "integrity": "sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==", 416 | "dev": true, 417 | "requires": { 418 | "p-locate": "^5.0.0" 419 | } 420 | }, 421 | "log-symbols": { 422 | "version": "4.1.0", 423 | "resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-4.1.0.tgz", 424 | "integrity": 
"sha512-8XPvpAA8uyhfteu8pIvQxpJZ7SYYdpUivZpGy6sFsBuKRY/7rQGavedeB8aK+Zkyq6upMFVL/9AW6vOYzfRyLg==", 425 | "dev": true, 426 | "requires": { 427 | "chalk": "^4.1.0", 428 | "is-unicode-supported": "^0.1.0" 429 | } 430 | }, 431 | "minimatch": { 432 | "version": "3.0.4", 433 | "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", 434 | "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", 435 | "dev": true, 436 | "requires": { 437 | "brace-expansion": "^1.1.7" 438 | } 439 | }, 440 | "mocha": { 441 | "version": "9.0.2", 442 | "resolved": "https://registry.npmjs.org/mocha/-/mocha-9.0.2.tgz", 443 | "integrity": "sha512-FpspiWU+UT9Sixx/wKimvnpkeW0mh6ROAKkIaPokj3xZgxeRhcna/k5X57jJghEr8X+Cgu/Vegf8zCX5ugSuTA==", 444 | "dev": true, 445 | "requires": { 446 | "@ungap/promise-all-settled": "1.1.2", 447 | "ansi-colors": "4.1.1", 448 | "browser-stdout": "1.3.1", 449 | "chokidar": "3.5.2", 450 | "debug": "4.3.1", 451 | "diff": "5.0.0", 452 | "escape-string-regexp": "4.0.0", 453 | "find-up": "5.0.0", 454 | "glob": "7.1.7", 455 | "growl": "1.10.5", 456 | "he": "1.2.0", 457 | "js-yaml": "4.1.0", 458 | "log-symbols": "4.1.0", 459 | "minimatch": "3.0.4", 460 | "ms": "2.1.3", 461 | "nanoid": "3.1.23", 462 | "serialize-javascript": "6.0.0", 463 | "strip-json-comments": "3.1.1", 464 | "supports-color": "8.1.1", 465 | "which": "2.0.2", 466 | "wide-align": "1.1.3", 467 | "workerpool": "6.1.5", 468 | "yargs": "16.2.0", 469 | "yargs-parser": "20.2.4", 470 | "yargs-unparser": "2.0.0" 471 | } 472 | }, 473 | "ms": { 474 | "version": "2.1.3", 475 | "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", 476 | "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", 477 | "dev": true 478 | }, 479 | "nanoid": { 480 | "version": "3.1.23", 481 | "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.1.23.tgz", 482 | "integrity": 
"sha512-FiB0kzdP0FFVGDKlRLEQ1BgDzU87dy5NnzjeW9YZNt+/c3+q82EQDUwniSAUxp/F0gFNI1ZhKU1FqYsMuqZVnw==", 483 | "dev": true 484 | }, 485 | "normalize-path": { 486 | "version": "3.0.0", 487 | "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", 488 | "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", 489 | "dev": true 490 | }, 491 | "once": { 492 | "version": "1.4.0", 493 | "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", 494 | "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", 495 | "dev": true, 496 | "requires": { 497 | "wrappy": "1" 498 | } 499 | }, 500 | "p-limit": { 501 | "version": "3.1.0", 502 | "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", 503 | "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", 504 | "dev": true, 505 | "requires": { 506 | "yocto-queue": "^0.1.0" 507 | } 508 | }, 509 | "p-locate": { 510 | "version": "5.0.0", 511 | "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-5.0.0.tgz", 512 | "integrity": "sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==", 513 | "dev": true, 514 | "requires": { 515 | "p-limit": "^3.0.2" 516 | } 517 | }, 518 | "path-exists": { 519 | "version": "4.0.0", 520 | "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", 521 | "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==", 522 | "dev": true 523 | }, 524 | "path-is-absolute": { 525 | "version": "1.0.1", 526 | "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", 527 | "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", 528 | "dev": true 529 | }, 530 | "picomatch": { 531 | "version": "2.3.0", 532 | "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.0.tgz", 533 | "integrity": 
"sha512-lY1Q/PiJGC2zOv/z391WOTD+Z02bCgsFfvxoXXf6h7kv9o+WmsmzYqrAwY63sNgOxE4xEdq0WyUnXfKeBrSvYw==", 534 | "dev": true 535 | }, 536 | "randombytes": { 537 | "version": "2.1.0", 538 | "resolved": "https://registry.npmjs.org/randombytes/-/randombytes-2.1.0.tgz", 539 | "integrity": "sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ==", 540 | "dev": true, 541 | "requires": { 542 | "safe-buffer": "^5.1.0" 543 | } 544 | }, 545 | "readdirp": { 546 | "version": "3.6.0", 547 | "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", 548 | "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", 549 | "dev": true, 550 | "requires": { 551 | "picomatch": "^2.2.1" 552 | } 553 | }, 554 | "require-directory": { 555 | "version": "2.1.1", 556 | "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", 557 | "integrity": "sha1-jGStX9MNqxyXbiNE/+f3kqam30I=", 558 | "dev": true 559 | }, 560 | "safe-buffer": { 561 | "version": "5.2.1", 562 | "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", 563 | "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", 564 | "dev": true 565 | }, 566 | "serialize-javascript": { 567 | "version": "6.0.0", 568 | "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.0.tgz", 569 | "integrity": "sha512-Qr3TosvguFt8ePWqsvRfrKyQXIiW+nGbYpy8XK24NQHE83caxWt+mIymTT19DGFbNWNLfEwsrkSmN64lVWB9ag==", 570 | "dev": true, 571 | "requires": { 572 | "randombytes": "^2.1.0" 573 | } 574 | }, 575 | "string-width": { 576 | "version": "2.1.1", 577 | "resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz", 578 | "integrity": "sha512-nOqH59deCq9SRHlxq1Aw85Jnt4w6KvLKqWVik6oA9ZklXLNIOlqg4F2yrT1MVaTjAqvVwdfeZ7w7aCvJD7ugkw==", 579 | "dev": true, 580 | "requires": { 581 | "is-fullwidth-code-point": 
"^2.0.0", 582 | "strip-ansi": "^4.0.0" 583 | } 584 | }, 585 | "strip-ansi": { 586 | "version": "4.0.0", 587 | "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-4.0.0.tgz", 588 | "integrity": "sha1-qEeQIusaw2iocTibY1JixQXuNo8=", 589 | "dev": true, 590 | "requires": { 591 | "ansi-regex": "^3.0.0" 592 | } 593 | }, 594 | "strip-json-comments": { 595 | "version": "3.1.1", 596 | "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", 597 | "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", 598 | "dev": true 599 | }, 600 | "supports-color": { 601 | "version": "8.1.1", 602 | "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz", 603 | "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==", 604 | "dev": true, 605 | "requires": { 606 | "has-flag": "^4.0.0" 607 | } 608 | }, 609 | "to-regex-range": { 610 | "version": "5.0.1", 611 | "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", 612 | "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", 613 | "dev": true, 614 | "requires": { 615 | "is-number": "^7.0.0" 616 | } 617 | }, 618 | "which": { 619 | "version": "2.0.2", 620 | "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", 621 | "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", 622 | "dev": true, 623 | "requires": { 624 | "isexe": "^2.0.0" 625 | } 626 | }, 627 | "wide-align": { 628 | "version": "1.1.3", 629 | "resolved": "https://registry.npmjs.org/wide-align/-/wide-align-1.1.3.tgz", 630 | "integrity": "sha512-QGkOQc8XL6Bt5PwnsExKBPuMKBxnGxWWW3fU55Xt4feHozMUhdUMaBCk290qpm/wG5u/RSKzwdAC4i51YigihA==", 631 | "dev": true, 632 | "requires": { 633 | "string-width": "^1.0.2 || 2" 634 | } 635 | }, 636 | "workerpool": { 637 | 
"version": "6.1.5", 638 | "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.1.5.tgz", 639 | "integrity": "sha512-XdKkCK0Zqc6w3iTxLckiuJ81tiD/o5rBE/m+nXpRCB+/Sq4DqkfXZ/x0jW02DG1tGsfUGXbTJyZDP+eu67haSw==", 640 | "dev": true 641 | }, 642 | "wrap-ansi": { 643 | "version": "7.0.0", 644 | "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", 645 | "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", 646 | "dev": true, 647 | "requires": { 648 | "ansi-styles": "^4.0.0", 649 | "string-width": "^4.1.0", 650 | "strip-ansi": "^6.0.0" 651 | }, 652 | "dependencies": { 653 | "ansi-regex": { 654 | "version": "5.0.0", 655 | "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.0.tgz", 656 | "integrity": "sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==", 657 | "dev": true 658 | }, 659 | "is-fullwidth-code-point": { 660 | "version": "3.0.0", 661 | "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", 662 | "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", 663 | "dev": true 664 | }, 665 | "string-width": { 666 | "version": "4.2.2", 667 | "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.2.tgz", 668 | "integrity": "sha512-XBJbT3N4JhVumXE0eoLU9DCjcaF92KLNqTmFCnG1pf8duUxFGwtP6AD6nkjw9a3IdiRtL3E2w3JDiE/xi3vOeA==", 669 | "dev": true, 670 | "requires": { 671 | "emoji-regex": "^8.0.0", 672 | "is-fullwidth-code-point": "^3.0.0", 673 | "strip-ansi": "^6.0.0" 674 | } 675 | }, 676 | "strip-ansi": { 677 | "version": "6.0.0", 678 | "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.0.tgz", 679 | "integrity": "sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==", 680 | "dev": true, 681 | "requires": { 682 | "ansi-regex": "^5.0.0" 683 | } 684 | } 685 | } 
686 | }, 687 | "wrappy": { 688 | "version": "1.0.2", 689 | "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", 690 | "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=", 691 | "dev": true 692 | }, 693 | "y18n": { 694 | "version": "5.0.8", 695 | "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", 696 | "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", 697 | "dev": true 698 | }, 699 | "yargs": { 700 | "version": "16.2.0", 701 | "resolved": "https://registry.npmjs.org/yargs/-/yargs-16.2.0.tgz", 702 | "integrity": "sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==", 703 | "dev": true, 704 | "requires": { 705 | "cliui": "^7.0.2", 706 | "escalade": "^3.1.1", 707 | "get-caller-file": "^2.0.5", 708 | "require-directory": "^2.1.1", 709 | "string-width": "^4.2.0", 710 | "y18n": "^5.0.5", 711 | "yargs-parser": "^20.2.2" 712 | }, 713 | "dependencies": { 714 | "ansi-regex": { 715 | "version": "5.0.0", 716 | "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.0.tgz", 717 | "integrity": "sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==", 718 | "dev": true 719 | }, 720 | "is-fullwidth-code-point": { 721 | "version": "3.0.0", 722 | "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", 723 | "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", 724 | "dev": true 725 | }, 726 | "string-width": { 727 | "version": "4.2.2", 728 | "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.2.tgz", 729 | "integrity": "sha512-XBJbT3N4JhVumXE0eoLU9DCjcaF92KLNqTmFCnG1pf8duUxFGwtP6AD6nkjw9a3IdiRtL3E2w3JDiE/xi3vOeA==", 730 | "dev": true, 731 | "requires": { 732 | "emoji-regex": "^8.0.0", 733 | "is-fullwidth-code-point": "^3.0.0", 734 | "strip-ansi": "^6.0.0" 735 | } 736 | }, 737 | 
"strip-ansi": { 738 | "version": "6.0.0", 739 | "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.0.tgz", 740 | "integrity": "sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==", 741 | "dev": true, 742 | "requires": { 743 | "ansi-regex": "^5.0.0" 744 | } 745 | } 746 | } 747 | }, 748 | "yargs-parser": { 749 | "version": "20.2.4", 750 | "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.4.tgz", 751 | "integrity": "sha512-WOkpgNhPTlE73h4VFAFsOnomJVaovO8VqLDzy5saChRBFQFBoMYirowyW+Q9HB4HFF4Z7VZTiG3iSzJJA29yRA==", 752 | "dev": true 753 | }, 754 | "yargs-unparser": { 755 | "version": "2.0.0", 756 | "resolved": "https://registry.npmjs.org/yargs-unparser/-/yargs-unparser-2.0.0.tgz", 757 | "integrity": "sha512-7pRTIA9Qc1caZ0bZ6RYRGbHJthJWuakf+WmHK0rVeLkNrrGhfoabBNdue6kdINI6r4if7ocq9aD/n7xwKOdzOA==", 758 | "dev": true, 759 | "requires": { 760 | "camelcase": "^6.0.0", 761 | "decamelize": "^4.0.0", 762 | "flat": "^5.0.2", 763 | "is-plain-obj": "^2.1.0" 764 | } 765 | }, 766 | "yocto-queue": { 767 | "version": "0.1.0", 768 | "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", 769 | "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", 770 | "dev": true 771 | } 772 | } 773 | } 774 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "classificator", 3 | "description": "Naive Bayes classifier with verbose informations for node.js", 4 | "version": "0.3.4", 5 | "author": "Wozacosta", 6 | "keywords": [ 7 | "naive", 8 | "bayes", 9 | "categorize", 10 | "classify", 11 | "classifier", 12 | "classification", 13 | "classificator", 14 | "nbayes", 15 | "likelihood", 16 | "machine learning", 17 | "bayesian" 18 | ], 19 | "dependencies": { 20 | "decimal.js": "^10.0.0" 21 | }, 22 | 
"devDependencies": { 23 | "mocha": "^9.0.2" 24 | }, 25 | "engines": { 26 | "node": ">=5.0.0" 27 | }, 28 | "main": "./lib/classificator", 29 | "repository": { 30 | "type": "git", 31 | "url": "https://github.com/Wozacosta/classificator.git" 32 | }, 33 | "scripts": { 34 | "test": "mocha -t 30000 -R spec" 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /test/classificator.js: -------------------------------------------------------------------------------- 1 | var assert = require('assert') 2 | , fs = require('fs') 3 | , path = require('path') 4 | , bayes = require('../lib/classificator') 5 | 6 | describe('bayes() init', function () { 7 | it('valid options (falsey or with an object) do not raise Errors', function () { 8 | var validOptionsCases = [ undefined, {} ]; 9 | 10 | validOptionsCases.forEach(function (validOptions) { 11 | var classifier = bayes(validOptions) 12 | assert.deepEqual(classifier.options, {}) 13 | }) 14 | }) 15 | 16 | it('invalid options (truthy and not object) raise TypeError during init', function () { 17 | var invalidOptionsCases = [ null, 0, 'a', [] ]; 18 | 19 | invalidOptionsCases.forEach(function (invalidOptions) { 20 | assert.throws(function () { bayes(invalidOptions) }, Error) 21 | // check that it's a TypeError 22 | assert.throws(function () { bayes(invalidOptions) }, TypeError) 23 | }) 24 | }) 25 | }) 26 | 27 | describe('bayes using custom tokenizer', function () { 28 | it('uses custom tokenization function if one is provided in `options`.', function () { 29 | var splitOnChar = function (text) { 30 | return text.split('') 31 | } 32 | 33 | var classifier = bayes({ tokenizer: splitOnChar }) 34 | 35 | classifier.learn('abcd', 'happy') 36 | 37 | // check classifier's state is as expected 38 | assert.equal(classifier.totalDocuments, 1) 39 | assert.equal(classifier.docCount.happy, 1) 40 | assert.deepEqual(classifier.vocabulary, { a: 1, b: 1, c: 1, d: 1 }) 41 | 
assert.equal(classifier.vocabularySize, 4) 42 | assert.equal(classifier.wordCount.happy, 4) 43 | assert.equal(classifier.wordFrequencyCount.happy.a, 1) 44 | assert.equal(classifier.wordFrequencyCount.happy.b, 1) 45 | assert.equal(classifier.wordFrequencyCount.happy.c, 1) 46 | assert.equal(classifier.wordFrequencyCount.happy.d, 1) 47 | assert.deepEqual(classifier.categories, { happy: 1 }) 48 | }) 49 | }) 50 | 51 | describe('bayes serializing/deserializing its state', function () { 52 | it('serializes/deserializes its state as JSON correctly.', function (done) { 53 | var classifier = bayes() 54 | 55 | classifier.learn('Fun times were had by all', 'positive') 56 | classifier.learn('sad dark rainy day in the cave', 'negative') 57 | 58 | var jsonRepr = classifier.toJson() 59 | 60 | // check serialized values 61 | var state = JSON.parse(jsonRepr) 62 | 63 | // ensure classifier's state values are all in the json representation 64 | bayes.STATE_KEYS.forEach(function (k) { 65 | assert.deepEqual(state[k], classifier[k]) 66 | }) 67 | 68 | var revivedClassifier = bayes.fromJson(jsonRepr) 69 | 70 | // ensure the revived classifier's state is same as original state 71 | bayes.STATE_KEYS.forEach(function (k) { 72 | assert.deepEqual(revivedClassifier[k], classifier[k]) 73 | }) 74 | 75 | done() 76 | }) 77 | }) 78 | 79 | describe('bayes .learn() correctness', function () { 80 | //sentiment analysis test 81 | it('categorizes correctly for `positive` and `negative` categories', function (done) { 82 | 83 | let classifier = bayes(); 84 | 85 | //teach it positive phrases 86 | classifier.learn('amazing, awesome movie!! Yeah!!', 'positive') 87 | classifier.learn('Sweet, this is incredibly, amazing, perfect, great!!', 'positive') 88 | 89 | //teach it a negative phrase 90 | classifier.learn('terrible, shitty thing. Damn. 
Sucks!!', 'negative') 91 | 92 | //teach it a neutral phrase 93 | classifier.learn('I dont really know what to make of this.', 'neutral') 94 | 95 | //now test it to see that it correctly categorizes a new document 96 | assert.deepEqual(classifier.categorize('awesome, cool, amazing!! Yay.').predictedCategory, 'positive') 97 | done() 98 | }) 99 | 100 | //topic analysis test 101 | it('categorizes correctly for `chinese` and `japanese` categories', function (done) { 102 | 103 | var classifier = bayes() 104 | 105 | //teach it how to identify the `chinese` category 106 | classifier.learn('Chinese Beijing Chinese', 'chinese') 107 | classifier.learn('Chinese Chinese Shanghai', 'chinese') 108 | classifier.learn('Chinese Macao', 'chinese') 109 | 110 | //teach it how to identify the `japanese` category 111 | classifier.learn('Tokyo Japan Chinese', 'japanese') 112 | 113 | //make sure it learned the `chinese` category correctly 114 | var chineseFrequencyCount = classifier.wordFrequencyCount.chinese 115 | 116 | assert.equal(chineseFrequencyCount['Chinese'], 5) 117 | assert.equal(chineseFrequencyCount['Beijing'], 1) 118 | assert.equal(chineseFrequencyCount['Shanghai'], 1) 119 | assert.equal(chineseFrequencyCount['Macao'], 1) 120 | 121 | //make sure it learned the `japanese` category correctly 122 | var japaneseFrequencyCount = classifier.wordFrequencyCount.japanese 123 | 124 | assert.equal(japaneseFrequencyCount['Tokyo'], 1) 125 | assert.equal(japaneseFrequencyCount['Japan'], 1) 126 | assert.equal(japaneseFrequencyCount['Chinese'], 1) 127 | 128 | //now test it to see that it correctly categorizes a new document 129 | assert.deepEqual(classifier.categorize('Chinese Chinese Chinese Tokyo Japan').predictedCategory,'chinese') 130 | 131 | done() 132 | }) 133 | 134 | it('correctly tokenizes cyrlic characters', function (done) { 135 | var classifier = bayes() 136 | 137 | classifier.learn('Надежда за', 'a') 138 | classifier.learn('Надежда за обич еп.36 Тест', 'b') 139 | 
classifier.learn('Надежда за обич еп.36 Тест', 'b') 140 | 141 | var aFreqCount = classifier.wordFrequencyCount.a 142 | assert.equal(aFreqCount['Надежда'], 1) 143 | assert.equal(aFreqCount['за'], 1) 144 | 145 | var bFreqCount = classifier.wordFrequencyCount.b 146 | assert.equal(bFreqCount['Надежда'], 2) 147 | assert.equal(bFreqCount['за'], 2) 148 | assert.equal(bFreqCount['обич'], 2) 149 | assert.equal(bFreqCount['еп'], 2) 150 | assert.equal(bFreqCount['36'], 2) 151 | assert.equal(bFreqCount['Тест'], 2) 152 | 153 | done() 154 | }) 155 | 156 | it('correctly computes probabilities without prior', function (done) { 157 | var classifier = bayes({ fitPrior: false}) 158 | 159 | // learn on a very unbalanced dataset 160 | classifier.learn('aa', '1') 161 | classifier.learn('aa', '1') 162 | classifier.learn('aa', '1') 163 | classifier.learn('bb', '2') 164 | 165 | // test the likelihoods obtained on test strings 166 | assert.equal(classifier.categorize('cc').likelihoods[0].proba, 0.5) 167 | assert.equal(Number(classifier.categorize('bb').likelihoods[0].proba).toFixed(6), Number(0.76923077).toFixed(6)) 168 | assert.equal(Number(classifier.categorize('aa').likelihoods[0].proba).toFixed(6), Number(0.70588235).toFixed(6)) 169 | 170 | done() 171 | }) 172 | 173 | it('correctly computes probabilities with prior', function (done) { 174 | var classifier = bayes() 175 | 176 | // learn on a very unbalanced dataset 177 | classifier.learn('aa', '1') 178 | classifier.learn('aa', '1') 179 | classifier.learn('aa', '1') 180 | classifier.learn('bb', '2') 181 | 182 | // test the likelihoods obtained on test strings 183 | assert.equal(classifier.categorize('cc').likelihoods[0].proba, 0.75) 184 | assert.equal(Number(classifier.categorize('bb').likelihoods[0].proba).toFixed(6), Number(0.52631579).toFixed(6)) 185 | assert.equal(Number(classifier.categorize('aa').likelihoods[0].proba).toFixed(6), Number(0.87804878).toFixed(6)) 186 | 187 | done() 188 | }) 189 | }) 190 | 191 | 
-------------------------------------------------------------------------------- /yarn.lock: -------------------------------------------------------------------------------- 1 | # THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. 2 | # yarn lockfile v1 3 | 4 | 5 | balanced-match@^1.0.0: 6 | version "1.0.0" 7 | resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.0.tgz#89b4d199ab2bee49de164ea02b89ce462d71b767" 8 | 9 | brace-expansion@^1.1.7: 10 | version "1.1.8" 11 | resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.8.tgz#c07b211c7c952ec1f8efd51a77ef0d1d3990a292" 12 | dependencies: 13 | balanced-match "^1.0.0" 14 | concat-map "0.0.1" 15 | 16 | browser-stdout@1.3.0: 17 | version "1.3.0" 18 | resolved "https://registry.yarnpkg.com/browser-stdout/-/browser-stdout-1.3.0.tgz#f351d32969d32fa5d7a5567154263d928ae3bd1f" 19 | 20 | commander@2.11.0: 21 | version "2.11.0" 22 | resolved "https://registry.yarnpkg.com/commander/-/commander-2.11.0.tgz#157152fd1e7a6c8d98a5b715cf376df928004563" 23 | 24 | concat-map@0.0.1: 25 | version "0.0.1" 26 | resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" 27 | 28 | debug@3.1.0: 29 | version "3.1.0" 30 | resolved "https://registry.yarnpkg.com/debug/-/debug-3.1.0.tgz#5bb5a0672628b64149566ba16819e61518c67261" 31 | dependencies: 32 | ms "2.0.0" 33 | 34 | decimal.js@^10.0.0: 35 | version "10.0.1" 36 | resolved "https://registry.yarnpkg.com/decimal.js/-/decimal.js-10.0.1.tgz#d04b16b277f0f9af09671cee225c4882e8857c58" 37 | 38 | diff@3.3.1: 39 | version "3.3.1" 40 | resolved "https://registry.yarnpkg.com/diff/-/diff-3.3.1.tgz#aa8567a6eed03c531fc89d3f711cd0e5259dec75" 41 | 42 | escape-string-regexp@1.0.5: 43 | version "1.0.5" 44 | resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz#1b61c0562190a8dff6ae3bb2cf0200ca130b86d4" 45 | 46 | fs.realpath@^1.0.0: 47 | version "1.0.0" 48 | resolved 
"https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f" 49 | 50 | glob@7.1.2: 51 | version "7.1.2" 52 | resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.2.tgz#c19c9df9a028702d678612384a6552404c636d15" 53 | dependencies: 54 | fs.realpath "^1.0.0" 55 | inflight "^1.0.4" 56 | inherits "2" 57 | minimatch "^3.0.4" 58 | once "^1.3.0" 59 | path-is-absolute "^1.0.0" 60 | 61 | growl@1.10.3: 62 | version "1.10.3" 63 | resolved "https://registry.yarnpkg.com/growl/-/growl-1.10.3.tgz#1926ba90cf3edfe2adb4927f5880bc22c66c790f" 64 | 65 | has-flag@^2.0.0: 66 | version "2.0.0" 67 | resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-2.0.0.tgz#e8207af1cc7b30d446cc70b734b5e8be18f88d51" 68 | 69 | he@1.1.1: 70 | version "1.1.1" 71 | resolved "https://registry.yarnpkg.com/he/-/he-1.1.1.tgz#93410fd21b009735151f8868c2f271f3427e23fd" 72 | 73 | inflight@^1.0.4: 74 | version "1.0.6" 75 | resolved "https://registry.yarnpkg.com/inflight/-/inflight-1.0.6.tgz#49bd6331d7d02d0c09bc910a1075ba8165b56df9" 76 | dependencies: 77 | once "^1.3.0" 78 | wrappy "1" 79 | 80 | inherits@2: 81 | version "2.0.3" 82 | resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.3.tgz#633c2c83e3da42a502f52466022480f4208261de" 83 | 84 | minimatch@^3.0.4: 85 | version "3.0.4" 86 | resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.4.tgz#5166e286457f03306064be5497e8dbb0c3d32083" 87 | dependencies: 88 | brace-expansion "^1.1.7" 89 | 90 | minimist@0.0.8: 91 | version "0.0.8" 92 | resolved "https://registry.yarnpkg.com/minimist/-/minimist-0.0.8.tgz#857fcabfc3397d2625b8228262e86aa7a011b05d" 93 | 94 | mkdirp@0.5.1: 95 | version "0.5.1" 96 | resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-0.5.1.tgz#30057438eac6cf7f8c4767f38648d6697d75c903" 97 | dependencies: 98 | minimist "0.0.8" 99 | 100 | mocha@>=0.8.1: 101 | version "4.1.0" 102 | resolved "https://registry.yarnpkg.com/mocha/-/mocha-4.1.0.tgz#7d86cfbcf35cb829e2754c32e17355ec05338794" 
103 | dependencies: 104 | browser-stdout "1.3.0" 105 | commander "2.11.0" 106 | debug "3.1.0" 107 | diff "3.3.1" 108 | escape-string-regexp "1.0.5" 109 | glob "7.1.2" 110 | growl "1.10.3" 111 | he "1.1.1" 112 | mkdirp "0.5.1" 113 | supports-color "4.4.0" 114 | 115 | ms@2.0.0: 116 | version "2.0.0" 117 | resolved "https://registry.yarnpkg.com/ms/-/ms-2.0.0.tgz#5608aeadfc00be6c2901df5f9861788de0d597c8" 118 | 119 | once@^1.3.0: 120 | version "1.4.0" 121 | resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1" 122 | dependencies: 123 | wrappy "1" 124 | 125 | path-is-absolute@^1.0.0: 126 | version "1.0.1" 127 | resolved "https://registry.yarnpkg.com/path-is-absolute/-/path-is-absolute-1.0.1.tgz#174b9268735534ffbc7ace6bf53a5a9e1b5c5f5f" 128 | 129 | supports-color@4.4.0: 130 | version "4.4.0" 131 | resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-4.4.0.tgz#883f7ddabc165142b2a61427f3352ded195d1a3e" 132 | dependencies: 133 | has-flag "^2.0.0" 134 | 135 | wrappy@1: 136 | version "1.0.2" 137 | resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f" 138 | --------------------------------------------------------------------------------