├── docs ├── CNAME ├── .vuepress │ ├── public │ │ ├── logo.png │ │ ├── favicon.ico │ │ ├── apple-icon.png │ │ ├── favicon-16x16.png │ │ ├── favicon-32x32.png │ │ ├── favicon-96x96.png │ │ ├── ms-icon-70x70.png │ │ ├── apple-icon-57x57.png │ │ ├── apple-icon-60x60.png │ │ ├── apple-icon-72x72.png │ │ ├── apple-icon-76x76.png │ │ ├── ms-icon-144x144.png │ │ ├── ms-icon-150x150.png │ │ ├── ms-icon-310x310.png │ │ ├── android-icon-36x36.png │ │ ├── android-icon-48x48.png │ │ ├── android-icon-72x72.png │ │ ├── android-icon-96x96.png │ │ ├── apple-icon-114x114.png │ │ ├── apple-icon-120x120.png │ │ ├── apple-icon-144x144.png │ │ ├── apple-icon-152x152.png │ │ ├── apple-icon-180x180.png │ │ ├── android-icon-144x144.png │ │ ├── android-icon-192x192.png │ │ ├── apple-icon-precomposed.png │ │ ├── browserconfig.xml │ │ └── manifest.json │ └── config.js ├── README.md ├── .gitignore └── package.json ├── src ├── core │ ├── .gitignore │ ├── svm │ │ ├── SvmJsDemo.js │ │ ├── SvmPerfDemo.js │ │ ├── SvmLinearDemo.js │ │ ├── SvmLinearMulticlassDemo.js │ │ ├── SvmJs.js │ │ └── svmcommon.js │ ├── neural │ │ └── NeuralNetwork.js │ ├── winnow │ │ └── WinnowHashDemo.js │ └── multilabel │ │ ├── index.js │ │ ├── multilabelutils.js │ │ ├── BinaryRelevanceDemo.js │ │ └── MetaLabeler.js ├── features │ ├── LowerCaseNormalizer.js │ ├── README.md │ ├── NGramsOfWords.js │ ├── NGramsFromArray.js │ ├── RegexpNormalizer.js │ ├── NGramsOfLetters.js │ ├── RegexpSplitter.js │ ├── HypernymExtractor.js │ ├── index.js │ └── FeatureLookupTable.js ├── formats │ ├── index.js │ ├── json.js │ ├── tsv.js │ ├── svmlight.js │ └── arff.js ├── utils │ ├── index.js │ ├── hamming.js │ ├── list.js │ ├── unseen_correlation.js │ └── partitions.js └── index.js ├── test ├── mocha.opts ├── tempfiles │ └── .gitignore ├── wordcounts.js ├── sorted.js ├── utilsTest │ ├── HammingDistanceTest.js │ ├── ListTest.js │ └── PartitionsTest.js ├── test_utils.js ├── generaterandom.js ├── featuresTest │ ├── FeatureLookupTableTest.js │ ├── 
RegexpNormalizerTest.js │ ├── RegexpSplitterTest.js │ └── FeatureExtractorTest.js └── classifiersTest │ ├── multilabel │ ├── ClassifierWithSplitterTest.js │ ├── MulticlassSegmentationBayesTest.js │ ├── MetaLabelerLanguageModelTest.js │ ├── MetaLabelerSvmTest.js │ └── MetaLabelerWinnowTest.js │ ├── WinnowExampleTest.js │ ├── NeuralWithSpellCheckerTest.js │ ├── SvmJsTest.js │ ├── NeuralWithFeatureExtractorTest.js │ ├── NeuralWithNormalizerTest.js │ ├── WinnowTest.js │ ├── SvmMulticlassTest.js │ └── SvmTest.js ├── .babelrc ├── renovate.json ├── .github ├── ISSUE_TEMPLATE │ ├── custom.md │ ├── feature_request.md │ └── bug_report.md └── FUNDING.yml ├── index.js ├── dist ├── features │ ├── LowerCaseNormalizer.js │ ├── NGramsOfWords.js │ ├── NGramsFromArray.js │ ├── RegexpNormalizer.js │ ├── NGramsOfLetters.js │ ├── RegexpSplitter.js │ ├── HypernymExtractor.js │ ├── index.js │ └── FeatureLookupTable.js ├── utils │ ├── index.js │ ├── hamming.js │ ├── list.js │ ├── unseen_correlation.js │ └── partitions.js ├── formats │ ├── index.js │ ├── json.js │ ├── tsv.js │ ├── svmlight.js │ └── arff.js ├── core │ ├── svm │ │ ├── SvmJsDemo.js │ │ ├── SvmPerfDemo.js │ │ ├── SvmLinearDemo.js │ │ ├── SvmLinearMulticlassDemo.js │ │ ├── SvmJs.js │ │ └── svmcommon.js │ ├── neural │ │ └── NeuralNetwork.js │ ├── decisiontree │ │ ├── DecisionTreeDemo.js │ │ └── DecisionTree.js │ ├── winnow │ │ └── WinnowHashDemo.js │ └── multilabel │ │ ├── index.js │ │ ├── multilabelutils.js │ │ ├── BinaryRelevanceDemo.js │ │ └── MetaLabeler.js └── index.js ├── .gitignore ├── package.json ├── README.md └── CODE_OF_CONDUCT.md /docs/CNAME: -------------------------------------------------------------------------------- 1 | neuro.js.org -------------------------------------------------------------------------------- /src/core/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/test/mocha.opts: -------------------------------------------------------------------------------- 1 | --recursive 2 | -------------------------------------------------------------------------------- /test/tempfiles/.gitignore: -------------------------------------------------------------------------------- 1 | /Svm*.* -------------------------------------------------------------------------------- /.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": ["@babel/preset-env"] 3 | } -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "config:base" 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /docs/.vuepress/public/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/logo.png -------------------------------------------------------------------------------- /docs/.vuepress/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/favicon.ico -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon.png -------------------------------------------------------------------------------- /docs/.vuepress/public/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/favicon-16x16.png 
-------------------------------------------------------------------------------- /docs/.vuepress/public/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/favicon-32x32.png -------------------------------------------------------------------------------- /docs/.vuepress/public/favicon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/favicon-96x96.png -------------------------------------------------------------------------------- /docs/.vuepress/public/ms-icon-70x70.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/ms-icon-70x70.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-57x57.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-57x57.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-60x60.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-72x72.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-72x72.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-76x76.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-76x76.png -------------------------------------------------------------------------------- /docs/.vuepress/public/ms-icon-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/ms-icon-144x144.png -------------------------------------------------------------------------------- /docs/.vuepress/public/ms-icon-150x150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/ms-icon-150x150.png -------------------------------------------------------------------------------- /docs/.vuepress/public/ms-icon-310x310.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/ms-icon-310x310.png -------------------------------------------------------------------------------- /docs/.vuepress/public/android-icon-36x36.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/android-icon-36x36.png -------------------------------------------------------------------------------- /docs/.vuepress/public/android-icon-48x48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/android-icon-48x48.png -------------------------------------------------------------------------------- /docs/.vuepress/public/android-icon-72x72.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/android-icon-72x72.png -------------------------------------------------------------------------------- /docs/.vuepress/public/android-icon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/android-icon-96x96.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-114x114.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-114x114.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-120x120.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-144x144.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-152x152.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-180x180.png 
-------------------------------------------------------------------------------- /docs/.vuepress/public/android-icon-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/android-icon-144x144.png -------------------------------------------------------------------------------- /docs/.vuepress/public/android-icon-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/android-icon-192x192.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-precomposed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-precomposed.png -------------------------------------------------------------------------------- /src/features/LowerCaseNormalizer.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Normalizes a sentence by converting it to lower case. 3 | */ 4 | export default function (sample) { 5 | return sample.toLowerCase(); 6 | }; 7 | -------------------------------------------------------------------------------- /src/formats/index.js: -------------------------------------------------------------------------------- 1 | export const arff = require("./arff"); 2 | export const json = require("./json"); 3 | export const tsv = require("./tsv"); 4 | export const svmlight = require("./svmlight"); 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom issue template 3 | about: Describe this issue template's purpose here. 
4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | export const classifiers = require('./dist/core'); 2 | export const features = require('./dist/features'); 3 | export const formats = require('./dist/formats'); 4 | export const utils = require('./dist/utils'); 5 | -------------------------------------------------------------------------------- /src/features/README.md: -------------------------------------------------------------------------------- 1 | This folder should contain several kinds of feature extractors. 2 | 3 | A feature extractor is a function that takes an input object, and returns a features object for that object, for use in training and/or classification. 4 | -------------------------------------------------------------------------------- /docs/.vuepress/public/browserconfig.xml: -------------------------------------------------------------------------------- 1 | 2 | #ffffff -------------------------------------------------------------------------------- /dist/features/LowerCaseNormalizer.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports["default"] = _default; 7 | 8 | /** 9 | * Normalizes a sentence by converting it to lower case. 
10 | */ 11 | function _default(sample) { 12 | return sample.toLowerCase(); 13 | } 14 | 15 | ; -------------------------------------------------------------------------------- /src/utils/index.js: -------------------------------------------------------------------------------- 1 | var trainAndTest = require("./trainAndTest"); 2 | module.exports = { 3 | hash: require("./hash"), 4 | partitions: require("./partitions"), 5 | PrecisionRecall: require("./PrecisionRecall"), 6 | test: trainAndTest.test, 7 | compare: trainAndTest.compare, 8 | hammingDistance: require("./hamming").hammingDistance, 9 | }; 10 | -------------------------------------------------------------------------------- /dist/utils/index.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | var trainAndTest = require("./trainAndTest"); 4 | 5 | module.exports = { 6 | hash: require("./hash"), 7 | partitions: require("./partitions"), 8 | PrecisionRecall: require("./PrecisionRecall"), 9 | test: trainAndTest.test, 10 | compare: trainAndTest.compare, 11 | hammingDistance: require("./hamming").hammingDistance 12 | }; -------------------------------------------------------------------------------- /test/wordcounts.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Simple calculation of word-counts in a sentence. 3 | * @param sentence 4 | * @return a hash {word1: count1, word2: count2,...} 5 | * words are separated by spaces. 6 | */ 7 | export default function (sentence) { 8 | return sentence.split(' ').reduce(function (counts, word) { 9 | counts[word] = (counts[word] || 0) + 1; 10 | return counts; 11 | }, {}); 12 | } -------------------------------------------------------------------------------- /src/formats/json.js: -------------------------------------------------------------------------------- 1 | /** 2 | * convert a single dataset to compact JSON format. 
3 | * @param dataset an array of samples in the format {input: {feature1: xxx, feature2: yyy, ...}, output: [1,2,3]} 4 | */ 5 | export function toJSON(dataset) { 6 | json = "["; 7 | for (var i=0; i0? "\n, ": "\n ")+ 10 | JSON.stringify(dataset[i])); 11 | } 12 | json += "\n]\n"; 13 | return json; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /src/utils/hamming.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Calculate Hamming distance between two sets 3 | * @param a, b - arrays 4 | * @return number of elements in a-b plus number of elements in b-a 5 | */ 6 | export function hammingDistance(a, b) { 7 | var d = 0; 8 | for (var i = 0; i < a.length; ++i) { 9 | if (b.indexOf(a[i]) < 0) d++; 10 | } 11 | for (var i = 0; i < b.length; ++i) { 12 | if (a.indexOf(b[i]) < 0) d++; 13 | } 14 | return d; 15 | } 16 | -------------------------------------------------------------------------------- /src/features/NGramsOfWords.js: -------------------------------------------------------------------------------- 1 | /** 2 | * NGramExtractor - extracts sequences of words in a text as its features. 
3 | */ 4 | 5 | import NGramsFromArray from './NGramsFromArray'; 6 | export default function (numOfWords, gap) { 7 | return function (sample, features) { 8 | var words = sample.split(/[ \t,;:.!?]/).filter(function (a) { 9 | return !!a 10 | }); // all non-empty words 11 | NGramsFromArray(numOfWords, gap, words, features); 12 | }; 13 | }; 14 | -------------------------------------------------------------------------------- /dist/formats/index.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports.svmlight = exports.tsv = exports.json = exports.arff = void 0; 7 | 8 | var arff = require("./arff"); 9 | 10 | exports.arff = arff; 11 | 12 | var json = require("./json"); 13 | 14 | exports.json = json; 15 | 16 | var tsv = require("./tsv"); 17 | 18 | exports.tsv = tsv; 19 | 20 | var svmlight = require("./svmlight"); 21 | 22 | exports.svmlight = svmlight; -------------------------------------------------------------------------------- /test/sorted.js: -------------------------------------------------------------------------------- 1 | /** 2 | * This unit adds a non-intrusive property "sorted" to the Array prototype. 3 | * 4 | * It is used only for testing, when the order of the output array is not important. For example: 5 | * 6 | * classifier.classify("I want aa bb").sorted().should.eql(['A','B']); 7 | * 8 | * @author Erel Segal-Halevi 9 | * @since 2013-09-09 10 | */ 11 | 12 | Object.defineProperty(Array.prototype, 'sorted', { 13 | value: function() { this.sort(); return this; } 14 | }); 15 | -------------------------------------------------------------------------------- /src/formats/tsv.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Small utility for writing a dataset in tab-separated-values format. 
3 | * 4 | * @author Erel Segal-Halevi 5 | * @since 2013-08 6 | */ 7 | 8 | 9 | /** 10 | * Write the dataset, one sample per line, with the given separator between sample and output. 11 | */ 12 | exports.toTSV = function(dataset, separator) { 13 | if (!separator) { 14 | separator="\t"; 15 | } 16 | dataset.forEach(function(sample) { 17 | console.log(JSON.stringify(sample.input)+separator+"["+sample.output+"]"); 18 | }); 19 | }; 20 | -------------------------------------------------------------------------------- /dist/formats/json.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports.toJSON = toJSON; 7 | 8 | /** 9 | * convert a single dataset to compact JSON format. 10 | * @param dataset an array of samples in the format {input: {feature1: xxx, feature2: yyy, ...}, output: [1,2,3]} 11 | */ 12 | function toJSON(dataset) { 13 | json = "["; 14 | 15 | for (var i = 0; i < dataset.length; ++i) { 16 | json += (i > 0 ? "\n, " : "\n ") + JSON.stringify(dataset[i]); 17 | } 18 | 19 | json += "\n]\n"; 20 | return json; 21 | } -------------------------------------------------------------------------------- /dist/formats/tsv.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | /** 4 | * Small utility for writing a dataset in tab-separated-values format. 5 | * 6 | * @author Erel Segal-Halevi 7 | * @since 2013-08 8 | */ 9 | 10 | /** 11 | * Write the dataset, one sample per line, with the given separator between sample and output. 
12 | */ 13 | exports.toTSV = function (dataset, separator) { 14 | if (!separator) { 15 | separator = "\t"; 16 | } 17 | 18 | dataset.forEach(function (sample) { 19 | console.log(JSON.stringify(sample.input) + separator + "[" + sample.output + "]"); 20 | }); 21 | }; -------------------------------------------------------------------------------- /dist/utils/hamming.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports.hammingDistance = hammingDistance; 7 | 8 | /** 9 | * Calculate Hamming distance between two sets 10 | * @param a, b - arrays 11 | * @return number of elements in a-b plus number of elements in b-a 12 | */ 13 | function hammingDistance(a, b) { 14 | var d = 0; 15 | 16 | for (var i = 0; i < a.length; ++i) { 17 | if (b.indexOf(a[i]) < 0) d++; 18 | } 19 | 20 | for (var i = 0; i < b.length; ++i) { 21 | if (a.indexOf(b[i]) < 0) d++; 22 | } 23 | 24 | return d; 25 | } -------------------------------------------------------------------------------- /test/utilsTest/HammingDistanceTest.js: -------------------------------------------------------------------------------- 1 | #!mocha 2 | 3 | /** 4 | * a unit-test for Multi-Label classification 5 | */ 6 | 7 | import { hammingDistance } from "../../dist/utils"; 8 | 9 | describe("Hamming distance", function() { 10 | it("calculates hamming distance", function() { 11 | hammingDistance([], []).should.equal(0); 12 | hammingDistance(["a"], []).should.equal(1); 13 | hammingDistance([], ["a"]).should.equal(1); 14 | hammingDistance(["a"], ["a"]).should.equal(0); 15 | hammingDistance(["a"], ["b"]).should.equal(2); 16 | hammingDistance(["a", "b"], ["b", "c"]).should.equal(2); 17 | }); 18 | }); 19 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These 
are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: turtuvshin 5 | open_collective: intelligo 6 | ko_fi: turtuvshin 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /src/features/NGramsFromArray.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Convert an array of words/tokens to a set of n-grams, for a given n, possibly with a gap: 3 | */ 4 | export default function (numOfWords, gap, grams, features) { 5 | for (var i = 0; i < numOfWords - 1 - (gap ? 1 : 0); ++i) { 6 | grams.unshift("[start]"); 7 | grams.push("[end]"); 8 | } 9 | for (var i = 0; i <= grams.length - numOfWords; ++i) { 10 | let sliceOfWords = grams.slice(i, i + numOfWords); 11 | if (gap) sliceOfWords[1] = "-"; 12 | let feature = sliceOfWords.join(" "); 13 | features[feature.trim()] = 1; 14 | } 15 | for (var i = 0; i < numOfWords - 1 - (gap ? 1 : 0); ++i) { 16 | grams.pop(); 17 | grams.shift(); 18 | } 19 | }; -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. 
I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /src/core/svm/SvmJsDemo.js: -------------------------------------------------------------------------------- 1 | // simple demonstration of SVM 2 | 3 | var SvmJs = require('./SvmJs'); 4 | 5 | var svm = new SvmJs({C: 1.0}); 6 | 7 | var traindata = [ 8 | {input: [0,0], output: 0}, 9 | {input: [0,1], output: 0}, 10 | {input: [1,0], output: 1}, 11 | {input: [1,1], output: 1}, 12 | ]; 13 | 14 | svm.trainBatch(traindata); 15 | 16 | console.dir(svm.classify([0,2])); // 0 17 | console.dir(svm.classify([1,3])); // 1 18 | 19 | // explain: 20 | console.dir(svm.classify([0,2], 3)); // 0 21 | console.dir(svm.classify([1,3], 3)); // 1 22 | 23 | 24 | //continuous output: 25 | console.dir(svm.classify([0,2], 0, true)); // -1 26 | console.dir(svm.classify([1,3], 0, true)); // 1 27 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | home: true 3 | heroImage: /logo.png 4 | heroText: Neuro 5 | tagline: Neuro.js is machine learning framework for building AI assistants and chat-bots. 6 | actionText: Get Started → 7 | actionLink: /learn/ 8 | features: 9 | - title: ML with Javascript 10 | details: Neuro is a library for developing and training ML models in JavaScript, and deploying in browser or on Node.js 11 | - title: Awesome Feature 12 | details: Neuro is a supports Multi label classification, online learning, real-time classification. 
13 | - title: Simplicity & performance 14 | details: Everyone should have access to simple machine learning. Practical machine learning should be simple. 15 | footer: MIT Licensed | Powered by Intelligo Systems 16 | --- -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | import EnhancedClassifier from "./core/EnhancedClassifier"; 2 | import multilabel from "./core/multilabel"; 3 | import NeuralNetwork from "./core/neural/NeuralNetwork"; 4 | import SvmJs from "./core/svm/SvmJs"; 5 | import SvmLinear from "./core/svm/SvmLinear"; 6 | import SvmPerf from "./core/svm/SvmPerf"; 7 | import Winnow from "./core/winnow/WinnowHash"; 8 | import features from "./features"; 9 | import formats from "./formats"; 10 | import utils from "./utils"; 11 | 12 | export default { 13 | classifiers: { 14 | NeuralNetwork, 15 | SvmJs, 16 | SvmLinear, 17 | SvmPerf, 18 | Winnow, 19 | multilabel, 20 | EnhancedClassifier 21 | }, 22 | features, 23 | formats, 24 | utils 25 | }; 26 | -------------------------------------------------------------------------------- /src/core/neural/NeuralNetwork.js: -------------------------------------------------------------------------------- 1 | /** 2 | * A wrapper for Heather Arthur's brain.js package: https://github.com/harthur/brain 3 | * 4 | * @author Erel Segal-haLevi 5 | * @since 2013-09-29 6 | */ 7 | 8 | var NeuralNetwork = require('brain.js').NeuralNetwork; 9 | 10 | NeuralNetwork.prototype.trainOnline = function () {throw new Error("NeuralNetwork does not support online training");}; 11 | NeuralNetwork.prototype.train = function(dataset) { 12 | dataset.forEach(function(datum) { 13 | if (!Array.isArray(datum.output) && !(datum.output instanceof Object)){ 14 | datum.output = [datum.output]; 15 | } 16 | }); 17 | this.train(dataset); 18 | }; 19 | NeuralNetwork.prototype.classify = NeuralNetwork.prototype.run; 20 | 21 | module.exports 
= NeuralNetwork; 22 | -------------------------------------------------------------------------------- /dist/core/svm/SvmJsDemo.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | // simple demonstration of SVM 4 | var SvmJs = require('./SvmJs'); 5 | 6 | var svm = new SvmJs({ 7 | C: 1.0 8 | }); 9 | var traindata = [{ 10 | input: [0, 0], 11 | output: 0 12 | }, { 13 | input: [0, 1], 14 | output: 0 15 | }, { 16 | input: [1, 0], 17 | output: 1 18 | }, { 19 | input: [1, 1], 20 | output: 1 21 | }]; 22 | svm.trainBatch(traindata); 23 | console.dir(svm.classify([0, 2])); // 0 24 | 25 | console.dir(svm.classify([1, 3])); // 1 26 | // explain: 27 | 28 | console.dir(svm.classify([0, 2], 3)); // 0 29 | 30 | console.dir(svm.classify([1, 3], 3)); // 1 31 | //continuous output: 32 | 33 | console.dir(svm.classify([0, 2], 0, true)); // -1 34 | 35 | console.dir(svm.classify([1, 3], 0, true)); // 1 -------------------------------------------------------------------------------- /dist/features/NGramsOfWords.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports["default"] = _default; 7 | 8 | var _NGramsFromArray = _interopRequireDefault(require("./NGramsFromArray")); 9 | 10 | function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; } 11 | 12 | /** 13 | * NGramExtractor - extracts sequences of words in a text as its features. 
14 | */ 15 | function _default(numOfWords, gap) { 16 | return function (sample, features) { 17 | var words = sample.split(/[ \t,;:.!?]/).filter(function (a) { 18 | return !!a; 19 | }); // all non-empty words 20 | 21 | (0, _NGramsFromArray["default"])(numOfWords, gap, words, features); 22 | }; 23 | } 24 | 25 | ; -------------------------------------------------------------------------------- /src/core/winnow/WinnowHashDemo.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Demonstrates the winnow classification algorithm. 3 | * 4 | * @author Erel Segal-Halevi 5 | * @since 2013-07 6 | */ 7 | 8 | console.log("Winnow demo start"); 9 | var Winnow = require('./WinnowHash'); 10 | 11 | var classifier = new Winnow({ 12 | default_positive_weight: 1, 13 | default_negative_weight: 1, 14 | threshold: 0, 15 | do_averaging: false, 16 | margin: 1, 17 | }); 18 | 19 | classifier.trainOnline({'a': 1, 'b': 0}, 0); 20 | classifier.trainOnline({'a': 0, 'b': 1}, 0); 21 | classifier.trainOnline({'a': 0, 'b': 0}, 1); 22 | 23 | console.dir(classifier.classify({'a': 0, 'b': 0}, /*explain=*/1)); 24 | console.dir(classifier.classify({'a': 1, 'b': 1}, /*explain=*/3)); 25 | 26 | console.log("Winnow demo end"); 27 | -------------------------------------------------------------------------------- /dist/core/neural/NeuralNetwork.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | /** 4 | * A wrapper for Heather Arthur's brain.js package: https://github.com/harthur/brain 5 | * 6 | * @author Erel Segal-haLevi 7 | * @since 2013-09-29 8 | */ 9 | var NeuralNetwork = require('brain.js').NeuralNetwork; 10 | 11 | NeuralNetwork.prototype.trainOnline = function () { 12 | throw new Error("NeuralNetwork does not support online training"); 13 | }; 14 | 15 | NeuralNetwork.prototype.train = function (dataset) { 16 | dataset.forEach(function (datum) { 17 | if (!Array.isArray(datum.output) && !(datum.output 
instanceof Object)) { 18 | datum.output = [datum.output]; 19 | } 20 | }); 21 | this.train(dataset); 22 | }; 23 | 24 | NeuralNetwork.prototype.classify = NeuralNetwork.prototype.run; 25 | module.exports = NeuralNetwork; -------------------------------------------------------------------------------- /src/core/multilabel/index.js: -------------------------------------------------------------------------------- 1 | export const BinaryRelevance = require('./BinaryRelevance'); 2 | export const BinarySegmentation = require('./BinarySegmentation'); 3 | export const MulticlassSegmentation = require('./MulticlassSegmentation'); 4 | export const MetaLabeler = require('./MetaLabeler'); 5 | export const CrossLanguageModel = require('./CrossLangaugeModelClassifier'); 6 | export const ThresholdClassifier = require('./ThresholdClassifier'); 7 | 8 | // add a "classify and log" method to all classifiers, for demos: 9 | for (var classifierClass in module.exports) { 10 | if (module.exports[classifierClass].prototype && module.exports[classifierClass].prototype.classify) 11 | module.exports[classifierClass].prototype.classifyAndLog = function(sample) { 12 | console.log(sample+" is "+this.classify(sample)); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /dist/features/NGramsFromArray.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports["default"] = _default; 7 | 8 | /** 9 | * Convert an array of words/tokens to a set of n-grams, for a given n, possibly with a gap: 10 | */ 11 | function _default(numOfWords, gap, grams, features) { 12 | for (var i = 0; i < numOfWords - 1 - (gap ? 
1 : 0); ++i) { 13 | grams.unshift("[start]"); 14 | grams.push("[end]"); 15 | } 16 | 17 | for (var i = 0; i <= grams.length - numOfWords; ++i) { 18 | var sliceOfWords = grams.slice(i, i + numOfWords); 19 | if (gap) sliceOfWords[1] = "-"; 20 | var feature = sliceOfWords.join(" "); 21 | features[feature.trim()] = 1; 22 | } 23 | 24 | for (var i = 0; i < numOfWords - 1 - (gap ? 1 : 0); ++i) { 25 | grams.pop(); 26 | grams.shift(); 27 | } 28 | } 29 | 30 | ; -------------------------------------------------------------------------------- /dist/core/decisiontree/DecisionTreeDemo.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | var _DecisionTree = _interopRequireDefault(require("./DecisionTree")); 4 | 5 | function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; } 6 | 7 | console.log("Decision Tree demo start"); 8 | var classifier = new _DecisionTree["default"]({}); 9 | dataset = [{ 10 | input: { 11 | a: 1, 12 | b: 0 13 | }, 14 | output: 0 15 | }, { 16 | input: { 17 | a: 0, 18 | b: 1 19 | }, 20 | output: 0 21 | }, { 22 | input: { 23 | a: 0, 24 | b: 0 25 | }, 26 | output: 1 27 | }]; 28 | classifier.trainBatch(dataset); 29 | console.dir(classifier.classify({ 30 | 'a': 0, 31 | 'b': 0 32 | }, 33 | /*explain=*/ 34 | 1)); 35 | console.dir(classifier.classify({ 36 | 'a': 1, 37 | 'b': 1 38 | }, 39 | /*explain=*/ 40 | 3)); 41 | console.log("Decision Tree demo end"); -------------------------------------------------------------------------------- /dist/core/winnow/WinnowHashDemo.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | /** 4 | * Demonstrates the winnow classification algorithm. 
5 | * 6 | * @author Erel Segal-Halevi 7 | * @since 2013-07 8 | */ 9 | console.log("Winnow demo start"); 10 | 11 | var Winnow = require('./WinnowHash'); 12 | 13 | var classifier = new Winnow({ 14 | default_positive_weight: 1, 15 | default_negative_weight: 1, 16 | threshold: 0, 17 | do_averaging: false, 18 | margin: 1 19 | }); 20 | classifier.trainOnline({ 21 | 'a': 1, 22 | 'b': 0 23 | }, 0); 24 | classifier.trainOnline({ 25 | 'a': 0, 26 | 'b': 1 27 | }, 0); 28 | classifier.trainOnline({ 29 | 'a': 0, 30 | 'b': 0 31 | }, 1); 32 | console.dir(classifier.classify({ 33 | 'a': 0, 34 | 'b': 0 35 | }, 36 | /*explain=*/ 37 | 1)); 38 | console.dir(classifier.classify({ 39 | 'a': 1, 40 | 'b': 1 41 | }, 42 | /*explain=*/ 43 | 3)); 44 | console.log("Winnow demo end"); -------------------------------------------------------------------------------- /docs/.vuepress/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "App", 3 | "icons": [ 4 | { 5 | "src": "\/android-icon-36x36.png", 6 | "sizes": "36x36", 7 | "type": "image\/png", 8 | "density": "0.75" 9 | }, 10 | { 11 | "src": "\/android-icon-48x48.png", 12 | "sizes": "48x48", 13 | "type": "image\/png", 14 | "density": "1.0" 15 | }, 16 | { 17 | "src": "\/android-icon-72x72.png", 18 | "sizes": "72x72", 19 | "type": "image\/png", 20 | "density": "1.5" 21 | }, 22 | { 23 | "src": "\/android-icon-96x96.png", 24 | "sizes": "96x96", 25 | "type": "image\/png", 26 | "density": "2.0" 27 | }, 28 | { 29 | "src": "\/android-icon-144x144.png", 30 | "sizes": "144x144", 31 | "type": "image\/png", 32 | "density": "3.0" 33 | }, 34 | { 35 | "src": "\/android-icon-192x192.png", 36 | "sizes": "192x192", 37 | "type": "image\/png", 38 | "density": "4.0" 39 | } 40 | ] 41 | } -------------------------------------------------------------------------------- /src/features/RegexpNormalizer.js: -------------------------------------------------------------------------------- 1 | /** 2 | * 
normalizes a sentence based on a list of regular expressions. 3 | * @param normalizations - an array of objects {source: /regexp/g, target: "target"} 4 | * @param sample - a string. 5 | * @return a new string, with all normalizations carried out. 6 | */ 7 | export default function (normalizations) { 8 | return function (sample) { 9 | normalizations.forEach(function (normalization) { 10 | var matches = null; 11 | if (normalization.source instanceof RegExp) { 12 | if (!normalization.source.global) { 13 | console.warn("normalization source, " + normalization.source + ", is not global - skipping"); 14 | return; 15 | } 16 | } else { 17 | normalization.source = new RegExp(normalization.source, "gi"); 18 | } 19 | sample = sample.replace(normalization.source, normalization.target); 20 | //console.log(sample); 21 | }); 22 | return sample; 23 | }; 24 | }; 25 | -------------------------------------------------------------------------------- /src/features/NGramsOfLetters.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Extracts substrings of letters of a given size. 3 | */ 4 | 5 | var PAD_CHAR = '#'; 6 | 7 | /** 8 | * Add letter n-gram features to the given feature-vector. 9 | * 10 | * @param numOfLetters - a positive integer. 11 | * @param caseSensitive - boolean. if false, convert all to lower case. 12 | * @param sample - a string. 13 | * @param features an initial hash of features (optional). 14 | * @return a hash with all the different letter n-grams contained in the given sentence. 
15 | */ 16 | export default function (numOfLetters, caseSensitive) { 17 | return function (sample, features) { 18 | if (!caseSensitive) sample = sample.toLowerCase(); 19 | for (var i = 0; i < numOfLetters - 1; ++i) 20 | sample = PAD_CHAR + sample + PAD_CHAR; 21 | for (var firstLetter = 0; firstLetter < sample.length - numOfLetters + 1; ++firstLetter) { 22 | var feature = sample.substr(firstLetter, numOfLetters); 23 | features[feature] = 1; 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 
39 | -------------------------------------------------------------------------------- /test/test_utils.js: -------------------------------------------------------------------------------- 1 | /* 2 | Module test_utils contains helpful routines for running test of existing classifiers, 3 | currently both of them are the copy from different modules 4 | 5 | */ 6 | import PrecisionRecall from '../dist/utils/PrecisionRecall'; 7 | 8 | export function test(dataset, classifier) { 9 | var currentStats = new PrecisionRecall(); 10 | for (var i = 0; i < dataset.length; ++i) { 11 | var expectedClasses = dataset[i].output; 12 | var actualClasses = classifier.classify(dataset[i].input); 13 | currentStats.addCasesHash(expectedClasses, actualClasses, true); 14 | } 15 | return currentStats; 16 | } 17 | export function F1_evaluation(stats, type_of_averaging) { 18 | if (type_of_averaging == 0) { 19 | if ((stats['TP'] == 0) || (stats['TP'] + stats['FP'] == 0) || (stats['TP'] + stats['FN'] == 0)) 20 | return 0; 21 | var precision = stats['TP'] / (stats['TP'] + stats['FP']); 22 | var recall = stats['TP'] / (stats['TP'] + stats['FN']); 23 | var f1 = (precision * recall) / (precision + recall); 24 | return f1; 25 | } 26 | } -------------------------------------------------------------------------------- /test/generaterandom.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Generating random string with given number of words and generating random list 3 | * with given length with element from the given list 4 | */ 5 | 6 | export function random_string(length) { 7 | var chars = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXTZabcdefghiklmnopqrstuvwxyz'; 8 | length = length ? 
length : 10; 9 | var string = ''; 10 | for (var i = 0; i < length; i++) { 11 | var word_length = Math.floor(Math.random() * 10 + 1); 12 | for (var j = 0; j <= word_length; j++) { 13 | var randomNumber = Math.floor(Math.random() * chars.length); 14 | var ch = chars.substring(randomNumber, randomNumber + 1); 15 | string += ch; 16 | } 17 | string += " "; 18 | } 19 | return string; 20 | } 21 | export function random_list_length(list) { 22 | return this.random_list(Math.floor(Math.random() * 5), list); 23 | } 24 | export function random_list(length, list) { 25 | var result = []; 26 | for (var i = 0; i < length; i++) { 27 | result.push(list[Math.floor(Math.random() * list.length)]); 28 | } 29 | return result; 30 | } -------------------------------------------------------------------------------- /src/core/svm/SvmPerfDemo.js: -------------------------------------------------------------------------------- 1 | // simple demonstration of binary SVM, based on SVM-Perf 2 | 3 | var SvmPerf = require('./SvmPerf'); 4 | 5 | var trainSet = [ 6 | {input: [0,0], output: 0}, 7 | {input: [1,1], output: 0}, 8 | {input: [0,1], output: 1}, 9 | {input: [1,2], output: 1} ]; 10 | 11 | // the separating line goes through [0,0.5] and [1,1.5]. 
It is: 12 | // 0.5+x-y = 0 13 | // or: 2y-2x-1 = 0 14 | 15 | 16 | var classifier = new SvmPerf( 17 | { 18 | learn_args: "-c 20.0", 19 | model_file_prefix: "tempfiles/SvmPerfDemo", 20 | debug:false 21 | } 22 | ); 23 | classifier.trainBatch(trainSet); 24 | 25 | // binary output: 26 | console.dir(classifier.classify([0,2])); // 1 27 | console.dir(classifier.classify([1,0])); // 0 28 | 29 | console.dir(classifier.modelMap); // { '0': -1, '1': -2, '2': 2 } 30 | 31 | // explain: 32 | console.dir(classifier.classify([0,2], 3)); // 1 33 | console.dir(classifier.classify([1,0], 3)); // 0 34 | 35 | // continuous output: 36 | console.dir(classifier.classify([0,2], 0, true)); // 3 37 | console.dir(classifier.classify([1,0], 0, true)); // -3 38 | -------------------------------------------------------------------------------- /dist/features/RegexpNormalizer.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports["default"] = _default; 7 | 8 | /** 9 | * normalizes a sentence based on a list of regular expressions. 10 | * @param normalizations - an array of objects {source: /regexp/g, target: "target"} 11 | * @param sample - a string. 12 | * @return a new string, with all normalizations carried out. 
13 | */ 14 | function _default(normalizations) { 15 | return function (sample) { 16 | normalizations.forEach(function (normalization) { 17 | var matches = null; 18 | 19 | if (normalization.source instanceof RegExp) { 20 | if (!normalization.source.global) { 21 | console.warn("normalization source, " + normalization.source + ", is not global - skipping"); 22 | return; 23 | } 24 | } else { 25 | normalization.source = new RegExp(normalization.source, "gi"); 26 | } 27 | 28 | sample = sample.replace(normalization.source, normalization.target); //console.log(sample); 29 | }); 30 | return sample; 31 | }; 32 | } 33 | 34 | ; -------------------------------------------------------------------------------- /src/features/RegexpSplitter.js: -------------------------------------------------------------------------------- 1 | /** 2 | * splits sentences using a custom regular expression. 3 | * @param regexpString - a string 4 | * @param delimitersToInclude - a hash (set) of delimiters that will be added to the end of the previous sentence. 5 | * @param text - a string. 6 | * @return an array of parts (sentences). 
7 | */ 8 | export default function (regexpString, delimitersToInclude) { 9 | regexpString = "(" + regexpString + ")"; // to capture the delimiters 10 | var regexp = new RegExp(regexpString, "i"); 11 | if (!delimitersToInclude) delimitersToInclude = {}; 12 | return function (text) { 13 | var parts = text.split(regexp); 14 | var normalizedParts = []; 15 | for (var i = 0; i < parts.length; i += 2) { 16 | parts[i] = parts[i].trim(); 17 | var part = parts[i]; 18 | if (i + 1 < parts.length) { 19 | var delimiter = parts[i + 1]; 20 | if (delimitersToInclude[delimiter]) 21 | part += " " + delimiter; 22 | } 23 | if (part.length > 0) 24 | normalizedParts.push(part); 25 | } 26 | //console.log(text); 27 | //console.dir(normalizedParts); 28 | return normalizedParts; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /dist/core/svm/SvmPerfDemo.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | // simple demonstration of binary SVM, based on SVM-Perf 4 | var SvmPerf = require('./SvmPerf'); 5 | 6 | var trainSet = [{ 7 | input: [0, 0], 8 | output: 0 9 | }, { 10 | input: [1, 1], 11 | output: 0 12 | }, { 13 | input: [0, 1], 14 | output: 1 15 | }, { 16 | input: [1, 2], 17 | output: 1 18 | }]; // the separating line goes through [0,0.5] and [1,1.5]. 
It is: 19 | // 0.5+x-y = 0 20 | // or: 2y-2x-1 = 0 21 | 22 | var classifier = new SvmPerf({ 23 | learn_args: "-c 20.0", 24 | model_file_prefix: "tempfiles/SvmPerfDemo", 25 | debug: false 26 | }); 27 | classifier.trainBatch(trainSet); // binary output: 28 | 29 | console.dir(classifier.classify([0, 2])); // 1 30 | 31 | console.dir(classifier.classify([1, 0])); // 0 32 | 33 | console.dir(classifier.modelMap); // { '0': -1, '1': -2, '2': 2 } 34 | // explain: 35 | 36 | console.dir(classifier.classify([0, 2], 3)); // 1 37 | 38 | console.dir(classifier.classify([1, 0], 3)); // 0 39 | // continuous output: 40 | 41 | console.dir(classifier.classify([0, 2], 0, true)); // 3 42 | 43 | console.dir(classifier.classify([1, 0], 0, true)); // -3 -------------------------------------------------------------------------------- /dist/features/NGramsOfLetters.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports["default"] = _default; 7 | 8 | /** 9 | * Extracts substrings of letters of a given size. 10 | */ 11 | var PAD_CHAR = '#'; 12 | /** 13 | * Add letter n-gram features to the given feature-vector. 14 | * 15 | * @param numOfLetters - a positive integer. 16 | * @param caseSensitive - boolean. if false, convert all to lower case. 17 | * @param sample - a string. 18 | * @param features an initial hash of features (optional). 19 | * @return a hash with all the different letter n-grams contained in the given sentence. 
20 | */ 21 | 22 | function _default(numOfLetters, caseSensitive) { 23 | return function (sample, features) { 24 | if (!caseSensitive) sample = sample.toLowerCase(); 25 | 26 | for (var i = 0; i < numOfLetters - 1; ++i) { 27 | sample = PAD_CHAR + sample + PAD_CHAR; 28 | } 29 | 30 | for (var firstLetter = 0; firstLetter < sample.length - numOfLetters + 1; ++firstLetter) { 31 | var feature = sample.substr(firstLetter, numOfLetters); 32 | features[feature] = 1; 33 | } 34 | }; 35 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | 8 | # Runtime data 9 | pids 10 | *.pid 11 | *.seed 12 | *.pid.lock 13 | 14 | # Directory for instrumented libs generated by jscoverage/JSCover 15 | lib-cov 16 | 17 | # Coverage directory used by tools like istanbul 18 | coverage 19 | 20 | # nyc test coverage 21 | .nyc_output 22 | 23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 24 | .grunt 25 | 26 | # Bower dependency directory (https://bower.io/) 27 | bower_components 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons (https://nodejs.org/api/addons.html) 33 | build/Release 34 | 35 | # Dependency directories 36 | node_modules/ 37 | jspm_packages/ 38 | 39 | # TypeScript v1 declaration files 40 | typings/ 41 | 42 | # Optional npm cache directory 43 | .npm 44 | 45 | # Optional eslint cache 46 | .eslintcache 47 | 48 | # Optional REPL history 49 | .node_repl_history 50 | 51 | # Output of 'npm pack' 52 | *.tgz 53 | 54 | # Yarn Integrity file 55 | .yarn-integrity 56 | 57 | # dotenv environment variables file 58 | .env 59 | 60 | # next.js build output 61 | .next 62 | -------------------------------------------------------------------------------- /docs/.gitignore: 
-------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | 8 | # Runtime data 9 | pids 10 | *.pid 11 | *.seed 12 | *.pid.lock 13 | 14 | # Directory for instrumented libs generated by jscoverage/JSCover 15 | lib-cov 16 | 17 | # Coverage directory used by tools like istanbul 18 | coverage 19 | 20 | # nyc test coverage 21 | .nyc_output 22 | 23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 24 | .grunt 25 | 26 | # Bower dependency directory (https://bower.io/) 27 | bower_components 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons (https://nodejs.org/api/addons.html) 33 | build/Release 34 | 35 | # Dependency directories 36 | node_modules/ 37 | jspm_packages/ 38 | 39 | # TypeScript v1 declaration files 40 | typings/ 41 | 42 | # Optional npm cache directory 43 | .npm 44 | 45 | # Optional eslint cache 46 | .eslintcache 47 | 48 | # Optional REPL history 49 | .node_repl_history 50 | 51 | # Output of 'npm pack' 52 | *.tgz 53 | 54 | # Yarn Integrity file 55 | .yarn-integrity 56 | 57 | # dotenv environment variables file 58 | .env 59 | 60 | # next.js build output 61 | .next 62 | -------------------------------------------------------------------------------- /test/utilsTest/ListTest.js: -------------------------------------------------------------------------------- 1 | import { isEqual } from "underscore"; 2 | import { average, listembed, median, variance } from "../../dist/utils/list"; 3 | 4 | describe("List test function", function() { 5 | it("It should correctly calculate Variance", function() { 6 | let list = [170, 300, 430, 470, 600]; 7 | variance(list).should.be.equal(21704); 8 | }); 9 | 10 | it("it should calculate average correctly", function() { 11 | let list1 = [1, 2, 3, 4, 5, 6, 7]; 12 | average(list1).should.be.equal(4); 13 | }); 14 | 15 | it("it should calculate median 
correctly", function() { 16 | var list1 = [3, 8, 9, 1, 5, 7, 9, 21]; 17 | median(list1).should.be.equal(7.5); 18 | }); 19 | 20 | it("it should know how to do embedding", function() { 21 | isEqual(listembed(["label"]), [["label"]]).should.equal(true); 22 | isEqual(listembed([]), [[]]).should.equal(true); 23 | isEqual(listembed(undefined), [[]]).should.equal(true); 24 | isEqual(listembed(null), [[]]).should.equal(true); 25 | isEqual( 26 | listembed({ 27 | classes: "label" 28 | }), 29 | ["label"] 30 | ).should.equal(true); 31 | isEqual( 32 | listembed({ 33 | classes: ["label"] 34 | }), 35 | [["label"]] 36 | ).should.equal(true); 37 | }); 38 | }); 39 | -------------------------------------------------------------------------------- /dist/features/RegexpSplitter.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports["default"] = _default; 7 | 8 | /** 9 | * splits sentences using a custom regular expression. 10 | * @param regexpString - a string 11 | * @param delimitersToInclude - a hash (set) of delimiters that will be added to the end of the previous sentence. 12 | * @param text - a string. 13 | * @return an array of parts (sentences). 
14 | */ 15 | function _default(regexpString, delimitersToInclude) { 16 | regexpString = "(" + regexpString + ")"; // to capture the delimiters 17 | 18 | var regexp = new RegExp(regexpString, "i"); 19 | if (!delimitersToInclude) delimitersToInclude = {}; 20 | return function (text) { 21 | var parts = text.split(regexp); 22 | var normalizedParts = []; 23 | 24 | for (var i = 0; i < parts.length; i += 2) { 25 | parts[i] = parts[i].trim(); 26 | var part = parts[i]; 27 | 28 | if (i + 1 < parts.length) { 29 | var delimiter = parts[i + 1]; 30 | if (delimitersToInclude[delimiter]) part += " " + delimiter; 31 | } 32 | 33 | if (part.length > 0) normalizedParts.push(part); 34 | } //console.log(text); 35 | //console.dir(normalizedParts); 36 | 37 | 38 | return normalizedParts; 39 | }; 40 | } -------------------------------------------------------------------------------- /docs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "neuro.js.org", 3 | "description": "Documentation site for Neuro", 4 | "version": "1.0.0", 5 | "author": "Turtuvshin Byambaa ", 6 | "repository": { 7 | "type": "git", 8 | "url": "https://github.com/intelligo-systems/neuro.git" 9 | }, 10 | "engines": { 11 | "node": ">=0.12" 12 | }, 13 | "devDependencies": { 14 | "@vuepress/plugin-google-analytics": "1.3.0", 15 | "vuepress": "1.3.0", 16 | "vuepress-plugin-janitor": "1.0.0", 17 | "vuepress-plugin-reading-time": "0.1.1", 18 | "vuepress-plugin-rss": "2.0.0", 19 | "yaml-front-matter": "4.1.0" 20 | }, 21 | "scripts": { 22 | "dev": "vuepress dev", 23 | "build": "vuepress build", 24 | "test": "mocha" 25 | }, 26 | "contributors": [ 27 | { 28 | "name": "Turtuvshin Byambaa", 29 | "email": "toroo.byamba@gmail.com" 30 | } 31 | ], 32 | "keywords": [ 33 | "ai", 34 | "ai-bot", 35 | "artificial-intelligence", 36 | "bot", 37 | "chat", 38 | "chatbot", 39 | "classifier", 40 | "conversation", 41 | "framework", 42 | "intelligo", 43 | "intelligence", 44 | "neural 
network", 45 | "neural", 46 | "neuro", 47 | "network", 48 | "neural-networks", 49 | "machine-learning" 50 | ], 51 | "license": "MIT" 52 | } 53 | -------------------------------------------------------------------------------- /test/featuresTest/FeatureLookupTableTest.js: -------------------------------------------------------------------------------- 1 | /** 2 | * a unit-test for feature lookup tables 3 | */ 4 | 5 | import FeatureLookupTable from "../../dist/features/FeatureLookupTable"; 6 | 7 | var sample1 = { 8 | a: 111, 9 | b: 222, 10 | c: 333 11 | }; 12 | var sample2 = { 13 | a: 1111, 14 | d: 4444, 15 | e: 5555 16 | }; 17 | var sample3 = { 18 | c: 33333, 19 | e: 55555, 20 | g: 77777 21 | }; 22 | 23 | describe("feature lookup table", function() { 24 | it("adds samples incrementally", function() { 25 | var table = new FeatureLookupTable(); 26 | var array1 = table.hashToArray(sample1); 27 | var array2 = table.hashToArray(sample2); 28 | var array3 = table.hashToArray(sample3); 29 | array1.should.be.an.instanceOf(Array); 30 | array2.should.be.an.instanceOf(Array); 31 | array3.should.be.an.instanceOf(Array); 32 | table.arrayToHash(array1).should.eql(sample1); 33 | table.arrayToHash(array2).should.eql(sample2); 34 | table.arrayToHash(array3).should.eql(sample3); 35 | }); 36 | 37 | it("adds all samples together", function() { 38 | var table = new FeatureLookupTable(); 39 | var arrays = table.hashesToArrays([sample1, sample2, sample3]); 40 | arrays.should.be.an.instanceOf(Array).and.have.lengthOf(3); 41 | table.arraysToHashes(arrays).should.eql([sample1, sample2, sample3]); 42 | }); 43 | }); 44 | -------------------------------------------------------------------------------- /dist/features/HypernymExtractor.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports["default"] = _default; 7 | 8 | /** 9 | * HypernymExtractor - extracts 
hypernyms - words and phrases that are entailed by the given text. 10 | * 11 | * A hypernym is described by a regular expression, a feature string, and a confidence score. 12 | * For example: if regexp=/no (.*)/ and feature="without $1", then, 13 | * if the sample contains "no car", the extractor will add the feature "without car", with the given confidence score (usually a number from 0 to 1). 14 | * 15 | * Adds hypernym features to the given feature-vector. 16 | * 17 | * @param hypernyms - an array of objects {regexp: /regexp/g, feature: "feature", confidence: confidence} 18 | * @param sample - a string. 19 | * @param features an initial hash of features (optional). The hypernym features will be added to that array. 20 | */ 21 | function _default(hypernyms) { 22 | return function (sample, features) { 23 | hypernyms.forEach(function (hypernym) { 24 | if (!(hypernym.regexp instanceof RegExp)) { 25 | hypernym.regexp = new RegExp(hypernym.regexp, "gi"); 26 | } 27 | 28 | if (hypernym.regexp.test(sample)) { 29 | features[hypernym.feature] = hypernym.confidence; 30 | } 31 | }); 32 | }; 33 | } 34 | 35 | ; -------------------------------------------------------------------------------- /src/features/HypernymExtractor.js: -------------------------------------------------------------------------------- 1 | /** 2 | * HypernymExtractor - extracts hypernyms - words and phrases that are entailed by the given text. 3 | * 4 | * A hypernym is described by a regular expression, a feature string, and a confidence score. 5 | * For example: if regexp=/no (.*)/ and feature="without $1", then, 6 | * if the sample contains "no car", the extractor will add the feature "without car", with the given confidence score (usually a number from 0 to 1). 7 | * 8 | * Adds hypernym features to the given feature-vector. 9 | * 10 | * @param hypernyms - an array of objects {regexp: /regexp/g, feature: "feature", confidence: confidence} 11 | * @param sample - a string. 
12 | * @param features an initial hash of features (optional). The hypernym features will be added to that array. 13 | */ 14 | export default function (hypernyms) { 15 | return function (sample, features) { 16 | hypernyms.forEach(function (hypernym) { 17 | if (!(hypernym.regexp instanceof RegExp)) { 18 | hypernym.regexp = new RegExp(hypernym.regexp, "gi"); 19 | } 20 | if (hypernym.regexp.test(sample)) { 21 | features[hypernym.feature] = hypernym.confidence; 22 | } 23 | }); 24 | }; 25 | }; 26 | -------------------------------------------------------------------------------- /src/core/svm/SvmLinearDemo.js: -------------------------------------------------------------------------------- 1 | // simple demonstration of binary SVM, based on LibLinear 2 | 3 | var SvmLinear = require('./SvmLinear'); 4 | var partitions = require(__dirname+'/../../utils/partitions'); 5 | 6 | var dataset = [ 7 | {input: [0,0], output: 0}, 8 | {input: [1,1], output: 0}, 9 | {input: [0,1], output: 1}, 10 | {input: [1,2], output: 1} ]; 11 | 12 | // the separating line goes through [0,0.5] and [1,1.5]. 
It is: 13 | // 0.5+x-y = 0 14 | // or: -1-2x+2y = 0 15 | 16 | var classifier = new SvmLinear( 17 | { 18 | learn_args: "-c 20", 19 | model_file_prefix: "tempfiles/SvmLinearDemo", 20 | train_command: "liblinear_train", 21 | test_command: "liblinear_test", 22 | multiclass: false 23 | } 24 | ); 25 | classifier.trainBatch(dataset); 26 | 27 | console.log("simple classification: "); 28 | console.dir(classifier.classify([0,2])); // 1 29 | console.dir(classifier.classify([1,0])); // 0 30 | 31 | console.log("model: "); 32 | console.dir(classifier.mapLabelToMapFeatureToWeight); // { '0': -1, '1': -2, '2': 2 } 33 | 34 | partitions.partitions(dataset.concat(dataset), 2, function(train, test, index) { 35 | 36 | console.log("fold: "+index) 37 | classifier.trainBatch(train) 38 | 39 | test.forEach(function(instance) { 40 | console.dir("Classify instance:") 41 | console.dir(instance) 42 | console.dir(classifier.classify(instance.input)); 43 | }); 44 | }); -------------------------------------------------------------------------------- /test/featuresTest/RegexpNormalizerTest.js: -------------------------------------------------------------------------------- 1 | /** 2 | * a unit-test for Regular Expression Normalizer. 
3 | */ 4 | 5 | import { RegexpNormalizer } from "../../dist/features"; 6 | 7 | describe("RegexpNormalizer", function() { 8 | it("normalizes simple strings", function() { 9 | var ren = RegexpNormalizer([ 10 | { 11 | source: "can't", 12 | target: "cannot" 13 | }, 14 | { 15 | source: "cannot", 16 | target: "can not" 17 | }, 18 | { 19 | source: "won't", 20 | target: "will not" 21 | } 22 | ]); 23 | ren("I can't do it and I won't do it").should.eql( 24 | "I can not do it and I will not do it" 25 | ); 26 | }); 27 | it("normalizes regular expressions", function() { 28 | var ren = RegexpNormalizer([ 29 | { 30 | source: "\\b(...+)est\\b", 31 | target: "$1" 32 | }, 33 | { 34 | source: "\\b(...+)er\\b", 35 | target: "$1" 36 | } 37 | ]); 38 | ren("faster and highest").should.eql("fast and high"); 39 | }); 40 | it("normalizes numbers", function() { 41 | var ren = RegexpNormalizer([ 42 | { 43 | source: "\\b(\\d+)k\\b", 44 | target: "$1000" 45 | } 46 | ]); 47 | ren("I want 7k dollars").should.eql("I want 7000 dollars"); 48 | ren("I want 70k dollars").should.eql("I want 70000 dollars"); 49 | }); 50 | }); 51 | -------------------------------------------------------------------------------- /dist/core/multilabel/index.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports.ThresholdClassifier = exports.CrossLanguageModel = exports.MetaLabeler = exports.MulticlassSegmentation = exports.BinarySegmentation = exports.BinaryRelevance = void 0; 7 | 8 | var BinaryRelevance = require('./BinaryRelevance'); 9 | 10 | exports.BinaryRelevance = BinaryRelevance; 11 | 12 | var BinarySegmentation = require('./BinarySegmentation'); 13 | 14 | exports.BinarySegmentation = BinarySegmentation; 15 | 16 | var MulticlassSegmentation = require('./MulticlassSegmentation'); 17 | 18 | exports.MulticlassSegmentation = MulticlassSegmentation; 19 | 20 | var MetaLabeler = 
/**
 * Unit tests for the Regular Expression Splitter.
 */

import { RegexpSplitter } from "../../dist/features";

describe("RegexpSplitter", () => {
  it("splits sentences without delimiter", () => {
    const split = RegexpSplitter("[.,;?!]|and");
    const expected = ["Hi", "Who are you", "I am Intelligo Bot"];
    split("Hi. Who are you? I am Intelligo Bot.").should.eql(expected);
    split("Hi.Who are you?I am Intelligo Bot.").should.eql(expected);
    split("Hi. Who are you? I am Intelligo Bot. ").should.eql(expected);
  });

  it("splits sentences with delimiter", () => {
    // "?" is kept (appended to its sentence); "." is dropped.
    const split = RegexpSplitter("[.,;?!]|and", {
      "?": true,
      ".": false
    });
    const expected = ["Hi", "Who are you ?", "I am Intelligo Bot"];
    split("Hi. Who are you? I am Intelligo Bot.").should.eql(expected);
    split("Hi.Who are you?I am Intelligo Bot.").should.eql(expected);
    split("Hi. Who are you? I am Intelligo Bot.").should.eql(expected);
  });
});
It is: 21 | // 0.5+x-y = 0 22 | // or: -1-2x+2y = 0 23 | 24 | var classifier = new SvmLinear({ 25 | learn_args: "-c 20", 26 | model_file_prefix: "tempfiles/SvmLinearDemo", 27 | train_command: "liblinear_train", 28 | test_command: "liblinear_test", 29 | multiclass: false 30 | }); 31 | classifier.trainBatch(dataset); 32 | console.log("simple classification: "); 33 | console.dir(classifier.classify([0, 2])); // 1 34 | 35 | console.dir(classifier.classify([1, 0])); // 0 36 | 37 | console.log("model: "); 38 | console.dir(classifier.mapLabelToMapFeatureToWeight); // { '0': -1, '1': -2, '2': 2 } 39 | 40 | partitions.partitions(dataset.concat(dataset), 2, function (train, test, index) { 41 | console.log("fold: " + index); 42 | classifier.trainBatch(train); 43 | test.forEach(function (instance) { 44 | console.dir("Classify instance:"); 45 | console.dir(instance); 46 | console.dir(classifier.classify(instance.input)); 47 | }); 48 | }); -------------------------------------------------------------------------------- /dist/index.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports["default"] = void 0; 7 | 8 | var _EnhancedClassifier = _interopRequireDefault(require("./core/EnhancedClassifier")); 9 | 10 | var _multilabel = _interopRequireDefault(require("./core/multilabel")); 11 | 12 | var _NeuralNetwork = _interopRequireDefault(require("./core/neural/NeuralNetwork")); 13 | 14 | var _SvmJs = _interopRequireDefault(require("./core/svm/SvmJs")); 15 | 16 | var _SvmLinear = _interopRequireDefault(require("./core/svm/SvmLinear")); 17 | 18 | var _SvmPerf = _interopRequireDefault(require("./core/svm/SvmPerf")); 19 | 20 | var _WinnowHash = _interopRequireDefault(require("./core/winnow/WinnowHash")); 21 | 22 | var _features = _interopRequireDefault(require("./features")); 23 | 24 | var _formats = _interopRequireDefault(require("./formats")); 25 | 
26 | var _utils = _interopRequireDefault(require("./utils")); 27 | 28 | function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; } 29 | 30 | var _default = { 31 | classifiers: { 32 | NeuralNetwork: _NeuralNetwork["default"], 33 | SvmJs: _SvmJs["default"], 34 | SvmLinear: _SvmLinear["default"], 35 | SvmPerf: _SvmPerf["default"], 36 | Winnow: _WinnowHash["default"], 37 | multilabel: _multilabel["default"], 38 | EnhancedClassifier: _EnhancedClassifier["default"] 39 | }, 40 | features: _features["default"], 41 | formats: _formats["default"], 42 | utils: _utils["default"] 43 | }; 44 | exports["default"] = _default; -------------------------------------------------------------------------------- /src/formats/svmlight.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Small utility for writing a dataset in SVM-light format. 3 | * 4 | * @author Erel Segal-Halevi 5 | * @since 2013-09 6 | */ 7 | 8 | 9 | /** 10 | * convert a single dataset to compact JSON format. 11 | * @param dataset an array of samples in the format {input: [value1, value2, ...], output: (0|1)} 12 | * @param bias if nonzero, add it at the beginning of the vector. 13 | * @param binarize if true, change output to -1/1. If false, leave output as it is 14 | */ 15 | exports.toSvmLight = function(dataset, bias, binarize, firstFeatureNumber) { 16 | var lines = ""; 17 | for (var i=0; i0? "\n": "") + 19 | (binarize? (dataset[i].output>0? "1": "-1"): dataset[i].output) + // in svm-light, the output comes first: 20 | featureArrayToFeatureString(dataset[i].input, bias, firstFeatureNumber) 21 | ; 22 | lines += line; 23 | }; 24 | lines += "\n"; 25 | return lines; 26 | } 27 | 28 | 29 | 30 | /** 31 | * convert an array of features to a single line in SVM-light format. The line starts with a space. 
32 | */ 33 | function featureArrayToFeatureString(features, bias, firstFeatureNumber) { 34 | if (!Array.isArray(features)) 35 | throw new Error("Expected an array, but got "+JSON.stringify(features)) 36 | var line = (bias? " "+firstFeatureNumber+":"+bias: ""); 37 | for (var feature=0; feature 0 ? "\n" : "") + (binarize ? dataset[i].output > 0 ? "1" : "-1" : dataset[i].output) + // in svm-light, the output comes first: 21 | featureArrayToFeatureString(dataset[i].input, bias, firstFeatureNumber); 22 | lines += line; 23 | } 24 | 25 | ; 26 | lines += "\n"; 27 | return lines; 28 | }; 29 | /** 30 | * convert an array of features to a single line in SVM-light format. The line starts with a space. 31 | */ 32 | 33 | 34 | function featureArrayToFeatureString(features, bias, firstFeatureNumber) { 35 | if (!Array.isArray(features)) throw new Error("Expected an array, but got " + JSON.stringify(features)); 36 | var line = bias ? " " + firstFeatureNumber + ":" + bias : ""; 37 | 38 | for (var feature = 0; feature < features.length; ++feature) { 39 | var value = features[feature]; 40 | if (value) line += " " + (feature + firstFeatureNumber + (bias ? 
/**
 * Utilities for lists
 *
 * @author Vasily Konovalov
 */
import {
  reduce,
  isObject,
  isArray,
  each,
  clone
} from 'underscore';

/**
 * Return the median of an array of numbers.
 * NOTE: sorts the given array in place (preserved side effect - callers may rely on it).
 * For an even number of elements, the two middle values are averaged.
 */
export function median(values) {
  values.sort(function (a, b) {
    return a - b;
  });
  var half = Math.floor(values.length / 2);
  if (values.length % 2)
    return values[half];
  else
    return (values[half - 1] + values[half]) / 2.0;
}

/**
 * Return the population variance of a list of numbers: E[x^2] - (E[x])^2.
 */
export function variance(list) {
  // BUGFIX: these locals used to be implicit globals, which throws a
  // ReferenceError at runtime because this file is an ES module (strict mode).
  let sum = reduce(list, function (memo, num) {
    return memo + num;
  }, 0);
  let exp = sum / list.length;
  let sum2 = reduce(list, function (memo, num) {
    return memo + num * num;
  }, 0);
  let exp2 = sum2 / list.length;
  return exp2 - exp * exp;
}

/** Return the arithmetic mean of a list of numbers. */
export function average(list) {
  let sum = reduce(list, function (memo, num) {
    return memo + num;
  }, 0);
  return sum / list.length;
}

/**
 * Wrap a label (or label container) into embedded-list ([[...]]) form.
 * @param label - null/undefined, a single label array, an already-embedded
 *   array of arrays, or an object with a "classes" property.
 * @return an embedded list; [[]] for null/undefined input.
 */
export function listembed(label) {
  if (label === null || label === undefined)
    return [
      []
    ];
  if (isObject(label) && !isArray(label))
    if ('classes' in label)
      label = label.classes;

  if (!(label[0] instanceof Array))
    return [label];
  else
    return label;
}

/** Return a copy of a dataset in which each sample is shallow-cloned. */
export function clonedataset(set) {
  // BUGFIX: "set1" used to be an implicit global (ReferenceError in strict mode).
  const set1 = [];
  each(set, function (value, key, list) {
    set1.push(clone(value));
  });
  return set1;
}
-------------------------------------------------------------------------------- 1 | # neuro.js 2 | 3 | [![npm](https://img.shields.io/npm/v/neuro.js.svg?style=plastic)](https://www.npmjs.com/package/neuro.js) 4 | [![npm](https://img.shields.io/npm/dt/neuro.js.svg?style=plastic)](https://www.npmjs.com/package/neuro.js) 5 | [![GitHub license](https://img.shields.io/github/license/intelligo-systems/neuro.js.svg)](https://github.com/intelligo-systems/neuro.js/blob/master/LICENSE) 6 | [![Twitter](https://img.shields.io/twitter/url/https/github.com/intelligo-systems/neuro.js.svg?style=social)](https://twitter.com/intent/tweet?text=Wow:&url=https%3A%2F%2Fgithub.com%2Fintelligo-systems%2Fintelligo) 7 | 8 | Neuro.js is machine learning framework for building AI assistants and chat-bots. 9 | 10 | [![NPM](https://nodei.co/npm/neuro.js.png?downloads=true&downloadRank=true&stars=true)](https://nodei.co/npm/neuro.js/) 11 | 12 | | [Installation][] | [Documentation][] | [Contributors][] | [License][] | 13 | |---|---|---|---| 14 | 15 | # Installation 16 | 17 | ``` 18 | npm install neuro.js --save 19 | ``` 20 | 21 | ## Documentation 22 | 23 | To check out docs, visit [neuro.js.org](https://neuro.js.org). 24 | 25 | 26 | ## Contributors 27 | 28 | You may contribute in several ways like creating new features, fixing bugs, improving documentation and examples 29 | or translating any document here to your language. [Find more information in CODE_OF_CONDUCT.md](.github/CODE_OF_CONDUCT.md). 30 | Contributors 31 | 32 | ## License 33 | 34 | > Copyright (C) 2019 Intelligo LLC. 35 | > neuro.js is open-sourced software licensed under the [MIT](https://opensource.org/licenses/MIT) license. 36 | > (See the [LICENSE](https://github.com/intelligo-systems/neuro.js/blob/master/LICENSE) file for the whole license text.) 
37 | 38 | **[⬆ back to top](#neuro.js)** 39 | 40 | [Installation]:#installation 41 | [Documentation]:#documentation 42 | [Contributors]:#contributors 43 | [License]:#license 44 | 45 | -------------------------------------------------------------------------------- /src/core/svm/SvmLinearMulticlassDemo.js: -------------------------------------------------------------------------------- 1 | // simple demonstration of multiclass SVM, based on LibLinear 2 | 3 | var SvmLinear = require('./SvmLinear'); 4 | 5 | var trainSet = [ 6 | {input: [0,0], output: 3}, 7 | {input: [1,1], output: 3}, 8 | 9 | {input: [0,1], output: 4}, 10 | {input: [1,2], output: 4}, 11 | 12 | {input: [0,2], output: 5}, 13 | {input: [1,3], output: 5}, 14 | ]; 15 | 16 | // One separating line goes through [0,0.5] and [1,1.5]. It is: 17 | // 0.5+x-y = 0 18 | // or: -1-2x+2y = 0 19 | 20 | //Another separating line goes through [0,1.5] and [1,2.5]. It is: 21 | // 1.5+x-y = 0 22 | //or: -3-2x+2y = 0 23 | 24 | 25 | var classifier = new SvmLinear( 26 | { 27 | learn_args: "-c 20", 28 | model_file_prefix: "tempfiles/SvmLinearMulticlassDemo", 29 | multiclass: true, 30 | debug: false 31 | } 32 | ); 33 | classifier.trainBatch(trainSet); 34 | 35 | console.log("simple classification: "); 36 | console.dir(classifier.classify([1,0])); // 3 37 | console.dir(classifier.classify([0,1.3])); // 4 38 | console.dir(classifier.classify([0,1.7])); // 5 39 | console.dir(classifier.classify([0,3])); // 5 40 | 41 | console.log("model: "); 42 | console.dir(classifier.mapLabelToMapFeatureToWeight); // { '0': -1, '1': -2, '2': 2 } 43 | 44 | console.log("explained classification: "); 45 | console.dir(classifier.classify([1,0],3)); // 3 46 | console.dir(classifier.classify([0,1.3],3)); // 4 47 | console.dir(classifier.classify([0,1.7],3)); // 5 48 | console.dir(classifier.classify([0,3],3)); // 5 49 | 50 | console.log("classification with scores: "); 51 | console.dir(classifier.classify([1,0],0,true)); // 3 52 | 
console.dir(classifier.classify([0,1.3],0,true)); // 4 53 | console.dir(classifier.classify([0,1.7],0,true)); // 5 54 | console.dir(classifier.classify([0,3],0,true)); // 5 55 | 56 | console.log("explained classification with scores: "); 57 | console.dir(classifier.classify([1,0],3,true)); // 3 58 | console.dir(classifier.classify([0,1.3],3,true)); // 4 59 | console.dir(classifier.classify([0,1.7],3,true)); // 5 60 | console.dir(classifier.classify([0,3],3,true)); // 5 61 | -------------------------------------------------------------------------------- /src/core/multilabel/multilabelutils.js: -------------------------------------------------------------------------------- 1 | var _ = require("underscore")._; 2 | /** 3 | * A utility function, used by several multi-label classifiers. 4 | * 5 | * @param scoresVector [[label1,score1],[label2,score2],...] 6 | * @param explain (int) if >0, return explanation. 7 | * @param withScores (boolean) if true, return the original scores vector. 8 | * @param threshold if withScores is false, all labels with scores above this threshold will be returned. 9 | */ 10 | module.exports = { 11 | // iterate the list and collect the second item from the every element of the list 12 | getvalue: function (list) { 13 | val = [] 14 | for (elem in list) 15 | { val.push(list[elem][1]) } 16 | return val 17 | }, 18 | 19 | normalizeClasses: function (expectedClasses) { 20 | if (!_(expectedClasses).isArray()) 21 | expectedClasses = [expectedClasses]; 22 | 23 | expectedClasses = expectedClasses.map(this.stringifyClass); 24 | expectedClasses.sort(); 25 | return expectedClasses; 26 | }, 27 | 28 | stringifyClass: function (aClass) { 29 | return (_(aClass).isString()? aClass: JSON.stringify(aClass)); 30 | }, 31 | 32 | stringifyIfNeeded: function (label) { 33 | return (typeof(label)==='string'? 
label: JSON.stringify(label)); 34 | }, 35 | 36 | normalizeOutputLabels: function(labels) { 37 | if (!Array.isArray(labels)) 38 | labels = [labels]; 39 | return labels.map(module.exports.stringifyIfNeeded); 40 | }, 41 | 42 | mapScoresVectorToMultilabelResult: function(scoresVector, explain, withScores, threshold) { 43 | var results; 44 | if (withScores) { 45 | results = scoresVector; 46 | } else { 47 | results = []; 48 | scoresVector.forEach(function(pair) { 49 | if (pair[1]>=threshold) 50 | results.push(pair[0]); 51 | }); 52 | } 53 | return explain>0? { 54 | classes: results, 55 | explanation: scoresVector.map(function(pair) {return pair[0]+": "+pair[1];}) 56 | }: 57 | results; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /dist/core/multilabel/multilabelutils.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | var _ = require("underscore")._; 4 | /** 5 | * A utility function, used by several multi-label classifiers. 6 | * 7 | * @param scoresVector [[label1,score1],[label2,score2],...] 8 | * @param explain (int) if >0, return explanation. 9 | * @param withScores (boolean) if true, return the original scores vector. 10 | * @param threshold if withScores is false, all labels with scores above this threshold will be returned. 
11 | */ 12 | 13 | 14 | module.exports = { 15 | // iterate the list and collect the second item from the every element of the list 16 | getvalue: function getvalue(list) { 17 | val = []; 18 | 19 | for (elem in list) { 20 | val.push(list[elem][1]); 21 | } 22 | 23 | return val; 24 | }, 25 | normalizeClasses: function normalizeClasses(expectedClasses) { 26 | if (!_(expectedClasses).isArray()) expectedClasses = [expectedClasses]; 27 | expectedClasses = expectedClasses.map(this.stringifyClass); 28 | expectedClasses.sort(); 29 | return expectedClasses; 30 | }, 31 | stringifyClass: function stringifyClass(aClass) { 32 | return _(aClass).isString() ? aClass : JSON.stringify(aClass); 33 | }, 34 | stringifyIfNeeded: function stringifyIfNeeded(label) { 35 | return typeof label === 'string' ? label : JSON.stringify(label); 36 | }, 37 | normalizeOutputLabels: function normalizeOutputLabels(labels) { 38 | if (!Array.isArray(labels)) labels = [labels]; 39 | return labels.map(module.exports.stringifyIfNeeded); 40 | }, 41 | mapScoresVectorToMultilabelResult: function mapScoresVectorToMultilabelResult(scoresVector, explain, withScores, threshold) { 42 | var results; 43 | 44 | if (withScores) { 45 | results = scoresVector; 46 | } else { 47 | results = []; 48 | scoresVector.forEach(function (pair) { 49 | if (pair[1] >= threshold) results.push(pair[0]); 50 | }); 51 | } 52 | 53 | return explain > 0 ? 
"use strict";

// NOTE(review): this is Babel build output of src/utils/list.js - regenerate
// after editing the source; this in-place fix mirrors the source fix.
Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.median = median;
exports.variance = variance;
exports.average = average;
exports.listembed = listembed;
exports.clonedataset = clonedataset;

var _underscore = require("underscore");

/**
 * Utilities for lists
 *
 * @author Vasily Konovalov
 */
// Median: sort the array (in place) and pick the middle number;
// for an even count, average the two middle numbers.
function median(values) {
  values.sort(function (a, b) {
    return a - b;
  });
  var half = Math.floor(values.length / 2);
  if (values.length % 2) return values[half];else return (values[half - 1] + values[half]) / 2.0;
}

// Population variance: E[x^2] - (E[x])^2.
function variance(list) {
  // BUGFIX: these locals used to be implicit globals, which throws a
  // ReferenceError under this file's "use strict" directive.
  var sum = (0, _underscore.reduce)(list, function (memo, num) {
    return memo + num;
  }, 0);
  var exp = sum / list.length;
  var sum2 = (0, _underscore.reduce)(list, function (memo, num) {
    return memo + num * num;
  }, 0);
  var exp2 = sum2 / list.length;
  return exp2 - exp * exp;
}

// Arithmetic mean.
function average(list) {
  var sum = (0, _underscore.reduce)(list, function (memo, num) {
    return memo + num;
  }, 0);
  return sum / list.length;
} // @input - list
// @output - embedded list


function listembed(label) {
  if (label === null || label == undefined || typeof label == 'undefined') return [[]];
  if ((0, _underscore.isObject)(label) && !(0, _underscore.isArray)(label))
    if ('classes' in label) label = label.classes;
  if (!(label[0] instanceof Array)) return [label];else return label;
}

// Shallow-clone every sample of a dataset into a new array.
function clonedataset(set) {
  // BUGFIX: "set1" used to be an implicit global (ReferenceError in strict mode).
  var set1 = [];
  (0, _underscore.each)(set, function (value, key, list) {
    set1.push((0, _underscore.clone)(value));
  });
  return set1;
}
It is: 28 | // 1.5+x-y = 0 29 | //or: -3-2x+2y = 0 30 | 31 | var classifier = new SvmLinear({ 32 | learn_args: "-c 20", 33 | model_file_prefix: "tempfiles/SvmLinearMulticlassDemo", 34 | multiclass: true, 35 | debug: false 36 | }); 37 | classifier.trainBatch(trainSet); 38 | console.log("simple classification: "); 39 | console.dir(classifier.classify([1, 0])); // 3 40 | 41 | console.dir(classifier.classify([0, 1.3])); // 4 42 | 43 | console.dir(classifier.classify([0, 1.7])); // 5 44 | 45 | console.dir(classifier.classify([0, 3])); // 5 46 | 47 | console.log("model: "); 48 | console.dir(classifier.mapLabelToMapFeatureToWeight); // { '0': -1, '1': -2, '2': 2 } 49 | 50 | console.log("explained classification: "); 51 | console.dir(classifier.classify([1, 0], 3)); // 3 52 | 53 | console.dir(classifier.classify([0, 1.3], 3)); // 4 54 | 55 | console.dir(classifier.classify([0, 1.7], 3)); // 5 56 | 57 | console.dir(classifier.classify([0, 3], 3)); // 5 58 | 59 | console.log("classification with scores: "); 60 | console.dir(classifier.classify([1, 0], 0, true)); // 3 61 | 62 | console.dir(classifier.classify([0, 1.3], 0, true)); // 4 63 | 64 | console.dir(classifier.classify([0, 1.7], 0, true)); // 5 65 | 66 | console.dir(classifier.classify([0, 3], 0, true)); // 5 67 | 68 | console.log("explained classification with scores: "); 69 | console.dir(classifier.classify([1, 0], 3, true)); // 3 70 | 71 | console.dir(classifier.classify([0, 1.3], 3, true)); // 4 72 | 73 | console.dir(classifier.classify([0, 1.7], 3, true)); // 5 74 | 75 | console.dir(classifier.classify([0, 3], 3, true)); // 5 -------------------------------------------------------------------------------- /test/classifiersTest/multilabel/ClassifierWithSplitterTest.js: -------------------------------------------------------------------------------- 1 | /** 2 | * a unit-test for multi-label classifier with input-splitter (sentence splitter) 3 | */ 4 | 5 | import { EnhancedClassifier, multilabel, Winnow } from 
"../../../dist/core"; 6 | import { NGramsOfWords, RegexpSplitter } from "../../../dist/features"; 7 | 8 | describe.skip("baseline - classifier without a splitter", function() { 9 | it("should not classify long sentencs", function() { 10 | var classifier = new EnhancedClassifier({ 11 | classifierType: multilabel.BinaryRelevance.bind(this, { 12 | binaryClassifierType: Winnow.bind(this, { 13 | retrain_count: 10 14 | }) 15 | }), 16 | featureExtractor: NGramsOfWords(1), 17 | inputSplitter: null 18 | }); 19 | 20 | classifier.trainBatch([ 21 | { input: "I want aa", output: "A" }, 22 | { input: "I want bb", output: "B" }, 23 | { input: "I want cc", output: "C" } 24 | ]); 25 | 26 | classifier.classify("I want aa").should.eql(["A"]); 27 | classifier.classify("I want bb").should.eql(["B"]); 28 | classifier.classify("I want cc").should.eql(["C"]); 29 | classifier 30 | .classify("I want aa, I want bb, and I want cc") 31 | .should.not.eql(["A", "B", "C"]); 32 | }); 33 | }); 34 | 35 | describe.skip("classifier with a splitter", function() { 36 | it("should classify long sentencs", function() { 37 | var classifier = new EnhancedClassifier({ 38 | classifierType: multilabel.BinaryRelevance.bind(this, { 39 | binaryClassifierType: Winnow.bind(this, { 40 | retrain_count: 3 41 | }) 42 | }), 43 | featureExtractor: NGramsOfWords(1), 44 | inputSplitter: RegexpSplitter("[.,;?!]|and") 45 | }); 46 | 47 | classifier.trainBatch([ 48 | { input: "I want aa", output: "A" }, // train on single class 49 | { input: "I want bb", output: "B" }, // train on array with single class (same effect) 50 | { input: "I want cc", output: "C" } // train on structured class, that will be stringified to "{C:c}". 
/*
 Correlation between unseen words and False Negative

 The assumption is that a previously unseen word mostly causes a false-negative
 type of mistake. The module does cross-validation on the given dataset; for the
 test utterances that contain unseen words and a false-negative mistake, a dict
 is built where the key is a word and the value is the list of false-negative
 mistakes.

 @author Vasily Konovalov
 */

var _ = require('underscore')._;
var fs = require('fs');
var partitions = require('./partitions');
var trainAndTest = require('./trainAndTest').trainAndTest;
var trainAndTest_hash = require('./trainAndTest').trainAndTest_hash;

/** Lower-case and trim a sentence; return "" for undefined input. */
function normalizer(sentence) {
	if (typeof sentence == 'undefined') {
		return "";
	} else {
		return sentence.toLowerCase().trim();
	}
}

/** Build the unique vocabulary of all the inputs of a dataset. */
function tokenizedataset(dataset, tokenize) {
	// BUGFIX: "vocabulary" used to be an implicit global.
	var vocabulary = [];
	for (var sample in dataset) {
		if (dataset[sample].length != 0) {
			var words = tokenize(normalizer(dataset[sample]['input']));
			vocabulary = vocabulary.concat(words);
		}
	}
	return _.uniq(vocabulary);
}

/** Split a string into word/punctuation tokens, dropping empty strings and spaces. */
module.exports.tokenize = function(str) {
	// BUGFIX: "pattern" used to be an implicit global.
	// NOTE(review): the '\""' alternative matches two consecutive double-quote
	// characters; it looks like a typo for a single '\"' - confirm the intended
	// punctuation set before changing the pattern.
	var pattern = new RegExp(/(\w+|\!|\'|\"")/i);
	str = str.split(pattern);
	return _.without(str, '', ' ');
}
/*
@params dataset - dataset to estimate the correlation
@params classifier - classifier to estimate false negative mistakes.

*/
module.exports.unseen_correlation = function(dataset, classifier, tokenize) {
	// BUGFIX: all of the variables below used to be implicit globals, leaking
	// state between calls and across modules.
	var unseen_correlation = {};

	partitions.partitions(dataset, 5, function(trainSet, testSet, index) {
		var unseen_vocabulary = tokenizedataset(testSet, tokenize);
		var seen_vocabulary = tokenizedataset(trainSet, tokenize);
		var stats = trainAndTest_hash(classifier, trainSet, testSet, 5);

		_.each(stats['data'], function(report, key, list) {
			if (report['explanations']['FN'].length > 0) {
				// Words in this test input that were never seen during training:
				var unseen_words = _.difference(tokenize(normalizer(report['input'])), seen_vocabulary);
				_.each(unseen_words, function(word, key, list) {
					if (!(word in unseen_correlation)) {
						unseen_correlation[word] = [];
					}
					unseen_correlation[word].push(report['explanations']['FN']);
				});
			}
		});
	});
	return unseen_correlation;
}
10 | 11 | @author Vasily Konovalov 12 | */ 13 | var _ = require('underscore')._; 14 | 15 | var fs = require('fs'); 16 | 17 | var partitions = require('./partitions'); 18 | 19 | var trainAndTest = require('./trainAndTest').trainAndTest; 20 | 21 | var trainAndTest_hash = require('./trainAndTest').trainAndTest_hash; 22 | 23 | function normalizer(sentence) { 24 | if (typeof sentence == 'undefined') { 25 | return ""; 26 | } else { 27 | return sentence.toLowerCase().trim(); 28 | } 29 | } 30 | 31 | function tokenizedataset(dataset, tokenize) { 32 | vocabulary = []; 33 | 34 | for (var sample in dataset) { 35 | if (dataset[sample].length != 0) { 36 | var words = tokenize(normalizer(dataset[sample]['input'])); 37 | vocabulary = vocabulary.concat(words); 38 | } 39 | } 40 | 41 | return _.uniq(vocabulary); 42 | } 43 | 44 | module.exports.tokenize = function (str) { 45 | pattern = new RegExp(/(\w+|\!|\'|\"")/i); 46 | str = str.split(pattern); 47 | return _.without(str, '', ' '); 48 | }; 49 | /* 50 | @params dataset - dataset to estimate the correlation 51 | @params classifier - classifier to estimate false negative mistakes. 
/**
 * Unit tests for the Partitions unit (creating partitions for train and test).
 */

import _, { isEqual } from "underscore";
import { partitions } from "../../dist/utils";

describe("partitions", () => {
  it("partitions_consistent_by_fold", () => {
    const dataset = [1, 3, 5, 7, 9, 11, 13];
    const fold = partitions.partitions_consistent_by_fold(dataset, 2, 1);
    isEqual(fold, {
      train: [1, 3, 5, 13],
      test: [7, 9, 11]
    }).should.be.true;
  });

  it.skip("partitions_hash_fold", () => {
    const dataset = {
      label1: [1, 3, 5, 7, 9, 11, 13],
      label2: [0, 2, 4, 6, 8, 10, 12]
    };
    let fold = partitions.partitions_hash_fold(dataset, 2, 1);
    isEqual(fold["test"], [7, 9, 11, 6, 8, 10]).should.be.true;
    fold = partitions.partitions_hash_fold(dataset, 3, 2);
    isEqual(fold["test"], [9, 11, 8, 10]).should.be.true;
  });

  it("partition hash", () => {
    const dataset = {
      label1: [1, 3, 5, 7, 9, 11, 13],
      label2: [0, 2, 4, 6, 8, 10, 12]
    };
    partitions.partitions_hash(dataset, 2, (train, test, index) => {
      test.should.have.lengthOf(6);
      train.should.have.lengthOf(4);
    });
  });

  // A dummy dataset with 10 documents:
  const dataset = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];

  it("creates 5 partitions, with a test-set of 2 in each", () => {
    let foldCount = 0;
    partitions.partitions(dataset, 5, (train, test, index) => {
      train.should.have.lengthOf(8);
      test.should.have.lengthOf(2);
      // Most important check - make sure there is no leak from train to test!
      _(test)
        .intersection(train)
        .should.have.lengthOf(0);
      foldCount++;
    });
    foldCount.should.equal(5);
  });

  it("creates 3 partitions, with a test-set of 3 in each", () => {
    let foldCount = 0;
    partitions.partitions(dataset, 3, (train, test, index) => {
      train.should.have.lengthOf(7);
      test.should.have.lengthOf(3);
      // Most important check - make sure there is no leak from train to test!
      _(test)
        .intersection(train)
        .should.have.lengthOf(0);
      foldCount++;
    });
    foldCount.should.equal(3);
  });
});
TextClassifier, 56 | normalizer: _features.LowerCaseNormalizer, 57 | featureExtractor: WordExtractor 58 | }); 59 | 60 | //Train and test: 61 | intentClassifier.trainBatch([ 62 | { 63 | input: "I want an apple", 64 | output: "apl" 65 | }, 66 | { 67 | input: "I want a banana", 68 | output: "bnn" 69 | }, 70 | { 71 | input: "I want chips", 72 | output: "cps" 73 | } 74 | ]); 75 | 76 | intentClassifier 77 | .classify("I want an apple and a banana") 78 | .sort() 79 | .should.eql(["apl", "bnn"]); 80 | intentClassifier 81 | .classify("I WANT AN APPLE AND A BANANA") 82 | .sort() 83 | .should.eql(["apl", "bnn"]); // case insensitive 84 | }); 85 | }); 86 | -------------------------------------------------------------------------------- /src/features/index.js: -------------------------------------------------------------------------------- 1 | export const NGramsOfLetters = require("./NGramsOfLetters"); 2 | export const Hypernyms = require("./HypernymExtractor"); 3 | export const FeatureLookupTable = require("./FeatureLookupTable"); 4 | export const LowerCaseNormalizer = require("./LowerCaseNormalizer"); 5 | export const RegexpNormalizer = require("./RegexpNormalizer"); 6 | export const RegexpSplitter = require("./RegexpSplitter"); 7 | 8 | /** 9 | * CollectionOfExtractors - combines the features from several feature extractors. 10 | * @param extractors - an array of other feature extractors. 11 | * @param sample - a string. 12 | * @param features an initial hash of features (optional). 13 | * @return a hash with all features generated from the sample by the different extractors 14 | */ 15 | export function CollectionOfExtractors(extractors) { 16 | return function(sample, features) { 17 | for (var i=0; i 0 ? 1 : -1); 32 | }); 33 | return this.base.train(data, labels, this.opts); 34 | }, 35 | 36 | /** 37 | * @param features - a feature-value hash. 38 | * @param explain - int - if positive, an "explanation" field, with the given length, will be added to the result. 
39 | * @param continuous_output if true, return the net classification score. If false [default], return 0 or 1. 40 | * @return the binary classification - 0 or 1. 41 | */ 42 | classify: function(features, explain, continuous_output) { 43 | var score = this.base.marginOne(features); 44 | var classification = continuous_output ? score : score > 0 ? 1 : 0; 45 | 46 | if (explain > 0) { 47 | var f = this.base.b; 48 | 49 | // if the linear kernel was used and w was computed and stored, 50 | // (i.e. the svm has fully finished training) 51 | // the internal class variable usew_ will be set to true. 52 | var explanations = []; 53 | if (this.base.usew_) { 54 | var w = this.base.w; 55 | for (var j = 0; j < this.base.D; j++) { 56 | explanations[j] = { 57 | feature: j, 58 | value: features[j], 59 | weight: w[j], 60 | relevance: features[j] * w[j] 61 | }; 62 | } 63 | } else { 64 | // explanations not supported. 65 | //for(var i=0;i 0 ? 1 : -1); 29 | }); 30 | return this.base.train(data, labels, this.opts); 31 | }, 32 | 33 | /** 34 | * @param features - a feature-value hash. 35 | * @param explain - int - if positive, an "explanation" field, with the given length, will be added to the result. 36 | * @param continuous_output if true, return the net classification score. If false [default], return 0 or 1. 37 | * @return the binary classification - 0 or 1. 38 | */ 39 | classify: function classify(features, explain, continuous_output) { 40 | var score = this.base.marginOne(features); 41 | var classification = continuous_output ? score : score > 0 ? 1 : 0; 42 | 43 | if (explain > 0) { 44 | var f = this.base.b; // if the linear kernel was used and w was computed and stored, 45 | // (i.e. the svm has fully finished training) 46 | // the internal class variable usew_ will be set to true. 
47 | 48 | var explanations = []; 49 | 50 | if (this.base.usew_) { 51 | var w = this.base.w; 52 | 53 | for (var j = 0; j < this.base.D; j++) { 54 | explanations[j] = { 55 | feature: j, 56 | value: features[j], 57 | weight: w[j], 58 | relevance: features[j] * w[j] 59 | }; 60 | } 61 | } else {// explanations not supported. 62 | //for(var i=0;i frontmatter.date <= new Date(), 30 | count: 20 31 | } 32 | ] 33 | ], 34 | head: [ 35 | [ 36 | 'link', 37 | { rel: 'apple-touch-icon', sizes: '57x57', href: '/apple-icon-57x57.png' } 38 | ], 39 | [ 40 | 'link', 41 | { rel: 'apple-touch-icon', sizes: '60x60', href: '/apple-icon-60x60.png' } 42 | ], 43 | [ 44 | 'link', 45 | { rel: 'apple-touch-icon', sizes: '72x72', href: '/apple-icon-72x72.png' } 46 | ], 47 | [ 48 | 'link', 49 | { rel: 'apple-touch-icon', sizes: '76x76', href: '/apple-icon-76x76.png' } 50 | ], 51 | [ 52 | 'link', 53 | { 54 | rel: 'apple-touch-icon', 55 | sizes: '114x114', 56 | href: '/apple-icon-114x114.png' 57 | } 58 | ], 59 | [ 60 | 'link', 61 | { 62 | rel: 'apple-touch-icon', 63 | sizes: '120x120', 64 | href: '/apple-icon-120x120.png' 65 | } 66 | ], 67 | [ 68 | 'link', 69 | { 70 | rel: 'apple-touch-icon', 71 | sizes: '144x144', 72 | href: '/apple-icon-144x144.png' 73 | } 74 | ], 75 | [ 76 | 'link', 77 | { 78 | rel: 'apple-touch-icon', 79 | sizes: '152x152', 80 | href: '/apple-icon-152x152.png' 81 | } 82 | ], 83 | [ 84 | 'link', 85 | { 86 | rel: 'apple-touch-icon', 87 | sizes: '180x180', 88 | href: '/apple-icon-180x180.png' 89 | } 90 | ], 91 | [ 92 | 'link', 93 | { 94 | rel: 'icon', 95 | type: 'image/png', 96 | sizes: '192x192', 97 | href: '/android-icon-192x192.png' 98 | } 99 | ], 100 | [ 101 | 'link', 102 | { 103 | rel: 'icon', 104 | type: 'image/png', 105 | sizes: '32x32', 106 | href: '/favicon-32x32.png' 107 | } 108 | ], 109 | [ 110 | 'link', 111 | { 112 | rel: 'icon', 113 | type: 'image/png', 114 | sizes: '96x96', 115 | href: '/favicon-96x96.png' 116 | } 117 | ], 118 | [ 119 | 'link', 120 | { 121 | rel: 
'icon', 122 | type: 'image/png', 123 | sizes: '16x16', 124 | href: '/favicon-16x16.png' 125 | } 126 | ], 127 | ['link', { rel: 'manifest', href: '/manifest.json' }], 128 | ['meta', { name: 'msapplication-TileColor', content: '#ffffff' }], 129 | [ 130 | 'meta', 131 | { name: 'msapplication-TileImage', content: '/ms-icon-144x144.png' } 132 | ], 133 | ['meta', { name: 'theme-color', content: '#ffffff' }] 134 | ] 135 | } 136 | -------------------------------------------------------------------------------- /dist/features/index.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports.CollectionOfExtractors = CollectionOfExtractors; 7 | exports.NGramsFromArray = NGramsFromArray; 8 | exports.NGramsOfWords = NGramsOfWords; 9 | exports.call = call; 10 | exports.normalize = normalize; 11 | exports.RegexpSplitter = exports.RegexpNormalizer = exports.LowerCaseNormalizer = exports.FeatureLookupTable = exports.Hypernyms = exports.NGramsOfLetters = void 0; 12 | 13 | var NGramsOfLetters = require("./NGramsOfLetters"); 14 | 15 | exports.NGramsOfLetters = NGramsOfLetters; 16 | 17 | var Hypernyms = require("./HypernymExtractor"); 18 | 19 | exports.Hypernyms = Hypernyms; 20 | 21 | var FeatureLookupTable = require("./FeatureLookupTable"); 22 | 23 | exports.FeatureLookupTable = FeatureLookupTable; 24 | 25 | var LowerCaseNormalizer = require("./LowerCaseNormalizer"); 26 | 27 | exports.LowerCaseNormalizer = LowerCaseNormalizer; 28 | 29 | var RegexpNormalizer = require("./RegexpNormalizer"); 30 | 31 | exports.RegexpNormalizer = RegexpNormalizer; 32 | 33 | var RegexpSplitter = require("./RegexpSplitter"); 34 | /** 35 | * CollectionOfExtractors - combines the features from several feature extractors. 36 | * @param extractors - an array of other feature extractors. 37 | * @param sample - a string. 
/**
 * CollectionOfExtractors - combines the features produced by several
 * feature extractors into a single feature hash.
 * @param extractors an array of feature-extractor functions.
 * @return a function (sample, features) that runs every extractor in order.
 */
function CollectionOfExtractors(extractors) {
  return function (sample, features) {
    for (var k = 0; k < extractors.length; ++k) {
      extractors[k](sample, features);
    }
  };
}

/**
 * Converts an array of words/tokens to n-grams of a given size, optionally
 * with a gap (the middle token replaced by "-"). The grams array is padded
 * with "[start]"/"[end]" markers for the duration of the call and restored
 * before returning.
 * @param numOfWords n - the size of each n-gram.
 * @param gap if truthy, the middle token of each gram is replaced by "-".
 * @param grams the array of tokens (temporarily mutated, then restored).
 * @param features hash to which each n-gram is added as a key with value 1.
 */
function NGramsFromArray(numOfWords, gap, grams, features) {
  var padding = numOfWords - 1 - (gap ? 1 : 0);

  for (var p = 0; p < padding; ++p) {
    grams.unshift("[start]");
    grams.push("[end]");
  }

  var lastStart = grams.length - numOfWords;
  for (var start = 0; start <= lastStart; ++start) {
    var piece = grams.slice(start, start + numOfWords);
    if (gap) piece[1] = "-";
    features[piece.join(" ").trim()] = 1;
  }

  // Undo the temporary padding so the caller's array is unchanged.
  for (var p = 0; p < padding; ++p) {
    grams.pop();
    grams.shift();
  }
}

/**
 * Returns a feature extractor that splits a sample string into words
 * (on whitespace and punctuation) and emits their n-grams.
 * @param numOfWords n - the size of each n-gram.
 * @param gap if truthy, generate gapped n-grams (middle token replaced).
 */
function NGramsOfWords(numOfWords, gap) {
  return function (sample, features) {
    var tokens = [];
    sample.split(/[ \t,;:.!?]/).forEach(function (w) {
      if (w) tokens.push(w); // keep only non-empty words
    });
    NGramsFromArray(numOfWords, gap, tokens, features);
  };
}

/**
 * Calls the given featureExtractor on the given sample and returns the
 * resulting feature hash. Used for testing.
 */
function call(featureExtractor, sample) {
  var collected = {};
  featureExtractor(sample, collected);
  return collected;
}

/**
 * Normalizes a feature-extractor argument: a falsy value is returned
 * unchanged, an array is wrapped in a CollectionOfExtractors, and a single
 * extractor function is returned as-is.
 */
function normalize(featureExtractorOrArray) {
  if (!featureExtractorOrArray) {
    return featureExtractorOrArray;
  }
  if (Array.isArray(featureExtractorOrArray)) {
    return new CollectionOfExtractors(featureExtractorOrArray);
  }
  return featureExtractorOrArray;
}
featureExtractorOrArray : Array.isArray(featureExtractorOrArray) ? new CollectionOfExtractors(featureExtractorOrArray) : featureExtractorOrArray; 107 | } -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 
| response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at toroo.byamba@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /test/classifiersTest/NeuralWithNormalizerTest.js: -------------------------------------------------------------------------------- 1 | /** 2 | * a unit-test for Enhanced Classifier 3 | */ 4 | 5 | import { EnhancedClassifier, NeuralNetwork } from "../../dist/core"; 6 | import { 7 | LowerCaseNormalizer, 8 | NGramsOfWords, 9 | RegexpNormalizer 10 | } from "../../dist/features"; 11 | 12 | describe("baseline - classifier without a normalizer", function() { 13 | it("errs on non-normalized sentencs", function() { 14 | var spamClassifier = new EnhancedClassifier({ 15 | classifierType: NeuralNetwork, 16 | featureExtractor: NGramsOfWords(1), 17 | normalizer: null 18 | }); 19 | 20 | spamClassifier.trainBatch([ 21 | { 22 | input: "cheaper watches", 23 | output: [1] 24 | }, 25 | { 26 | input: "", 27 | output: [0] 28 | } 29 | ]); 30 | 31 | spamClassifier.classify("cheaper watches").should.be.above(0.8); // high number (spam) 32 | spamClassifier.classify("cheapest watch es").should.be.below(0.2); // very high number (spam) 33 | spamClassifier.classify("cheapless clocks").should.be.below(0.2); // low number (not spam) 34 | }); 35 | }); 36 | 37 | describe("classifier with a single normalizer", function() { 38 | it("classifies sentences correctly", function() { 39 | var spamClassifier = new EnhancedClassifier({ 40 | classifierType: NeuralNetwork, 41 | featureExtractor: NGramsOfWords(1), 42 | normalizer: RegexpNormalizer([ 43 | { 44 | source: "er\\b", 45 | target: "" 46 | }, 47 | { 48 | source: "est\\b", 49 | 
target: "" 50 | }, 51 | { 52 | source: " es\\b", 53 | target: "es" 54 | } 55 | ]) 56 | }); 57 | 58 | spamClassifier.trainBatch([ 59 | { 60 | input: "cheaper watches", 61 | output: [1] 62 | }, 63 | { 64 | input: "", 65 | output: [0] 66 | } 67 | ]); 68 | 69 | spamClassifier.classify("cheaper watches").should.be.above(0.8); // high number (spam) 70 | spamClassifier.classify("cheapest watch es").should.be.above(0.8); // low number (not spam) 71 | spamClassifier.classify("cheapless clocks").should.be.below(0.2); // low number (not spam) 72 | }); 73 | }); 74 | 75 | describe("classifier with an array of normalizers", function() { 76 | it("classifies sentences correctly", function() { 77 | var spamClassifier = new EnhancedClassifier({ 78 | classifierType: NeuralNetwork, 79 | featureExtractor: NGramsOfWords(1), 80 | normalizer: [ 81 | LowerCaseNormalizer, 82 | RegexpNormalizer([ 83 | { 84 | source: "er\\b", 85 | target: "" 86 | } 87 | ]), 88 | RegexpNormalizer([ 89 | { 90 | source: "est\\b", 91 | target: "" 92 | } 93 | ]), 94 | RegexpNormalizer([ 95 | { 96 | source: " es\\b", 97 | target: "es" 98 | } 99 | ]) 100 | ] 101 | }); 102 | 103 | spamClassifier.trainBatch([ 104 | { 105 | input: "ChEaPeR WaTcHeS", 106 | output: [1] 107 | }, 108 | { 109 | input: "", 110 | output: [0] 111 | } 112 | ]); 113 | 114 | spamClassifier.classify("cheaper watches").should.be.above(0.8); // high number (spam) 115 | spamClassifier.classify("cheapest watch es").should.be.above(0.8); // high number (spam) 116 | spamClassifier.classify("cheapless clocks").should.be.below(0.2); // low number (not spam) 117 | }); 118 | }); 119 | -------------------------------------------------------------------------------- /test/classifiersTest/multilabel/MulticlassSegmentationBayesTest.js: -------------------------------------------------------------------------------- 1 | /** 2 | * a unit-test for Multi-Label classification in the multiclass segmentation method 3 | */ 4 | 5 | import { Bayesian, multilabel } from 
"../../../src/core"; 6 | import { NGramsOfWords } from "../../../src/features"; 7 | import "../../sorted"; 8 | 9 | var MulticlassSegmentationBayes = multilabel.MulticlassSegmentation.bind(this, { 10 | multiclassClassifierType: Bayesian.bind(this, { 11 | calculateRelativeProbabilities: true 12 | }), 13 | featureExtractor: NGramsOfWords(1) 14 | }); 15 | 16 | // MulticlassSegmentationBayes is now in repair 17 | describe.skip("Multi-Label MCS Classifier Trained on Single-class inputs", function() { 18 | var classifier = new MulticlassSegmentationBayes(); 19 | classifier.trainBatch([ 20 | { input: "I want aa", output: "A" }, 21 | { input: "I want bb", output: "B" }, 22 | { input: "I want cc", output: "C" } 23 | ]); 24 | 25 | it("classifies 1-class samples", function() { 26 | classifier.classify("I want aa").should.eql(["A"]); 27 | classifier.classify("I want bb").should.eql(["B"]); 28 | classifier.classify("I want cc").should.eql(["C"]); 29 | }); 30 | 31 | it("classifies 2-class samples", function() { 32 | classifier 33 | .classify("I want aa bb") 34 | .sorted() 35 | .should.eql(["A", "B"]); 36 | classifier 37 | .classify("I want bb cc") 38 | .sorted() 39 | .should.eql(["B", "C"]); 40 | classifier 41 | .classify("I want cc aa") 42 | .sorted() 43 | .should.eql(["A", "C"]); 44 | }); 45 | 46 | it("classifies 2-class samples with a redundant word", function() { 47 | classifier 48 | .classify("I want aa and bb") 49 | .sorted() 50 | .should.eql(["A", "B"]); 51 | classifier 52 | .classify("I want bb and cc") 53 | .sorted() 54 | .should.eql(["B", "C"]); 55 | classifier 56 | .classify("I want cc and aa") 57 | .sorted() 58 | .should.eql(["A", "C"]); 59 | }); 60 | 61 | it("classifies 3-class samples", function() { 62 | classifier 63 | .classify("I want cc and aa and bb") 64 | .sorted() 65 | .should.eql(["A", "B", "C"]); 66 | }); 67 | 68 | // TODO: fix this case 69 | // it('classifies 0-class samples', function() { 70 | // classifier.classify("I want nothing").should.eql([]); 71 | 
// }); 72 | }); 73 | 74 | /*describe('Multi-Label MCS Classifier Trained on two-class inputs', function() { 75 | var classifier = new MulticlassSegmentationBayes(); 76 | classifier.trainBatch([ 77 | {input: {I:1 , want:1 , aa:1 , bb:1 }, output: ['A','B']}, // train on array with classes 78 | {input: {I:1 , want:1 , bb:1 , cc:1 }, output: ['B','C']}, // train on array with classes 79 | {input: {I:1 , want:1 , cc:1 , dd:1 }, output: [{C:1, D:1}]}, // train on set of classes 80 | {input: {I:1 , want:1 , dd:1 , aa:1 }, output: [{D:1, A:1}]}, // train on set of classes 81 | ]); 82 | 83 | it('classifies 1-class samples', function() { 84 | classifier.classify({I:1 , want:1 , aa:1 }).should.eql(['A']); 85 | //classifier.classify({I:1 , want:1 , bb:1 }).should.eql(['B']); 86 | //classifier.classify({I:1 , want:1 , cc:1 }).should.eql(['C']); 87 | //classifier.classify({I:1 , want:1 , dd:1 }).should.eql(['D']); 88 | }); 89 | 90 | it('classifies 2-class samples', function() { 91 | classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }).should.eql(['A','B']); 92 | classifier.classify({I:1 , want:1 , bb:1 , and:1 , cc:1 }).should.eql(['B','C']); 93 | //classifier.classify({I:1 , want:1 , cc:1 , and:1 , dd:1 }).should.eql(['C','D']); 94 | //classifier.classify({I:1 , want:1 , dd:1 , and:1 , aa:1 }).should.eql(['D','A']); 95 | }); 96 | }); 97 | 98 | 99 | */ 100 | -------------------------------------------------------------------------------- /test/classifiersTest/WinnowTest.js: -------------------------------------------------------------------------------- 1 | /** 2 | * a unit-test for winnow classifier 3 | */ 4 | 5 | import { Winnow } from "../../dist/core"; 6 | 7 | var WinnowClassifier = Winnow.bind(this, { 8 | retrain_count: 10, 9 | do_averaging: false, 10 | margin: 1 11 | }); 12 | 13 | describe("winnow classifier", function() { 14 | it("supports online training", function() { 15 | var classifier = new WinnowClassifier(); 16 | classifier.trainOnline( 17 | { 18 | a: 1, 19 
| b: 0 20 | }, 21 | 0 22 | ); 23 | classifier 24 | .classify({ 25 | a: 1, 26 | b: 0 27 | }) 28 | .should.equal(0); 29 | classifier 30 | .classify({ 31 | a: 0, 32 | b: 0 33 | }) 34 | .should.equal(0); 35 | classifier 36 | .classify({ 37 | a: 0, 38 | b: 1 39 | }) 40 | .should.equal(0); 41 | classifier 42 | .classify({ 43 | a: 1, 44 | b: 1 45 | }) 46 | .should.equal(0); 47 | 48 | classifier.trainOnline( 49 | { 50 | a: 0, 51 | b: 1 52 | }, 53 | 1 54 | ); 55 | classifier 56 | .classify({ 57 | a: 1, 58 | b: 0 59 | }) 60 | .should.equal(0); 61 | classifier 62 | .classify({ 63 | a: 0, 64 | b: 1 65 | }) 66 | .should.equal(1); 67 | }); 68 | 69 | it("supports batch and online training", function() { 70 | var dataset = [ 71 | { 72 | input: { 73 | a: 1, 74 | b: 0 75 | }, 76 | output: 0 77 | }, 78 | { 79 | input: { 80 | a: 0, 81 | b: 1 82 | }, 83 | output: 1 84 | } 85 | ]; 86 | //console.log("batch: "); 87 | var classifierBatch = new WinnowClassifier(); 88 | classifierBatch.trainBatch(dataset); 89 | //console.dir(classifierBatch); 90 | 91 | //console.log("online: "); 92 | var classifierOnline = new WinnowClassifier(); 93 | for (var i = 0; i <= classifierBatch.retrain_count; ++i) 94 | for (var d = 0; d < dataset.length; ++d) 95 | classifierOnline.trainOnline(dataset[d].input, dataset[d].output); 96 | //console.dir(classifierOnline); 97 | 98 | classifierOnline.should.eql(classifierBatch); 99 | }); 100 | 101 | it("supports continuous output", function() { 102 | var classifier = new WinnowClassifier(); 103 | classifier.trainOnline( 104 | { 105 | a: 1, 106 | b: 0 107 | }, 108 | 0 109 | ); 110 | classifier.trainOnline( 111 | { 112 | a: 0, 113 | b: 1 114 | }, 115 | 1 116 | ); 117 | classifier 118 | .classify( 119 | { 120 | a: 1, 121 | b: 0 122 | }, 123 | 0, 124 | true 125 | ) 126 | .should.be.below(0); 127 | classifier 128 | .classify( 129 | { 130 | a: 0, 131 | b: 1 132 | }, 133 | 0, 134 | true 135 | ) 136 | .should.be.above(0); 137 | }); 138 | 139 | it("explains its decisions", 
function() { 140 | var classifier = new WinnowClassifier(); 141 | classifier.trainOnline( 142 | { 143 | a: 1, 144 | b: 0 145 | }, 146 | 0 147 | ); 148 | classifier 149 | .classify( 150 | { 151 | a: 0, 152 | b: 0 153 | }, 154 | /*explain=*/ 1 155 | ) 156 | .should.have.property("explanation") 157 | .with.lengthOf(1); 158 | classifier 159 | .classify( 160 | { 161 | a: 0, 162 | b: 0 163 | }, 164 | /*explain=*/ 3 165 | ) 166 | .should.have.property("explanation") 167 | .with.lengthOf(3); 168 | }); 169 | }); 170 | -------------------------------------------------------------------------------- /src/core/svm/svmcommon.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Utilities common to SVM wrappers 3 | */ 4 | 5 | var temp = require('temp') 6 | , fs = require('fs') 7 | , svmlight = require('../../formats/svmlight') 8 | , _ = require('underscore')._ 9 | 10 | /** 11 | * Writes the given dataset to a file in svm-light format. 12 | * @return the file name. 13 | */ 14 | module.exports.writeDatasetToFile = function(dataset, bias, binarize, model_file_prefix, default_file_prefix, firstFeatureNumber) { 15 | if (model_file_prefix) { 16 | var learnFile = model_file_prefix+".learn"; 17 | var fd = fs.openSync(learnFile, 'w'); 18 | } else { 19 | var tempFile = temp.openSync({prefix:default_file_prefix+"-", suffix:".learn"}); 20 | var learnFile = tempFile.path; 21 | var fd = tempFile.fd; 22 | } 23 | var datasetSvmlight = svmlight.toSvmLight(dataset, bias, binarize, firstFeatureNumber); 24 | fs.writeSync(fd, datasetSvmlight); 25 | fs.closeSync(fd); 26 | 27 | return learnFile; 28 | } 29 | 30 | /** 31 | * A utility that classifies a given sample (given as a feature-value map) using a model (given as a feature-weight map). 32 | * @param modelMap a map {feature_i: weight_i, ....} (i >= 0; 0 is the weight of the bias, if exists). 33 | * @param bias if nonzero, added at the beginning of features. 
/**
 * A utility that classifies a given sample (given as a feature-value map) using a model (given as a feature-weight map).
 * @param modelMap a map {feature_i: weight_i, ....} (i >= 0; 0 is the weight of the bias, if exists).
 * @param bias if nonzero, added at the beginning of features.
 * @param features a map {feature_i: value_i, ....} (i >= 1)
 * @param explain (int) if positive, generate explanation about the classification.
 * @param continuous_output (boolean) if true, return a score; if false, return 0 or 1.
 * @param featureLookupTable if not null, used for creating meaningful explanations.
 * @returns a classification value, or {classification, explanation} when explain > 0.
 */
function classifyWithModelMap(modelMap, bias, features, explain, continuous_output, featureLookupTable) {
	var explanations = [];
	var result = 0;
	if (bias && modelMap[0]) {
		var weight = modelMap[0];
		var relevance = bias*modelMap[0];
		result = relevance;
		if (explain>0) explanations.push(
			{
				feature: 'bias',
				value: bias,
				weight: weight,
				relevance: relevance,
			}
		);
	}

	for (var feature in features) {
		// BUGFIX: always pass a radix to parseInt.
		var featureInModelMap = parseInt(feature, 10)+(bias?1:0);
		if (featureInModelMap in modelMap) {
			var weight = modelMap[featureInModelMap];
			var value = features[feature];
			var relevance = weight*value;
			result += relevance;

			if (explain>0) explanations.push(
				{
					feature: featureLookupTable? (featureLookupTable.numberToFeature(feature)||"?"): feature,
					value: value,
					weight: weight,
					relevance: relevance,
				}
			);
		}
	}

	if (!continuous_output)
		result = (result>0? 1: 0);
	// IMPROVED: Number.isNaN replaces _.isNaN; 'result' is always a number
	// here, so the semantics are identical and the underscore dependency
	// in this function is dropped.
	if (Number.isNaN(result)) {
		console.dir(explanations);
		throw new Error("result is NaN when classifying "+features+" with "+JSON.stringify(modelMap))
	}
	if (explain>0) {
		// Most relevant features first:
		explanations.sort(function(a,b){return Math.abs(b.relevance)-Math.abs(a.relevance)});
		explanations = explanations.filter(function(num){ return num.relevance!=0 });

		// explanations.splice(explain, explanations.length-explain); // "explain" is the max length of explanation.

		// ROBUSTNESS: the original read this.detailed_explanations directly,
		// which throws when the function is called detached from the exports
		// object (e.g. under strict mode with an undefined 'this').
		var detailed = this && this.detailed_explanations;
		if (!detailed) {
			// Compact form: [feature, relevance] pairs, sorted by relevance descending.
			explanations = explanations.map(function(e) {
				return [e.feature, e.relevance];
			});
			explanations.sort(function(a,b){ return a[1]-b[1] }).reverse();
		}
		return {
			classification: result,
			explanation: explanations
		};
	} else {
		return result;
	}
}

// Export guard: a plain property assignment keeps CommonJS behavior identical,
// while the typeof check lets the file load in non-CommonJS contexts.
if (typeof module !== "undefined" && module.exports) {
	module.exports.classifyWithModelMap = classifyWithModelMap;
}
/**
 * A utility that classifies a given sample (a feature-value map) using a model (a feature-weight map).
 *
 * @param modelMap a map {feature_i: weight_i, ...} (i >= 0; 0 is the weight of the bias, if it exists).
 * @param bias if nonzero, added at the beginning of features (model indices are then shifted by 1).
 * @param features a map {feature_i: value_i, ...}.
 * @param explain (int) if positive, generate an explanation of the classification.
 * @param continuous_output (boolean) if true, return the raw score; if false, return 0 or 1.
 * @param featureLookupTable if not null, used for creating meaningful explanations.
 * @returns the classification value, or {classification, explanation} when explain > 0.
 */
var classifyWithModelMap = function classifyWithModelMap(modelMap, bias, features, explain, continuous_output, featureLookupTable) {
  var explanations = [];
  var result = 0;

  if (bias && modelMap[0]) {
    // the bias term is stored at index 0 of the model map:
    var biasWeight = modelMap[0];
    var biasRelevance = bias * biasWeight;
    result = biasRelevance;
    if (explain > 0) explanations.push({
      feature: 'bias',
      value: bias,
      weight: biasWeight,
      relevance: biasRelevance
    });
  }

  for (var feature in features) {
    // with a bias, model indices are shifted by 1 relative to feature indices:
    var featureInModelMap = parseInt(feature, 10) + (bias ? 1 : 0); // BUGFIX: explicit radix

    if (featureInModelMap in modelMap) {
      var weight = modelMap[featureInModelMap];
      var value = features[feature];
      var relevance = weight * value;
      result += relevance;
      if (explain > 0) explanations.push({
        feature: featureLookupTable ? featureLookupTable.numberToFeature(feature) || "?" : feature,
        value: value,
        weight: weight,
        relevance: relevance
      });
    }
  }

  if (!continuous_output) result = result > 0 ? 1 : 0;

  if (Number.isNaN(result)) {
    console.dir(explanations);
    // BUGFIX: stringify the features map too (it used to print "[object Object]"):
    throw new Error("result is NaN when classifying " + JSON.stringify(features) + " with " + JSON.stringify(modelMap));
  }

  if (explain > 0) {
    // keep only contributing features, strongest (by absolute relevance) first:
    explanations = explanations.filter(function (e) {
      return e.relevance != 0;
    }).sort(function (a, b) {
      return Math.abs(b.relevance) - Math.abs(a.relevance);
    });

    if (!this.detailed_explanations) {
      // compact form: [featureName, relevance] pairs, by signed relevance, descending:
      explanations = explanations.map(function (e) {
        return [e.feature, e.relevance];
      }).sort(function (a, b) {
        return a[1] - b[1];
      }).reverse();
    }

    return {
      classification: result,
      explanation: explanations
    };
  } else {
    return result;
  }
};

// guarded so the compiled file also loads outside CommonJS:
if (typeof module !== "undefined" && module.exports) module.exports.classifyWithModelMap = classifyWithModelMap;
inputs", function() { 39 | var classifierBatch = new MetaLabelerLanguageModel(); 40 | classifierBatch.trainBatch(dataset); 41 | 42 | var classifier = classifierBatch; 43 | 44 | it("classifies 1-class samples", function() { 45 | classifier.classify({ I: 1, want: 1, aa: 1 }).should.eql(["A"]); 46 | classifier.classify({ I: 1, want: 1, bb: 1 }).should.eql(["B"]); 47 | classifier.classify({ I: 1, want: 1, cc: 1 }).should.eql(["C"]); 48 | }); 49 | 50 | it("knows its classes", function() { 51 | classifier.getAllClasses().should.eql(["A", "B", "C"]); 52 | }); 53 | 54 | it("explains its decisions", function() { 55 | var ab = classifier.classify( 56 | { I: 1, want: 1, aa: 1, and: 1, bb: 1 }, 57 | /*explain=*/ 3 58 | ); 59 | ab.should.have.property("explanation").with.property("ranking"); 60 | ab.should.have.property("explanation").with.property("counting"); 61 | }); 62 | }); 63 | 64 | describe("CLIR Meta-Labeler batch-trained on two-class inputs", function() { 65 | var classifier = new MetaLabelerLanguageModel(); 66 | classifier.trainBatch([ 67 | { input: { I: 1, want: 1, aa: 1, bb: 1 }, output: ["A", "B"] }, // train on array with classes 68 | { input: { I: 1, want: 1, bb: 1, cc: 1 }, output: ["B", "C"] }, // train on array with classes 69 | { input: { I: 1, want: 1, cc: 1, dd: 1 }, output: ["C", "D"] }, 70 | { input: { I: 1, want: 1, dd: 1, aa: 1 }, output: ["D", "A"] } 71 | ]); 72 | 73 | it("classifies 2-class samples", function() { 74 | classifier 75 | .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }) 76 | .sorted() 77 | .should.eql(["A", "B"]); 78 | classifier 79 | .classify({ I: 1, want: 1, bb: 1, and: 1, cc: 1 }) 80 | .sorted() 81 | .should.eql(["B", "C"]); 82 | classifier 83 | .classify({ I: 1, want: 1, cc: 1, and: 1, dd: 1 }) 84 | .sorted() 85 | .should.eql(["C", "D"]); 86 | classifier 87 | .classify({ I: 1, want: 1, dd: 1, and: 1, aa: 1 }) 88 | .sorted() 89 | .should.eql(["A", "D"]); 90 | }); 91 | 92 | it("explains its decisions", function() { 93 | // 
classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }, /*explain=*/1).should.have.property('explanation').with.property('ranking').with.lengthOf(4); 94 | classifier 95 | .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }, /*explain=*/ 1) 96 | .should.have.property("explanation") 97 | .with.property("ranking"); 98 | // classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }, /*explain=*/3).should.have.property('explanation').with.property('counting').with.lengthOf(1); 99 | classifier 100 | .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }, /*explain=*/ 3) 101 | .should.have.property("explanation") 102 | .with.property("counting"); 103 | }); 104 | }); 105 | -------------------------------------------------------------------------------- /test/classifiersTest/SvmMulticlassTest.js: -------------------------------------------------------------------------------- 1 | /** 2 | * a unit-test for SvmLinear classifier (a wrapper for LibLinear), as a multi-class classifier. 3 | */ 4 | 5 | import { EnhancedClassifier, SvmLinear } from "../../dist/core"; 6 | import { FeatureLookupTable } from "../../dist/features"; 7 | 8 | if (!SvmLinear.isInstalled()) { 9 | console.warn("liblinear_train not found - SvmMulticlass tests skipped."); 10 | } else { 11 | var SvmClassifier = SvmLinear.bind(0, { 12 | multiclass: true, 13 | learn_args: "-c 20.0" 14 | }); 15 | 16 | describe( 17 | "SVM-LibLinear multiclass" + " with numeric features and numeric labels", 18 | function() { 19 | var trainSet = [ 20 | { 21 | input: [0, 0], 22 | output: 3 23 | }, 24 | { 25 | input: [1, 1], 26 | output: 3 27 | }, 28 | 29 | { 30 | input: [0, 1], 31 | output: 4 32 | }, 33 | { 34 | input: [1, 2], 35 | output: 4 36 | }, 37 | 38 | { 39 | input: [0, 2], 40 | output: 5 41 | }, 42 | { 43 | input: [1, 3], 44 | output: 5 45 | } 46 | ]; 47 | 48 | var classifier = new SvmClassifier(); 49 | classifier.trainBatch(trainSet); 50 | 51 | it("supports multi-class output", function() { 52 | classifier.classify([1, 
0]).should.equal(3); 53 | classifier.classify([0, 1.3]).should.equal(4); 54 | classifier.classify([0, 1.7]).should.equal(5); 55 | classifier.classify([0, 3]).should.equal(5); 56 | }); 57 | 58 | it("explains its decisions", function() { 59 | classifier 60 | .classify([1, 0], 3) 61 | .should.have.property("explanation") 62 | .with.lengthOf(3); 63 | classifier 64 | .classify([0, 2], 5) 65 | .should.have.property("explanation") 66 | .with.lengthOf(3); 67 | }); 68 | 69 | it("supports classification with scores", function() { 70 | classifier.classify([1, 0], 0, true).should.have.lengthOf(3); 71 | classifier.classify([0, 1.3], 0, true)[0].should.have.lengthOf(2); 72 | classifier.classify([0, 1.7], 0, true)[0][0].should.equal(5); 73 | classifier.classify([0, 3], 0, true)[0][1].should.be.within(2.5, 3.5); 74 | }); 75 | } 76 | ); 77 | 78 | var SvmClassifierStringFeatures = EnhancedClassifier.bind(this, { 79 | classifierType: SvmClassifier, 80 | featureLookupTable: new FeatureLookupTable() 81 | }); 82 | 83 | var SvmClassifierStringLabels = EnhancedClassifier.bind(this, { 84 | classifierType: SvmClassifier, 85 | labelLookupTable: new FeatureLookupTable() 86 | }); 87 | 88 | describe( 89 | "SVM-LibLinear multiclass" + " with numeric features and string labels", 90 | function() { 91 | var trainSet = [ 92 | { 93 | input: [0, 0], 94 | output: "a" 95 | }, 96 | { 97 | input: [1, 1], 98 | output: "a" 99 | }, 100 | 101 | { 102 | input: [0, 1], 103 | output: "b" 104 | }, 105 | { 106 | input: [1, 2], 107 | output: "b" 108 | }, 109 | 110 | { 111 | input: [0, 2], 112 | output: "c" 113 | }, 114 | { 115 | input: [1, 3], 116 | output: "c" 117 | } 118 | ]; 119 | 120 | var classifier = new SvmClassifierStringLabels(); 121 | classifier.trainBatch(trainSet); 122 | 123 | it("supports multi-class output", function() { 124 | classifier.classify([1, 0]).should.equal("a"); 125 | classifier.classify([0, 1.3]).should.equal("b"); 126 | classifier.classify([0, 1.7]).should.equal("c"); 127 | 
classifier.classify([0, 3]).should.equal("c"); 128 | }); 129 | 130 | it("explains its decisions", function() { 131 | classifier 132 | .classify([1, 0], 3) 133 | .should.have.property("explanation") 134 | .with.lengthOf(3); 135 | classifier 136 | .classify([0, 2], 5) 137 | .should.have.property("explanation") 138 | .with.lengthOf(3); 139 | }); 140 | 141 | it("supports classification with scores", function() { 142 | classifier.classify([1, 0], 0, true).should.have.lengthOf(3); 143 | classifier.classify([0, 1.3], 0, true)[0].should.have.lengthOf(2); 144 | classifier.classify([0, 1.7], 0, true)[0][0].should.equal("c"); // must be the first! 145 | classifier.classify([0, 3], 0, true)[0][1].should.be.within(2.5, 3.5); 146 | }); 147 | } 148 | ); 149 | } 150 | -------------------------------------------------------------------------------- /test/classifiersTest/SvmTest.js: -------------------------------------------------------------------------------- 1 | /** 2 | * a unit-test for SvmLinear classifier (a wrapper for LibLinear) and SvmPerf classifier. 3 | */ 4 | 5 | import { EnhancedClassifier, SvmLinear, SvmPerf } from "../../dist/core"; 6 | import { FeatureLookupTable } from "../../dist/features"; 7 | 8 | function test(name, SvmClassifier) { 9 | describe(name + " with numeric features", function() { 10 | var trainSet = [ 11 | { 12 | input: [0, 0], 13 | output: 0 14 | }, 15 | { 16 | input: [1, 1], 17 | output: 0 18 | }, 19 | { 20 | input: [0, 1], 21 | output: 1 22 | }, 23 | { 24 | input: [1, 2], 25 | output: 1 26 | } 27 | ]; 28 | 29 | var classifier = new SvmClassifier(); 30 | classifier.trainBatch(trainSet); 31 | 32 | it("finds the maximal margin separator", function() { 33 | // the max-margin separating line goes through [0,0.5] and [1,1.5]. 
It is: 34 | // 0.5+x-y = 0 35 | // or: 2y-2x-1 = 0 36 | //classifier.modelMap.should.eql({ '0': -1, '1': -2, '2': 2 }); // the LibLinear algorithm is not accurate: 37 | var modelWeights = classifier.getModelWeights(); 38 | 39 | modelWeights[0].should.be.within(-1.5, -0.5); 40 | modelWeights[1].should.be.within(-2.5, -1.5); 41 | modelWeights[2].should.be.within(1.5, 2.5); 42 | }); 43 | 44 | it("supports binary output", function() { 45 | classifier.classify([0, 2]).should.eql(1); 46 | classifier.classify([1, 0]).should.eql(0); 47 | }); 48 | 49 | // it('explains its decisions', function() { 50 | // classifier.classify([0,2], 2).should.have.property("explanation").with.lengthOf(2); 51 | // classifier.classify([1,0], 3).should.have.property("explanation").with.lengthOf(3); 52 | // }) 53 | 54 | it("supports continuous output", function() { 55 | classifier.classify([0, 2], 0, true).should.be.within(2.5, 3.5); // should equal 3, but it is not accurate enough 56 | classifier.classify([1, 0], 0, true).should.be.within(-3.5, -2.5); // should equal -3, but it is not accurate enough 57 | }); 58 | }); 59 | 60 | var SvmClassifierStringFeatures = EnhancedClassifier.bind(this, { 61 | classifierType: SvmClassifier, 62 | featureLookupTable: new FeatureLookupTable() 63 | }); 64 | 65 | describe(name + " with string features", function() { 66 | var trainSet = [ 67 | { 68 | input: { 69 | a: 0, 70 | b: 0 71 | }, 72 | output: 0 73 | }, 74 | { 75 | input: { 76 | a: 1, 77 | b: 1 78 | }, 79 | output: 0 80 | }, 81 | { 82 | input: { 83 | a: 0, 84 | b: 1 85 | }, 86 | output: 1 87 | }, 88 | { 89 | input: { 90 | a: 1, 91 | b: 2 92 | }, 93 | output: 1 94 | } 95 | ]; 96 | 97 | var classifier = new SvmClassifierStringFeatures(); 98 | classifier.trainBatch(trainSet); 99 | 100 | it("supports binary output", function() { 101 | classifier 102 | .classify({ 103 | a: 0, 104 | b: 2 105 | }) 106 | .should.eql(1); 107 | classifier 108 | .classify({ 109 | a: 1, 110 | b: 0 111 | }) 112 | .should.eql(0); 113 | 
}); 114 | 115 | // it('explains its classifications', function() { 116 | // classifier.classify({a:0, b:2}, 2).should.have.property("explanation").with.lengthOf(2); 117 | // classifier.classify({a:1, b:0}, 3).should.have.property("explanation").with.lengthOf(3); 118 | // }) 119 | 120 | it("supports continuous output", function() { 121 | classifier 122 | .classify( 123 | { 124 | a: 0, 125 | b: 2 126 | }, 127 | 0, 128 | true 129 | ) 130 | .should.be.above(0); 131 | classifier 132 | .classify( 133 | { 134 | a: 1, 135 | b: 0 136 | }, 137 | 0, 138 | true 139 | ) 140 | .should.be.below(0); 141 | }); 142 | }); 143 | } // end of function 144 | 145 | if (SvmPerf.isInstalled()) 146 | test( 147 | "SVM-Perf", 148 | SvmPerf.bind(this, { 149 | learn_args: "-c 20.0" 150 | }) 151 | ); 152 | else console.warn("svm_perf_learn not found - SvmPerf tests skipped."); 153 | 154 | if (SvmLinear.isInstalled()) 155 | test( 156 | "SVM-LibLinear", 157 | SvmLinear.bind(this, { 158 | learn_args: "-c 20.0", 159 | multiclass: false 160 | }) 161 | ); 162 | else console.warn("liblinear_train not found - SvmLinear tests skipped."); 163 | -------------------------------------------------------------------------------- /src/features/FeatureLookupTable.js: -------------------------------------------------------------------------------- 1 | /** 2 | * FeatureLookupTable - a table for converting features to numbers and vice versa 3 | */ 4 | class FeatureLookupTable { 5 | constructor() { 6 | this.featureIndexToFeatureName = [undefined]; 7 | this.featureNameToFeatureIndex = { undefined: 0 }; 8 | } 9 | } 10 | 11 | FeatureLookupTable.prototype = { 12 | 13 | // add a single feature, if it does not exist 14 | addFeature: function(feature) { 15 | if (!(feature in this.featureNameToFeatureIndex)) { 16 | var newIndex = this.featureIndexToFeatureName.length; 17 | this.featureIndexToFeatureName.push(feature); 18 | this.featureNameToFeatureIndex[feature] = newIndex; 19 | } 20 | }, 21 | 22 | // add all features in the 
given hash or array 23 | addFeatures: function(hash) { 24 | if (hash instanceof Array) { 25 | for (var index in hash) 26 | this.addFeature(hash[index]); 27 | } else if (hash instanceof Object) { 28 | for (var feature in hash) 29 | this.addFeature(feature); 30 | } 31 | else throw new Error("FeatureLookupTable.addFeatures expects a hash or an array, but got: "+JSON.stringify(hash)); 32 | }, 33 | 34 | // add all features in all hashes in the given array 35 | addFeaturess: function(hashes) { 36 | for (var i=0; i= 0 which represents a number of labels. 16 | * The MetaLabeler returns the C most relevant labels from the list returned by the ranker. 17 | * 18 | * @param opts 19 | * rankerType (mandatory) - the type of the multi-class classifier used for ranking the labels. 20 | * counterType (mandatory) - the type of the multi-class classifier used for selecting the number of labels. 21 | */ 22 | class MetaLabeler { 23 | constructor(opts) { 24 | if (!opts.rankerType) { 25 | console.dir(opts); 26 | throw new Error("opts.rankerType not found"); 27 | } 28 | if (!opts.counterType) { 29 | console.dir(opts); 30 | throw new Error("opts.counterType not found"); 31 | } 32 | this.ranker = new opts.rankerType(); 33 | this.counter = new opts.counterType(); 34 | } 35 | } 36 | 37 | MetaLabeler.prototype = { 38 | 39 | /** 40 | * Tell the classifier that the given sample belongs to the given classes. 41 | * 42 | * @param sample a document. 43 | * @param labels an array whose VALUES are classes. 44 | */ 45 | trainOnline: function(sample, labels) { 46 | // The ranker is just trained by the given set of relevant labels: 47 | this.ranker.trainOnline(sample, labels); 48 | 49 | // The counter is trained by the *number* of relevant labels: 50 | var labelCount = (Array.isArray(labels)? labels: Object.keys(labels)).length; 51 | this.counter.trainOnline(sample, labelCount); 52 | }, 53 | 54 | /** 55 | * Train the classifier with all the given documents. 
56 | * 57 | * @param dataset 58 | * an array with objects of the format: 59 | * {input: sample1, output: [class11, class12...]} 60 | */ 61 | trainBatch : function(dataset) { 62 | // The ranker is just trained by the given set of labels relevant to each sample: 63 | this.ranker.trainBatch(dataset); 64 | 65 | // The counter is trained by the *number* of labels relevant to each sample: 66 | var labelCountDataset = dataset.map(function(datum) { 67 | var labelCount = (Array.isArray(datum.output)? datum.output.length: 1); 68 | return { 69 | input: datum.input, 70 | output: labelCount 71 | }; 72 | }); 73 | this.counter.trainBatch(labelCountDataset); 74 | }, 75 | 76 | /** 77 | * Use the model trained so far to classify a new sample. 78 | * 79 | * @param sample a document. 80 | * @param explain - int - if positive, an "explanation" field, with the given length, will be added to the result. 81 | * 82 | * @return an array whose VALUES are classes. 83 | */ 84 | classify: function(sample, explain) { 85 | var rankedLabelsWithExplain = this.ranker.classify(sample, explain, /*withScores=*/true); 86 | var rankedLabels = (explain>0? rankedLabelsWithExplain.classes: rankedLabelsWithExplain); 87 | var labelCountWithExplain = this.counter.classify(sample, explain, /*withScores=*/true); 88 | var labelCount = (explain>0? labelCountWithExplain.classes[0][0]: labelCountWithExplain[0][0]); 89 | if (_.isString(labelCount)) labelCount = parseInt(labelCount); 90 | 91 | // Pick the labelCount most relevant labels from the list returned by the ranker: 92 | var positiveLabelsWithScores = rankedLabels.slice(0, labelCount); 93 | 94 | var positiveLabels = positiveLabelsWithScores 95 | 96 | if (positiveLabelsWithScores.length != 0) 97 | if (_.isArray(positiveLabelsWithScores[0])) 98 | var positiveLabels = positiveLabelsWithScores.map(function(labelWithScore) {return labelWithScore[0]}); 99 | 100 | return (explain>0? 
{ 101 | classes: positiveLabels, 102 | explanation: { 103 | ranking: rankedLabelsWithExplain.explanation, 104 | counting: labelCountWithExplain.explanation 105 | } 106 | }: 107 | positiveLabels) 108 | }, 109 | 110 | getAllClasses: function() { 111 | return this.ranker.getAllClasses(); 112 | }, 113 | 114 | toJSON : function() { 115 | }, 116 | 117 | fromJSON : function(json) { 118 | }, 119 | 120 | /** 121 | * Link to a FeatureLookupTable from a higher level in the hierarchy (typically from an EnhancedClassifier), used ONLY for generating meaningful explanations. 122 | */ 123 | setFeatureLookupTable: function(featureLookupTable) { 124 | if (this.ranker.setFeatureLookupTable) 125 | this.ranker.setFeatureLookupTable(featureLookupTable); 126 | if (this.counter.setFeatureLookupTable) 127 | this.counter.setFeatureLookupTable(featureLookupTable); 128 | }, 129 | } 130 | 131 | 132 | module.exports = MetaLabeler; 133 | -------------------------------------------------------------------------------- /dist/features/FeatureLookupTable.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } 4 | 5 | /** 6 | * FeatureLookupTable - a table for converting features to numbers and vice versa 7 | */ 8 | var FeatureLookupTable = function FeatureLookupTable() { 9 | _classCallCheck(this, FeatureLookupTable); 10 | 11 | this.featureIndexToFeatureName = [undefined]; 12 | this.featureNameToFeatureIndex = { 13 | undefined: 0 14 | }; 15 | }; 16 | 17 | FeatureLookupTable.prototype = { 18 | // add a single feature, if it does not exist 19 | addFeature: function addFeature(feature) { 20 | if (!(feature in this.featureNameToFeatureIndex)) { 21 | var newIndex = this.featureIndexToFeatureName.length; 22 | this.featureIndexToFeatureName.push(feature); 23 | this.featureNameToFeatureIndex[feature] = 
/**
 * FeatureLookupTable - a table for converting features to numbers and vice versa.
 * Index 0 is reserved (it maps to `undefined`), so real features get indices >= 1.
 */
var FeatureLookupTable = function FeatureLookupTable() {
  if (!(this instanceof FeatureLookupTable)) {
    throw new TypeError("Cannot call a class as a function");
  }
  this.featureIndexToFeatureName = [undefined];
  this.featureNameToFeatureIndex = {
    undefined: 0
  };
};

FeatureLookupTable.prototype = {
  // add a single feature, if it does not exist.
  // BUGFIX: use an own-property check; the previous `feature in map` test also matched
  // inherited Object.prototype members, so features named "toString", "constructor",
  // etc. were never added and lookups returned built-in functions instead of indices.
  addFeature: function addFeature(feature) {
    if (!Object.prototype.hasOwnProperty.call(this.featureNameToFeatureIndex, feature)) {
      var newIndex = this.featureIndexToFeatureName.length;
      this.featureIndexToFeatureName.push(feature);
      this.featureNameToFeatureIndex[feature] = newIndex;
    }
  },
  // add all features in the given hash (its keys) or array (its values):
  addFeatures: function addFeatures(hash) {
    if (hash instanceof Array) {
      // plain index loop - for...in over arrays is error-prone:
      for (var i = 0; i < hash.length; ++i) {
        this.addFeature(hash[i]);
      }
    } else if (hash instanceof Object) {
      for (var feature in hash) {
        this.addFeature(feature);
      }
    } else throw new Error("FeatureLookupTable.addFeatures expects a hash or an array, but got: " + JSON.stringify(hash));
  },
  // add all features in all hashes in the given array:
  addFeaturess: function addFeaturess(hashes) {
    for (var i = 0; i < hashes.length; ++i) {
      this.addFeatures(hashes[i]);
    }
  },

  /**
   * Convert the given feature to a numeric index (adding it first if needed).
   */
  featureToNumber: function featureToNumber(feature) {
    this.addFeature(feature);
    return this.featureNameToFeatureIndex[feature];
  },
  numberToFeature: function numberToFeature(number) {
    return this.featureIndexToFeatureName[number];
  },

  /**
   * Convert the given hash of features to a numeric array, using 0 for padding.
   * If some features in the hash do not exist - they will be added.
   * @param hash any hash, for example, {a: 111, b: 222, c: 333}; or an array of
   *   feature names, in which case present features are marked with `true`.
   * @return a matching array, based on the current feature table. For example: [0, 111, 222, 0, 333]
   * @note some code borrowed from Heather Arthur: https://github.com/harthur/brain/blob/master/lib/lookup.js
   */
  hashToArray: function hashToArray(hash) {
    this.addFeatures(hash);
    var array = [];

    for (var featureIndex = 0; featureIndex < this.featureIndexToFeatureName.length; ++featureIndex) {
      array[featureIndex] = 0;
    }

    if (hash instanceof Array) {
      for (var i = 0; i < hash.length; ++i) {
        array[this.featureNameToFeatureIndex[hash[i]]] = true;
      }
    } else if (hash instanceof Object) {
      for (var feature in hash) {
        array[this.featureNameToFeatureIndex[feature]] = hash[feature];
      }
    } else throw new Error("Unsupported type: " + JSON.stringify(hash));

    return array;
  },

  /**
   * Convert all the given hashes of features to numeric arrays, using 0 for padding.
   * If some features in some of the hashes do not exist - they will be added.
   * @param hashes an array of hashes, for example, [{a: 111, b: 222}, {a: 11, c: 33}, ...]
   * @return an array of matching arrays, based on the current feature table.
   */
  hashesToArrays: function hashesToArrays(hashes) {
    this.addFeaturess(hashes);
    var arrays = [];

    for (var i = 0; i < hashes.length; ++i) {
      arrays[i] = [];

      for (var feature in this.featureNameToFeatureIndex) {
        arrays[i][this.featureNameToFeatureIndex[feature]] = hashes[i][feature] || 0;
      }
    }

    return arrays;
  },

  /**
   * Convert the given numeric array to a hash of features, ignoring zero values.
   */
  arrayToHash: function arrayToHash(array) {
    var hash = {};

    for (var feature in this.featureNameToFeatureIndex) {
      if (array[this.featureNameToFeatureIndex[feature]]) hash[feature] = array[this.featureNameToFeatureIndex[feature]];
    }

    return hash;
  },

  /**
   * Convert the given numeric arrays to an array of hashes of features, ignoring zero values.
   */
  arraysToHashes: function arraysToHashes(arrays) {
    var hashes = [];

    for (var i = 0; i < arrays.length; ++i) {
      hashes[i] = this.arrayToHash(arrays[i]);
    }

    return hashes;
  },
  toJSON: function toJSON() {
    return {
      featureIndexToFeatureName: this.featureIndexToFeatureName,
      featureNameToFeatureIndex: this.featureNameToFeatureIndex
    };
  },
  fromJSON: function fromJSON(json) {
    this.featureIndexToFeatureName = json.featureIndexToFeatureName;
    this.featureNameToFeatureIndex = json.featureNameToFeatureIndex;
  }
};

// guarded so the compiled file also loads outside CommonJS:
if (typeof module !== "undefined" && module.exports) module.exports = FeatureLookupTable;
4 | */ 5 | 6 | import { multilabel, Winnow } from "../../../dist/core"; 7 | import "../../sorted"; 8 | 9 | var retrain_count = 10; 10 | var BinaryRelevanceWinnow = multilabel.BinaryRelevance.bind(this, { 11 | binaryClassifierType: Winnow.bind(this, { 12 | promotion: 1.5, 13 | demotion: 0.5, 14 | margin: 1, 15 | retrain_count: retrain_count 16 | }) 17 | }); 18 | 19 | var MetaLabelerWinnow = multilabel.MetaLabeler.bind(this, { 20 | rankerType: BinaryRelevanceWinnow, 21 | counterType: BinaryRelevanceWinnow 22 | }); 23 | 24 | var dataset = [ 25 | { input: { I: 1, want: 1, aa: 1 }, output: "A" }, // train on single class 26 | { input: { I: 1, want: 1, bb: 1 }, output: ["B"] }, // train on array with single class (same effect) 27 | { input: { I: 1, want: 1, cc: 1 }, output: [{ C: "c" }] } // train on structured class, that will be stringified to "{C:c}". 28 | ]; 29 | 30 | describe("Meta-Labeler batch-trained on Single-class inputs", function() { 31 | var classifierBatch = new MetaLabelerWinnow(); 32 | classifierBatch.trainBatch(dataset); 33 | 34 | var classifier = classifierBatch; 35 | it("classifies 1-class samples", function() { 36 | classifier.classify({ I: 1, want: 1, aa: 1 }).should.eql(["A"]); 37 | classifier.classify({ I: 1, want: 1, bb: 1 }).should.eql(["B"]); 38 | classifier.classify({ I: 1, want: 1, cc: 1 }).should.eql(['{"C":"c"}']); 39 | }); 40 | 41 | it("knows its classes", function() { 42 | classifier.getAllClasses().should.eql(["A", "B", '{"C":"c"}']); 43 | }); 44 | 45 | it("explains its decisions", function() { 46 | var ab = classifier.classify( 47 | { I: 1, want: 1, aa: 1, and: 1, bb: 1 }, 48 | /*explain=*/ 3 49 | ); 50 | //console.dir(ab); 51 | ab.should.have.property("explanation").with.property("ranking"); 52 | ab.should.have.property("explanation").with.property("counting"); 53 | }); 54 | }); 55 | 56 | describe("Meta-Labeler online-trained on Single-class inputs", function() { 57 | var classifierOnline = new MetaLabelerWinnow(); 58 | for (var i = 0; 
i <= retrain_count; ++i) 59 | for (var d = 0; d < dataset.length; ++d) 60 | classifierOnline.trainOnline(dataset[d].input, dataset[d].output); 61 | 62 | var classifier = classifierOnline; 63 | it("classifies 1-class samples", function() { 64 | classifier.classify({ I: 1, want: 1, aa: 1 }).should.eql(["A"]); 65 | classifier.classify({ I: 1, want: 1, bb: 1 }).should.eql(["B"]); 66 | classifier.classify({ I: 1, want: 1, cc: 1 }).should.eql(['{"C":"c"}']); 67 | }); 68 | 69 | it("knows its classes", function() { 70 | classifier.getAllClasses().should.eql(["A", "B", '{"C":"c"}']); 71 | }); 72 | 73 | it("explains its decisions", function() { 74 | // classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }, /*explain=*/1).should.have.property('explanation').with.property('ranking').with.lengthOf(3); 75 | classifier 76 | .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }, /*explain=*/ 1) 77 | .should.have.property("explanation") 78 | .with.property("ranking"); 79 | // classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }, /*explain=*/3).should.have.property('explanation').with.property('counting').with.lengthOf(1); 80 | classifier 81 | .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }, /*explain=*/ 3) 82 | .should.have.property("explanation") 83 | .with.property("counting"); 84 | }); 85 | }); 86 | 87 | describe("Meta-Labeler batch-trained on two-class inputs", function() { 88 | var classifier = new MetaLabelerWinnow(); 89 | classifier.trainBatch([ 90 | { input: { I: 1, want: 1, aa: 1, bb: 1 }, output: ["A", "B"] }, 91 | { input: { I: 1, want: 1, bb: 1, cc: 1 }, output: ["B", "C"] }, 92 | { input: { I: 1, want: 1, cc: 1, dd: 1 }, output: ["C", "D"] }, 93 | { input: { I: 1, want: 1, dd: 1, aa: 1 }, output: ["D", "A"] } 94 | ]); 95 | 96 | it("classifies 2-class samples", function() { 97 | classifier 98 | .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }) 99 | .sorted() 100 | .should.eql(["A", "B"]); 101 | classifier 102 | .classify({ I: 1, want: 1, bb: 1, and: 1, cc: 1 }) 
103 | .sorted() 104 | .should.eql(["B", "C"]); 105 | classifier 106 | .classify({ I: 1, want: 1, cc: 1, and: 1, dd: 1 }) 107 | .sorted() 108 | .should.eql(["C", "D"]); 109 | classifier 110 | .classify({ I: 1, want: 1, dd: 1, and: 1, aa: 1 }) 111 | .sorted() 112 | .should.eql(["A", "D"]); 113 | }); 114 | 115 | it("explains its decisions", function() { 116 | // classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }, /*explain=*/1).should.have.property('explanation').with.property('ranking').with.lengthOf(4); 117 | classifier 118 | .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }, /*explain=*/ 1) 119 | .should.have.property("explanation") 120 | .with.property("ranking"); 121 | // classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }, /*explain=*/3).should.have.property('explanation').with.property('counting').with.lengthOf(1); 122 | classifier 123 | .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }, /*explain=*/ 3) 124 | .should.have.property("explanation") 125 | .with.property("counting"); 126 | }); 127 | }); 128 | -------------------------------------------------------------------------------- /dist/utils/partitions.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | /** 4 | * Utilities for partitioning datasets of documents for training and testing. 5 | * 6 | * @author Erel Segal-haLevi 7 | * @since 2013-06 8 | */ 9 | var _ = require("underscore")._; 10 | /** 11 | * Create a single partition of the given dataset. 12 | * 13 | * @param dataset an array. 14 | * @param testSetStart an index into the array. 15 | * @param testSetCount int - the num of samples in the test set, starting from testSetStart. 
/**
 * Create a single partition of the given dataset.
 *
 * @param dataset an array.
 * @param testSetStart an index into the array.
 * @param testSetCount int - the number of samples in the test set, starting from testSetStart.
 * @return an object {train: trainSet, test: testSet}.
 */
function partition(dataset, testSetStart, testSetCount) {
  // deep-clone, so the returned sets do not alias (or mutate) the caller's array:
  var datasetclone = JSON.parse(JSON.stringify(dataset));
  var testSet = datasetclone.splice(testSetStart, testSetCount);
  return {
    train: datasetclone, // the clone without the test-set
    test: testSet
  };
}

/**
 * Return a randomly-shuffled shallow copy of the given array (Fisher-Yates).
 * Replaces the previous use of _.shuffle - same contract: copy, not in-place.
 */
function shuffled(array) {
  var copy = array.slice();
  for (var i = copy.length - 1; i > 0; --i) {
    var j = Math.floor(Math.random() * (i + 1));
    var swap = copy[i];
    copy[i] = copy[j];
    copy[j] = swap;
  }
  return copy;
}

/**
 * Create several different partitions of the given dataset to train and test.
 * Useful for cross-validation.
 *
 * @param dataset any array.
 * @param numOfPartitions number of different partitions to generate.
 * @param callback a function (trainSet, testSet, partitionIndex) called for each partition.
 * @note fold size is dataset.length/numOfPartitions (splice truncates a fractional count).
 * @note code adapted from Heather Arthur: https://github.com/harthur/classifier/blob/master/test/cross-validation/cross-validate.js
 */
function partitions(dataset, numOfPartitions, callback) {
  // BUGFIX: the shuffled copy was computed but the ORIGINAL array was partitioned,
  // so the folds were never actually randomized; partition the shuffled copy instead.
  var shuffledDataset = shuffled(dataset);
  var testSetCount = dataset.length / numOfPartitions;

  for (var iPartition = 0; iPartition < numOfPartitions; ++iPartition) {
    var testSetStart = iPartition * testSetCount;
    var currentPartition = partition(shuffledDataset, testSetStart, testSetCount);
    callback(currentPartition.train, currentPartition.test, iPartition);
  }
}

/**
 * Deterministic (unshuffled) single fold of the dataset.
 * Useful for cross-validation in the Threshold classifier.
 *
 * @return {train, test} for the given partitionIndex; empty arrays if the index is out of range.
 */
function partitions_consistent_by_fold(dataset, numOfPartitions, partitionIndex) {
  if (!Array.isArray(dataset)) throw new Error("dataset is not an array");
  if (numOfPartitions === undefined) throw new Error("numOfPartitions " + numOfPartitions);
  if (partitionIndex === undefined) throw new Error("partitionIndex " + partitionIndex);
  var testSetCount = dataset.length / numOfPartitions;
  var result = {
    'train': [],
    'test': []
  };

  for (var iPartition = 0; iPartition < numOfPartitions; ++iPartition) {
    if (iPartition == partitionIndex) {
      // compute the partition only for the requested fold
      // (the previous code needlessly partitioned every fold):
      var currentPartition = partition(dataset, iPartition * testSetCount, testSetCount);
      result['train'] = currentPartition.train;
      result['test'] = currentPartition.test;
    }
  }

  return result;
}

/**
 * Deterministic (unshuffled) cross-validation folds.
 */
function partitions_consistent(dataset, numOfPartitions, callback) {
  var testSetCount = dataset.length / numOfPartitions;

  for (var iPartition = 0; iPartition < numOfPartitions; ++iPartition) {
    var currentPartition = partition(dataset, iPartition * testSetCount, testSetCount);
    callback(currentPartition.train, currentPartition.test, iPartition);
  }
}

/**
 * Like partitions_consistent, but the callback receives (test, train) - i.e. it
 * trains on the small fold and tests on the rest.
 * (The misspelled name is kept for backward compatibility with existing callers.)
 */
function partitions_reverese(dataset, numOfPartitions, callback) {
  var testSetCount = dataset.length / numOfPartitions;

  for (var iPartition = 0; iPartition < numOfPartitions; ++iPartition) {
    var currentPartition = partition(dataset, iPartition * testSetCount, testSetCount);
    callback(currentPartition.test, currentPartition.train, iPartition);
  }
}

// guarded so the compiled file also loads outside CommonJS:
if (typeof exports !== "undefined") {
  exports.partition = partition;
  exports.partitions = partitions;
  exports.partitions_consistent_by_fold = partitions_consistent_by_fold;
  exports.partitions_consistent = partitions_consistent;
  exports.partitions_reverese = partitions_reverese;
}
0; iPartition < numOfPartitions; ++iPartition) { 109 | var testSetStart = iPartition * testSetCount; 110 | var dataset = JSON.parse(JSON.stringify(datasetor)); 111 | var test = []; 112 | var train = []; 113 | 114 | _(count - testSetCount).times(function (n) { 115 | train.push([]); 116 | }); 117 | 118 | _.each(dataset, function (value, key, list) { 119 | test = test.concat(value.splice(testSetStart, testSetCount)); 120 | 121 | _.each(value, function (elem, key1, list1) { 122 | train[key1].push(elem); 123 | }, this); 124 | }, this); 125 | 126 | callback(train, test, iPartition); 127 | } 128 | }; 129 | 130 | exports.partitions_hash_fold = function (datasetor, numOfPartitions, fold) { 131 | var count = datasetor[Object.keys(datasetor)[0]].length; 132 | var testSetCount = Math.floor(count / numOfPartitions); 133 | var testSetStart = fold * testSetCount; // var dataset = JSON.parse(JSON.stringify(datasetor)) 134 | 135 | var test = []; 136 | var train = []; 137 | 138 | _(count - testSetCount).times(function (n) { 139 | train.push([]); 140 | }); 141 | 142 | _.each(datasetor, function (value, key, list) { 143 | test = test.concat(value.splice(testSetStart, testSetCount)); 144 | 145 | _.each(value, function (elem, key1, list1) { 146 | train[key1].push(elem); 147 | }, this); 148 | }, this); 149 | 150 | return { 151 | "train": train, 152 | "test": test 153 | }; 154 | }; -------------------------------------------------------------------------------- /dist/core/multilabel/MetaLabeler.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } 4 | 5 | var hash = require("../../utils/hash"); 6 | 7 | var sprintf = require("sprintf").sprintf; 8 | 9 | var _ = require("underscore")._; 10 | /** 11 | * MetaLabeler - Multi-label classifier, based on: 12 | * 13 | * Tang Lei, Rajan Suju, 
Narayanan Vijay K.. Large scale multi-label classification via metalabeler in Proceedings of the 18th international conference on World wide webWWW '09(New York, NY, USA):211-220ACM 2009. 14 | * http://www.citeulike.org/user/erelsegal-halevi/article/4860265 15 | * 16 | * A MetaLabeler uses two multi-class classifiers to create a single multi-label classifier. One is called "ranker" and the other is called "counter". 17 | * 18 | * The MetaLabeler assigns labels to a sample in the following two stages: 19 | * - Stage 1: Ranking. The sample is sent to the "ranker", which returns all available labels ordered from the most relevant to the least relevant. 20 | * - Stage 2: Counting. The sample is sent to the "counter", which returns integer C >= 0 which represents a number of labels. 21 | * The MetaLabeler returns the C most relevant labels from the list returned by the ranker. 22 | * 23 | * @param opts 24 | * rankerType (mandatory) - the type of the multi-class classifier used for ranking the labels. 25 | * counterType (mandatory) - the type of the multi-class classifier used for selecting the number of labels. 26 | */ 27 | 28 | 29 | var MetaLabeler = function MetaLabeler(opts) { 30 | _classCallCheck(this, MetaLabeler); 31 | 32 | if (!opts.rankerType) { 33 | console.dir(opts); 34 | throw new Error("opts.rankerType not found"); 35 | } 36 | 37 | if (!opts.counterType) { 38 | console.dir(opts); 39 | throw new Error("opts.counterType not found"); 40 | } 41 | 42 | this.ranker = new opts.rankerType(); 43 | this.counter = new opts.counterType(); 44 | }; 45 | 46 | MetaLabeler.prototype = { 47 | /** 48 | * Tell the classifier that the given sample belongs to the given classes. 49 | * 50 | * @param sample a document. 51 | * @param labels an array whose VALUES are classes. 
52 | */ 53 | trainOnline: function trainOnline(sample, labels) { 54 | // The ranker is just trained by the given set of relevant labels: 55 | this.ranker.trainOnline(sample, labels); // The counter is trained by the *number* of relevant labels: 56 | 57 | var labelCount = (Array.isArray(labels) ? labels : Object.keys(labels)).length; 58 | this.counter.trainOnline(sample, labelCount); 59 | }, 60 | 61 | /** 62 | * Train the classifier with all the given documents. 63 | * 64 | * @param dataset 65 | * an array with objects of the format: 66 | * {input: sample1, output: [class11, class12...]} 67 | */ 68 | trainBatch: function trainBatch(dataset) { 69 | // The ranker is just trained by the given set of labels relevant to each sample: 70 | this.ranker.trainBatch(dataset); // The counter is trained by the *number* of labels relevant to each sample: 71 | 72 | var labelCountDataset = dataset.map(function (datum) { 73 | var labelCount = Array.isArray(datum.output) ? datum.output.length : 1; 74 | return { 75 | input: datum.input, 76 | output: labelCount 77 | }; 78 | }); 79 | this.counter.trainBatch(labelCountDataset); 80 | }, 81 | 82 | /** 83 | * Use the model trained so far to classify a new sample. 84 | * 85 | * @param sample a document. 86 | * @param explain - int - if positive, an "explanation" field, with the given length, will be added to the result. 87 | * 88 | * @return an array whose VALUES are classes. 89 | */ 90 | classify: function classify(sample, explain) { 91 | var rankedLabelsWithExplain = this.ranker.classify(sample, explain, 92 | /*withScores=*/ 93 | true); 94 | var rankedLabels = explain > 0 ? rankedLabelsWithExplain.classes : rankedLabelsWithExplain; 95 | var labelCountWithExplain = this.counter.classify(sample, explain, 96 | /*withScores=*/ 97 | true); 98 | var labelCount = explain > 0 ? 
labelCountWithExplain.classes[0][0] : labelCountWithExplain[0][0]; 99 | if (_.isString(labelCount)) labelCount = parseInt(labelCount); // Pick the labelCount most relevant labels from the list returned by the ranker: 100 | 101 | var positiveLabelsWithScores = rankedLabels.slice(0, labelCount); 102 | var positiveLabels = positiveLabelsWithScores; 103 | if (positiveLabelsWithScores.length != 0) if (_.isArray(positiveLabelsWithScores[0])) var positiveLabels = positiveLabelsWithScores.map(function (labelWithScore) { 104 | return labelWithScore[0]; 105 | }); 106 | return explain > 0 ? { 107 | classes: positiveLabels, 108 | explanation: { 109 | ranking: rankedLabelsWithExplain.explanation, 110 | counting: labelCountWithExplain.explanation 111 | } 112 | } : positiveLabels; 113 | }, 114 | getAllClasses: function getAllClasses() { 115 | return this.ranker.getAllClasses(); 116 | }, 117 | toJSON: function toJSON() {}, 118 | fromJSON: function fromJSON(json) {}, 119 | 120 | /** 121 | * Link to a FeatureLookupTable from a higher level in the hierarchy (typically from an EnhancedClassifier), used ONLY for generating meaningful explanations. 
122 | */ 123 | setFeatureLookupTable: function setFeatureLookupTable(featureLookupTable) { 124 | if (this.ranker.setFeatureLookupTable) this.ranker.setFeatureLookupTable(featureLookupTable); 125 | if (this.counter.setFeatureLookupTable) this.counter.setFeatureLookupTable(featureLookupTable); 126 | } 127 | }; 128 | module.exports = MetaLabeler; -------------------------------------------------------------------------------- /dist/core/decisiontree/DecisionTree.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | /* Implementation of Decision Tree classifier, ID3 implementation 4 | the code based on https://github.com/bugless/nodejs-decision-tree-id3/blob/master/lib/decision-tree.js 5 | */ 6 | var _ = require('underscore'); 7 | 8 | function DecisionTree(opts) { 9 | if (!opts) opts = {}; // this.debug = opts.debug || false; 10 | } 11 | 12 | DecisionTree.prototype = { 13 | toJSON: function toJSON() { 14 | return this.root; 15 | }, 16 | fromJSON: function fromJSON(json) { 17 | this.root = json; 18 | }, 19 | createTree: function createTree(dataset, features) { 20 | var targets = _.unique(_.pluck(dataset, 'output')); 21 | 22 | if (targets.length == 1) { 23 | // console.log("end node! 
"+targets[0]); 24 | return { 25 | type: "result", 26 | val: targets[0], 27 | name: targets[0], 28 | alias: targets[0] + this.randomTag() 29 | }; 30 | } 31 | 32 | if (features.length == 0) { 33 | // console.log("returning the most dominate feature!!!"); 34 | var topTarget = this.mostCommon(targets); 35 | return { 36 | type: "result", 37 | val: topTarget, 38 | name: topTarget, 39 | alias: topTarget + this.randomTag() 40 | }; 41 | } 42 | 43 | var bestFeature = this.maxGain(dataset, features); 44 | 45 | var remainingFeatures = _.without(features, bestFeature); 46 | 47 | var possibleValues = _.unique(_.pluck(_.pluck(dataset, 'input'), bestFeature)); 48 | 49 | var node = { 50 | name: bestFeature, 51 | alias: bestFeature + this.randomTag() 52 | }; 53 | node.type = "feature"; 54 | node.vals = _.map(possibleValues, function (v) { 55 | var _newS = dataset.filter(function (x) { 56 | return x['input'][bestFeature] == v; 57 | }); 58 | 59 | var child_node = { 60 | name: v, 61 | alias: v + this.randomTag(), 62 | type: "feature_value" 63 | }; 64 | child_node.child = this.createTree(_newS, remainingFeatures); 65 | return child_node; 66 | }, this); 67 | return node; 68 | }, 69 | mostCommon: function mostCommon(l) { 70 | return _.sortBy(l, function (a) { 71 | return this.count(a, l); 72 | }, this).reverse()[0]; 73 | }, 74 | count: function count(a, l) { 75 | return _.filter(l, function (b) { 76 | return b === a; 77 | }).length; 78 | }, 79 | randomTag: function randomTag() { 80 | return "_r" + Math.round(Math.random() * 1000000).toString(); 81 | }, 82 | extractFeatures: function extractFeatures(dataset) { 83 | var features = []; 84 | 85 | for (var record in dataset) { 86 | for (var key in dataset[record]['input']) { 87 | features.push(key); 88 | } 89 | } 90 | 91 | return features; 92 | }, 93 | gain: function gain(dataset, feature) { 94 | var attrVals = _.unique(_.pluck(_.pluck(dataset, 'input'), feature)); 95 | 96 | var setEntropy = this.entropy(_.pluck(dataset, 'output')); 97 | 98 | 
var setSize = _.size(dataset); 99 | 100 | var entropies = attrVals.map(function (n) { 101 | var subset = dataset.filter(function (x) { 102 | return x['input'][feature] === n; 103 | }); 104 | return subset.length / setSize * this.entropy(_.pluck(subset, 'output')); 105 | }, this); 106 | var sumOfEntropies = entropies.reduce(function (a, b) { 107 | return a + b; 108 | }, 0); 109 | return setEntropy - sumOfEntropies; 110 | }, 111 | entropy: function entropy(vals) { 112 | var uniqueVals = _.unique(vals); 113 | 114 | var probs = uniqueVals.map(function (x) { 115 | return this.prob(x, vals); 116 | }, this); 117 | var logVals = probs.map(function (p) { 118 | return -p * this.log2(p); 119 | }, this); 120 | return logVals.reduce(function (a, b) { 121 | return a + b; 122 | }, 0); 123 | }, 124 | prob: function prob(val, vals) { 125 | var instances = _.filter(vals, function (x) { 126 | return x === val; 127 | }).length; 128 | 129 | var total = vals.length; 130 | return instances / total; 131 | }, 132 | log2: function log2(n) { 133 | return Math.log(n) / Math.log(2); 134 | }, 135 | maxGain: function maxGain(dataset, features) { 136 | return _.max(features, function (e) { 137 | return this.gain(dataset, e); 138 | }, this); 139 | }, 140 | setFeatureLookupTable: function setFeatureLookupTable(featureLookupTable) { 141 | this.featureLookupTable = featureLookupTable; 142 | }, 143 | 144 | /** 145 | * Batch training (a set of samples). Uses the option this.retrain_count. 146 | * 147 | * @param dataset an array of samples of the form {input: {feature1: value1...} , output: 0/1} 148 | */ 149 | trainBatch: function trainBatch(dataset) { 150 | var features = this.extractFeatures(dataset); 151 | this.root = this.createTree(dataset, features); 152 | }, 153 | 154 | /** 155 | * @param inputs a SINGLE sample (a hash of feature-value pairs). 156 | * @param continuous_output if true, return the net classification value. If false [default], return 0 or 1. 
157 | * @param explain - int - if positive, an "explanation" field, with the given length, will be added to the result. 158 | * @return the classification of the sample. 159 | */ 160 | classify: function classify(features) { 161 | root = this.root; 162 | 163 | while (root.type !== "result") { 164 | var attr = root.name; 165 | var sampleVal = features[attr]; 166 | 167 | var childNode = _.detect(root.vals, function (x) { 168 | return x.name == sampleVal; 169 | }); 170 | 171 | if (childNode) { 172 | root = childNode.child; 173 | } else { 174 | root = root.vals[0].child; 175 | } 176 | } 177 | 178 | return root.val; 179 | } 180 | }; 181 | module.exports = DecisionTree; --------------------------------------------------------------------------------