├── docs
├── CNAME
├── .vuepress
│ ├── public
│ │ ├── logo.png
│ │ ├── favicon.ico
│ │ ├── apple-icon.png
│ │ ├── favicon-16x16.png
│ │ ├── favicon-32x32.png
│ │ ├── favicon-96x96.png
│ │ ├── ms-icon-70x70.png
│ │ ├── apple-icon-57x57.png
│ │ ├── apple-icon-60x60.png
│ │ ├── apple-icon-72x72.png
│ │ ├── apple-icon-76x76.png
│ │ ├── ms-icon-144x144.png
│ │ ├── ms-icon-150x150.png
│ │ ├── ms-icon-310x310.png
│ │ ├── android-icon-36x36.png
│ │ ├── android-icon-48x48.png
│ │ ├── android-icon-72x72.png
│ │ ├── android-icon-96x96.png
│ │ ├── apple-icon-114x114.png
│ │ ├── apple-icon-120x120.png
│ │ ├── apple-icon-144x144.png
│ │ ├── apple-icon-152x152.png
│ │ ├── apple-icon-180x180.png
│ │ ├── android-icon-144x144.png
│ │ ├── android-icon-192x192.png
│ │ ├── apple-icon-precomposed.png
│ │ ├── browserconfig.xml
│ │ └── manifest.json
│ └── config.js
├── README.md
├── .gitignore
└── package.json
├── src
├── core
│ ├── .gitignore
│ ├── svm
│ │ ├── SvmJsDemo.js
│ │ ├── SvmPerfDemo.js
│ │ ├── SvmLinearDemo.js
│ │ ├── SvmLinearMulticlassDemo.js
│ │ ├── SvmJs.js
│ │ └── svmcommon.js
│ ├── neural
│ │ └── NeuralNetwork.js
│ ├── winnow
│ │ └── WinnowHashDemo.js
│ └── multilabel
│   ├── index.js
│   ├── multilabelutils.js
│   ├── BinaryRelevanceDemo.js
│   └── MetaLabeler.js
├── features
│ ├── LowerCaseNormalizer.js
│ ├── README.md
│ ├── NGramsOfWords.js
│ ├── NGramsFromArray.js
│ ├── RegexpNormalizer.js
│ ├── NGramsOfLetters.js
│ ├── RegexpSplitter.js
│ ├── HypernymExtractor.js
│ ├── index.js
│ └── FeatureLookupTable.js
├── formats
│ ├── index.js
│ ├── json.js
│ ├── tsv.js
│ ├── svmlight.js
│ └── arff.js
├── utils
│ ├── index.js
│ ├── hamming.js
│ ├── list.js
│ ├── unseen_correlation.js
│ └── partitions.js
└── index.js
├── test
├── mocha.opts
├── tempfiles
│ └── .gitignore
├── wordcounts.js
├── sorted.js
├── utilsTest
│ ├── HammingDistanceTest.js
│ ├── ListTest.js
│ └── PartitionsTest.js
├── test_utils.js
├── generaterandom.js
├── featuresTest
│ ├── FeatureLookupTableTest.js
│ ├── RegexpNormalizerTest.js
│ ├── RegexpSplitterTest.js
│ └── FeatureExtractorTest.js
└── classifiersTest
│ ├── multilabel
│ ├── ClassifierWithSplitterTest.js
│ ├── MulticlassSegmentationBayesTest.js
│ ├── MetaLabelerLanguageModelTest.js
│ ├── MetaLabelerSvmTest.js
│ └── MetaLabelerWinnowTest.js
│ ├── WinnowExampleTest.js
│ ├── NeuralWithSpellCheckerTest.js
│ ├── SvmJsTest.js
│ ├── NeuralWithFeatureExtractorTest.js
│ ├── NeuralWithNormalizerTest.js
│ ├── WinnowTest.js
│ ├── SvmMulticlassTest.js
│ └── SvmTest.js
├── .babelrc
├── renovate.json
├── .github
├── ISSUE_TEMPLATE
│ ├── custom.md
│ ├── feature_request.md
│ └── bug_report.md
└── FUNDING.yml
├── index.js
├── dist
├── features
│ ├── LowerCaseNormalizer.js
│ ├── NGramsOfWords.js
│ ├── NGramsFromArray.js
│ ├── RegexpNormalizer.js
│ ├── NGramsOfLetters.js
│ ├── RegexpSplitter.js
│ ├── HypernymExtractor.js
│ ├── index.js
│ └── FeatureLookupTable.js
├── utils
│ ├── index.js
│ ├── hamming.js
│ ├── list.js
│ ├── unseen_correlation.js
│ └── partitions.js
├── formats
│ ├── index.js
│ ├── json.js
│ ├── tsv.js
│ ├── svmlight.js
│ └── arff.js
├── core
│ ├── svm
│ │ ├── SvmJsDemo.js
│ │ ├── SvmPerfDemo.js
│ │ ├── SvmLinearDemo.js
│ │ ├── SvmLinearMulticlassDemo.js
│ │ ├── SvmJs.js
│ │ └── svmcommon.js
│ ├── neural
│ │ └── NeuralNetwork.js
│ ├── decisiontree
│ │ ├── DecisionTreeDemo.js
│ │ └── DecisionTree.js
│ ├── winnow
│ │ └── WinnowHashDemo.js
│ └── multilabel
│   ├── index.js
│   ├── multilabelutils.js
│   ├── BinaryRelevanceDemo.js
│   └── MetaLabeler.js
└── index.js
├── .gitignore
├── package.json
├── README.md
└── CODE_OF_CONDUCT.md
/docs/CNAME:
--------------------------------------------------------------------------------
1 | neuro.js.org
--------------------------------------------------------------------------------
/src/core/.gitignore:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test/mocha.opts:
--------------------------------------------------------------------------------
1 | --recursive
2 |
--------------------------------------------------------------------------------
/test/tempfiles/.gitignore:
--------------------------------------------------------------------------------
1 | /Svm*.*
--------------------------------------------------------------------------------
/.babelrc:
--------------------------------------------------------------------------------
1 | {
2 | "presets": ["@babel/preset-env"]
3 | }
--------------------------------------------------------------------------------
/renovate.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": [
3 | "config:base"
4 | ]
5 | }
6 |
--------------------------------------------------------------------------------
/docs/.vuepress/public/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/logo.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/favicon.ico
--------------------------------------------------------------------------------
/docs/.vuepress/public/apple-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/favicon-16x16.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/favicon-32x32.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/favicon-96x96.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/favicon-96x96.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/ms-icon-70x70.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/ms-icon-70x70.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/apple-icon-57x57.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-57x57.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/apple-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-60x60.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/apple-icon-72x72.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-72x72.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/apple-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-76x76.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/ms-icon-144x144.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/ms-icon-144x144.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/ms-icon-150x150.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/ms-icon-150x150.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/ms-icon-310x310.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/ms-icon-310x310.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/android-icon-36x36.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/android-icon-36x36.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/android-icon-48x48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/android-icon-48x48.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/android-icon-72x72.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/android-icon-72x72.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/android-icon-96x96.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/android-icon-96x96.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/apple-icon-114x114.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-114x114.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/apple-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-120x120.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/apple-icon-144x144.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-144x144.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/apple-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-152x152.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/apple-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-180x180.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/android-icon-144x144.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/android-icon-144x144.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/android-icon-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/android-icon-192x192.png
--------------------------------------------------------------------------------
/docs/.vuepress/public/apple-icon-precomposed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-precomposed.png
--------------------------------------------------------------------------------
/src/features/LowerCaseNormalizer.js:
--------------------------------------------------------------------------------
/**
 * Feature normalizer that maps a sample string to its lower-case form.
 * @param sample a string
 * @return the same string converted to lower case
 */
const lowerCaseNormalizer = (sample) => sample.toLowerCase();

export default lowerCaseNormalizer;
--------------------------------------------------------------------------------
/src/formats/index.js:
--------------------------------------------------------------------------------
// Aggregates the dataset I/O format helpers (ARFF, JSON, TSV, SVMlight)
// into one module.
// NOTE(review): mixes ES-module `export const` with CommonJS `require`;
// this relies on the Babel build to reconcile the two module systems —
// confirm against .babelrc before changing the style.
export const arff = require("./arff");
export const json = require("./json");
export const tsv = require("./tsv");
export const svmlight = require("./svmlight");
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/custom.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Custom issue template
3 | about: Describe this issue template's purpose here.
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 |
11 |
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
// Package entry point: re-exports the compiled (dist/) build of the
// library — classifiers, feature extractors, dataset formats and utils.
// NOTE(review): mixes ES-module `export const` with CommonJS `require`;
// relies on the Babel build to reconcile the two module systems.
export const classifiers = require('./dist/core');
export const features = require('./dist/features');
export const formats = require('./dist/formats');
export const utils = require('./dist/utils');
--------------------------------------------------------------------------------
/src/features/README.md:
--------------------------------------------------------------------------------
1 | This folder should contain several kinds of feature extractors.
2 |
3 | A feature extractor is a function that takes an input object, and returns a features object for that object, for use in training and/or classification.
4 |
--------------------------------------------------------------------------------
/docs/.vuepress/public/browserconfig.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?><browserconfig><msapplication><tile><square70x70logo src="/ms-icon-70x70.png"/><square150x150logo src="/ms-icon-150x150.png"/><square310x310logo src="/ms-icon-310x310.png"/><TileColor>#ffffff</TileColor></tile></msapplication></browserconfig>
--------------------------------------------------------------------------------
/dist/features/LowerCaseNormalizer.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | Object.defineProperty(exports, "__esModule", {
4 | value: true
5 | });
6 | exports["default"] = _default;
7 |
8 | /**
9 | * Normalizes a sentence by converting it to lower case.
10 | */
11 | function _default(sample) {
12 | return sample.toLowerCase();
13 | }
14 |
15 | ;
--------------------------------------------------------------------------------
/src/utils/index.js:
--------------------------------------------------------------------------------
// Public utility helpers of the library (hashing, partitioning, evaluation).
// NOTE(review): the src/utils tree lists only index, hamming, list,
// unseen_correlation and partitions — ./trainAndTest, ./hash and
// ./PrecisionRecall are not shown there; confirm these modules exist.
// Also note ./list and ./unseen_correlation are not re-exported here.
var trainAndTest = require("./trainAndTest");
module.exports = {
  hash: require("./hash"),
  partitions: require("./partitions"),
  PrecisionRecall: require("./PrecisionRecall"),
  test: trainAndTest.test,
  compare: trainAndTest.compare,
  hammingDistance: require("./hamming").hammingDistance,
};
--------------------------------------------------------------------------------
/dist/utils/index.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | var trainAndTest = require("./trainAndTest");
4 |
5 | module.exports = {
6 | hash: require("./hash"),
7 | partitions: require("./partitions"),
8 | PrecisionRecall: require("./PrecisionRecall"),
9 | test: trainAndTest.test,
10 | compare: trainAndTest.compare,
11 | hammingDistance: require("./hamming").hammingDistance
12 | };
--------------------------------------------------------------------------------
/test/wordcounts.js:
--------------------------------------------------------------------------------
/**
 * Simple calculation of word-counts in a sentence.
 * @param sentence a string; words are separated by single spaces.
 * @return a hash {word1: count1, word2: count2, ...}
 */
export default function wordCounts(sentence) {
  const counts = {};
  for (const word of sentence.split(" ")) {
    counts[word] = (counts[word] || 0) + 1;
  }
  return counts;
}
--------------------------------------------------------------------------------
/src/formats/json.js:
--------------------------------------------------------------------------------
/**
 * Convert a single dataset to a compact JSON string: one sample per line,
 * samples after the first prefixed with ", ".
 *
 * Fixes: the accumulator was assigned without a declaration (`json = "["`),
 * which throws a ReferenceError in strict/ES-module code; the loop header
 * was also corrupted. Reconstructed to match the compiled dist version,
 * with the accumulator properly declared.
 *
 * @param dataset an array of samples in the format {input: {feature1: xxx, feature2: yyy, ...}, output: [1,2,3]}
 * @return the dataset serialized as a JSON string
 */
export function toJSON(dataset) {
  var json = "[";
  for (var i = 0; i < dataset.length; ++i) {
    json += (i > 0 ? "\n, " : "\n ") + JSON.stringify(dataset[i]);
  }
  json += "\n]\n";
  return json;
}
--------------------------------------------------------------------------------
/src/utils/hamming.js:
--------------------------------------------------------------------------------
/**
 * Calculate Hamming distance between two sets.
 * @param a, b - arrays
 * @return number of elements in a-b plus number of elements in b-a
 */
export function hammingDistance(a, b) {
  // elements of a missing from b, plus elements of b missing from a
  // (duplicates are counted once per occurrence, as in the original)
  const missingFromB = a.filter((item) => b.indexOf(item) < 0).length;
  const missingFromA = b.filter((item) => a.indexOf(item) < 0).length;
  return missingFromB + missingFromA;
}
--------------------------------------------------------------------------------
/src/features/NGramsOfWords.js:
--------------------------------------------------------------------------------
/**
 * NGramExtractor - extracts sequences of words in a text as its features.
 */

import NGramsFromArray from './NGramsFromArray';

/**
 * @param numOfWords the n of the n-grams
 * @param gap if truthy, the second word of each n-gram is replaced by "-"
 * @return a feature extractor function(sample, features)
 */
export default function (numOfWords, gap) {
  return function (sample, features) {
    // split on spaces, tabs and punctuation, dropping empty tokens
    const words = sample
      .split(/[ \t,;:.!?]/)
      .filter((token) => !!token);
    NGramsFromArray(numOfWords, gap, words, features);
  };
}
--------------------------------------------------------------------------------
/dist/formats/index.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | Object.defineProperty(exports, "__esModule", {
4 | value: true
5 | });
6 | exports.svmlight = exports.tsv = exports.json = exports.arff = void 0;
7 |
8 | var arff = require("./arff");
9 |
10 | exports.arff = arff;
11 |
12 | var json = require("./json");
13 |
14 | exports.json = json;
15 |
16 | var tsv = require("./tsv");
17 |
18 | exports.tsv = tsv;
19 |
20 | var svmlight = require("./svmlight");
21 |
22 | exports.svmlight = svmlight;
--------------------------------------------------------------------------------
/test/sorted.js:
--------------------------------------------------------------------------------
/**
 * This unit adds a non-intrusive property "sorted" to the Array prototype.
 *
 * It is used only for testing, when the order of the output array is not
 * important. For example:
 *
 *     classifier.classify("I want aa bb").sorted().should.eql(['A','B']);
 *
 * @author Erel Segal-Halevi
 * @since 2013-09-09
 */

Object.defineProperty(Array.prototype, "sorted", {
  // defineProperty keeps the property non-enumerable, so it does not
  // leak into for...in loops over arrays.
  value() {
    this.sort(); // sorts in place, then returns the same array
    return this;
  },
});
--------------------------------------------------------------------------------
/src/formats/tsv.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Small utility for writing a dataset in tab-separated-values format.
3 | *
4 | * @author Erel Segal-Halevi
5 | * @since 2013-08
6 | */
7 |
8 |
9 | /**
10 | * Write the dataset, one sample per line, with the given separator between sample and output.
11 | */
12 | exports.toTSV = function(dataset, separator) {
13 | if (!separator) {
14 | separator="\t";
15 | }
16 | dataset.forEach(function(sample) {
17 | console.log(JSON.stringify(sample.input)+separator+"["+sample.output+"]");
18 | });
19 | };
20 |
--------------------------------------------------------------------------------
/dist/formats/json.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | Object.defineProperty(exports, "__esModule", {
4 | value: true
5 | });
6 | exports.toJSON = toJSON;
7 |
8 | /**
9 | * convert a single dataset to compact JSON format.
10 | * @param dataset an array of samples in the format {input: {feature1: xxx, feature2: yyy, ...}, output: [1,2,3]}
11 | */
12 | function toJSON(dataset) {
13 | json = "[";
14 |
15 | for (var i = 0; i < dataset.length; ++i) {
16 | json += (i > 0 ? "\n, " : "\n ") + JSON.stringify(dataset[i]);
17 | }
18 |
19 | json += "\n]\n";
20 | return json;
21 | }
--------------------------------------------------------------------------------
/dist/formats/tsv.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | /**
4 | * Small utility for writing a dataset in tab-separated-values format.
5 | *
6 | * @author Erel Segal-Halevi
7 | * @since 2013-08
8 | */
9 |
10 | /**
11 | * Write the dataset, one sample per line, with the given separator between sample and output.
12 | */
13 | exports.toTSV = function (dataset, separator) {
14 | if (!separator) {
15 | separator = "\t";
16 | }
17 |
18 | dataset.forEach(function (sample) {
19 | console.log(JSON.stringify(sample.input) + separator + "[" + sample.output + "]");
20 | });
21 | };
--------------------------------------------------------------------------------
/dist/utils/hamming.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | Object.defineProperty(exports, "__esModule", {
4 | value: true
5 | });
6 | exports.hammingDistance = hammingDistance;
7 |
8 | /**
9 | * Calculate Hamming distance between two sets
10 | * @param a, b - arrays
11 | * @return number of elements in a-b plus number of elements in b-a
12 | */
13 | function hammingDistance(a, b) {
14 | var d = 0;
15 |
16 | for (var i = 0; i < a.length; ++i) {
17 | if (b.indexOf(a[i]) < 0) d++;
18 | }
19 |
20 | for (var i = 0; i < b.length; ++i) {
21 | if (a.indexOf(b[i]) < 0) d++;
22 | }
23 |
24 | return d;
25 | }
--------------------------------------------------------------------------------
/test/utilsTest/HammingDistanceTest.js:
--------------------------------------------------------------------------------
#!mocha

/**
 * a unit-test for Multi-Label classification
 */
// NOTE(review): the `.should` assertions rely on the should.js library
// being loaded by the mocha environment — it is not imported here; verify
// the test runner configuration.

import { hammingDistance } from "../../dist/utils";

describe("Hamming distance", function() {
  it("calculates hamming distance", function() {
    // distance = |a - b| + |b - a| (set differences)
    hammingDistance([], []).should.equal(0);
    hammingDistance(["a"], []).should.equal(1);
    hammingDistance([], ["a"]).should.equal(1);
    hammingDistance(["a"], ["a"]).should.equal(0);
    hammingDistance(["a"], ["b"]).should.equal(2);
    hammingDistance(["a", "b"], ["b", "c"]).should.equal(2);
  });
});
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: turtuvshin
5 | open_collective: intelligo
6 | ko_fi: turtuvshin
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
13 |
--------------------------------------------------------------------------------
/src/features/NGramsFromArray.js:
--------------------------------------------------------------------------------
/**
 * Convert an array of words/tokens to a set of n-grams, for a given n,
 * possibly with a gap: when `gap` is truthy the second token of every
 * n-gram is replaced by "-".
 *
 * The input array is temporarily padded with "[start]"/"[end]" markers
 * and restored before returning; features are written into `features`
 * as keys mapped to 1.
 */
export default function nGramsFromArray(numOfWords, gap, grams, features) {
  const padCount = numOfWords - 1 - (gap ? 1 : 0);

  // pad both ends so n-grams at the boundaries are represented
  for (let p = 0; p < padCount; ++p) {
    grams.unshift("[start]");
    grams.push("[end]");
  }

  // slide a window of numOfWords tokens over the padded array
  for (let start = 0; start + numOfWords <= grams.length; ++start) {
    const window = grams.slice(start, start + numOfWords);
    if (gap) window[1] = "-";
    features[window.join(" ").trim()] = 1;
  }

  // undo the padding, leaving the caller's array unchanged
  for (let p = 0; p < padCount; ++p) {
    grams.pop();
    grams.shift();
  }
}
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/src/core/svm/SvmJsDemo.js:
--------------------------------------------------------------------------------
// simple demonstration of SVM
// Trains a linear SVM (svm.js backend) on four 2-D points whose label
// equals the first coordinate, then classifies two unseen points.

var SvmJs = require('./SvmJs');

// C is the soft-margin regularization constant.
var svm = new SvmJs({C: 1.0});

var traindata = [
  {input: [0,0], output: 0},
  {input: [0,1], output: 0},
  {input: [1,0], output: 1},
  {input: [1,1], output: 1},
];

svm.trainBatch(traindata);

console.dir(svm.classify([0,2])); // 0
console.dir(svm.classify([1,3])); // 1

// explain:
// second argument — presumably the number of explanation entries to
// return alongside the label (TODO confirm against SvmJs.classify).
console.dir(svm.classify([0,2], 3)); // 0
console.dir(svm.classify([1,3], 3)); // 1


//continuous output:
// third argument requests the raw margin instead of a discrete label.
console.dir(svm.classify([0,2], 0, true)); // -1
console.dir(svm.classify([1,3], 0, true)); // 1
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | home: true
3 | heroImage: /logo.png
4 | heroText: Neuro
5 | tagline: Neuro.js is a machine learning framework for building AI assistants and chat-bots.
6 | actionText: Get Started →
7 | actionLink: /learn/
8 | features:
9 | - title: ML with Javascript
10 | details: Neuro is a library for developing and training ML models in JavaScript, and deploying in browser or on Node.js
11 | - title: Awesome Feature
12 | details: Neuro supports multi-label classification, online learning, and real-time classification.
13 | - title: Simplicity & performance
14 | details: Everyone should have access to simple machine learning. Practical machine learning should be simple.
15 | footer: MIT Licensed | Powered by Intelligo Systems
16 | ---
--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
// Public entry point of the library (source form): bundles the
// classifiers, feature extractors, dataset formats and utilities into a
// single default export.
// NOTE(review): EnhancedClassifier, SvmLinear, SvmPerf and WinnowHash are
// imported here but do not appear in the repository tree listing —
// confirm these modules exist before building.
import EnhancedClassifier from "./core/EnhancedClassifier";
import multilabel from "./core/multilabel";
import NeuralNetwork from "./core/neural/NeuralNetwork";
import SvmJs from "./core/svm/SvmJs";
import SvmLinear from "./core/svm/SvmLinear";
import SvmPerf from "./core/svm/SvmPerf";
import Winnow from "./core/winnow/WinnowHash";
import features from "./features";
import formats from "./formats";
import utils from "./utils";

export default {
  classifiers: {
    NeuralNetwork,
    SvmJs,
    SvmLinear,
    SvmPerf,
    Winnow,
    multilabel,
    EnhancedClassifier
  },
  features,
  formats,
  utils
};
--------------------------------------------------------------------------------
/src/core/neural/NeuralNetwork.js:
--------------------------------------------------------------------------------
/**
 * A wrapper for Heather Arthur's brain.js package: https://github.com/harthur/brain
 *
 * Adapts brain.js's NeuralNetwork to this library's classifier interface:
 * trainOnline() (unsupported — throws), train() (normalizes scalar
 * outputs), and classify() (alias for run()).
 *
 * @author Erel Segal-haLevi
 * @since 2013-09-29
 */

var NeuralNetwork = require('brain.js').NeuralNetwork;

// brain.js trains only in batch mode.
NeuralNetwork.prototype.trainOnline = function () {throw new Error("NeuralNetwork does not support online training");};

// Keep a reference to brain.js's own train(): the override below must
// delegate to it. Calling this.train() inside the override (as the old
// code did) re-enters the override itself and recurses until the stack
// overflows.
var _originalTrain = NeuralNetwork.prototype.train;

/**
 * Train the network, first wrapping any scalar sample output in an
 * array, since brain.js expects each output to be an array or object.
 * @param dataset an array of {input: ..., output: ...} samples
 * @return whatever brain.js's train() returns (training statistics)
 */
NeuralNetwork.prototype.train = function(dataset) {
  dataset.forEach(function(datum) {
    if (!Array.isArray(datum.output) && !(datum.output instanceof Object)){
      datum.output = [datum.output];
    }
  });
  return _originalTrain.call(this, dataset);
};

NeuralNetwork.prototype.classify = NeuralNetwork.prototype.run;

module.exports = NeuralNetwork;
--------------------------------------------------------------------------------
/dist/core/svm/SvmJsDemo.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | // simple demonstration of SVM
4 | var SvmJs = require('./SvmJs');
5 |
6 | var svm = new SvmJs({
7 | C: 1.0
8 | });
9 | var traindata = [{
10 | input: [0, 0],
11 | output: 0
12 | }, {
13 | input: [0, 1],
14 | output: 0
15 | }, {
16 | input: [1, 0],
17 | output: 1
18 | }, {
19 | input: [1, 1],
20 | output: 1
21 | }];
22 | svm.trainBatch(traindata);
23 | console.dir(svm.classify([0, 2])); // 0
24 |
25 | console.dir(svm.classify([1, 3])); // 1
26 | // explain:
27 |
28 | console.dir(svm.classify([0, 2], 3)); // 0
29 |
30 | console.dir(svm.classify([1, 3], 3)); // 1
31 | //continuous output:
32 |
33 | console.dir(svm.classify([0, 2], 0, true)); // -1
34 |
35 | console.dir(svm.classify([1, 3], 0, true)); // 1
--------------------------------------------------------------------------------
/dist/features/NGramsOfWords.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | Object.defineProperty(exports, "__esModule", {
4 | value: true
5 | });
6 | exports["default"] = _default;
7 |
8 | var _NGramsFromArray = _interopRequireDefault(require("./NGramsFromArray"));
9 |
10 | function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; }
11 |
12 | /**
13 | * NGramExtractor - extracts sequences of words in a text as its features.
14 | */
15 | function _default(numOfWords, gap) {
16 | return function (sample, features) {
17 | var words = sample.split(/[ \t,;:.!?]/).filter(function (a) {
18 | return !!a;
19 | }); // all non-empty words
20 |
21 | (0, _NGramsFromArray["default"])(numOfWords, gap, words, features);
22 | };
23 | }
24 |
25 | ;
--------------------------------------------------------------------------------
/src/core/winnow/WinnowHashDemo.js:
--------------------------------------------------------------------------------
/**
 * Demonstrates the winnow classification algorithm.
 *
 * @author Erel Segal-Halevi
 * @since 2013-07
 */

console.log("Winnow demo start");
// NOTE(review): ./WinnowHash is not listed under src/core/winnow in the
// repository tree (only this demo is) — confirm the module exists.
var Winnow = require('./WinnowHash');

// Classifier hyper-parameters; semantics defined by WinnowHash — confirm
// against its documentation before tuning.
var classifier = new Winnow({
  default_positive_weight: 1,
  default_negative_weight: 1,
  threshold: 0,
  do_averaging: false,
  margin: 1,
});

// Online training on three feature-hash samples; the target concept
// appears to be "neither a nor b".
classifier.trainOnline({'a': 1, 'b': 0}, 0);
classifier.trainOnline({'a': 0, 'b': 1}, 0);
classifier.trainOnline({'a': 0, 'b': 0}, 1);

// Classify with increasing explanation detail (second argument).
console.dir(classifier.classify({'a': 0, 'b': 0}, /*explain=*/1));
console.dir(classifier.classify({'a': 1, 'b': 1}, /*explain=*/3));

console.log("Winnow demo end");
--------------------------------------------------------------------------------
/dist/core/neural/NeuralNetwork.js:
--------------------------------------------------------------------------------
"use strict";

/**
 * A wrapper for Heather Arthur's brain.js package: https://github.com/harthur/brain
 *
 * Adapts brain.js's NeuralNetwork to this library's classifier interface
 * (trainOnline / train / classify).
 *
 * @author Erel Segal-haLevi
 * @since 2013-09-29
 */
var NeuralNetwork = require('brain.js').NeuralNetwork;

NeuralNetwork.prototype.trainOnline = function () {
  throw new Error("NeuralNetwork does not support online training");
};

// BUG FIX: keep a reference to brain.js's own train() BEFORE overriding it.
// The old override ended with `this.train(dataset)`, which re-entered the
// override itself and recursed until the stack overflowed, instead of
// delegating to brain.js.
var originalTrain = NeuralNetwork.prototype.train;

NeuralNetwork.prototype.train = function (dataset) {
  // brain.js expects `output` to be an array or a hash; wrap bare scalars.
  dataset.forEach(function (datum) {
    if (!Array.isArray(datum.output) && !(datum.output instanceof Object)) {
      datum.output = [datum.output];
    }
  });
  return originalTrain.call(this, dataset);
};

NeuralNetwork.prototype.classify = NeuralNetwork.prototype.run;
module.exports = NeuralNetwork;
--------------------------------------------------------------------------------
/src/core/multilabel/index.js:
--------------------------------------------------------------------------------
// Aggregates the multi-label classifiers under a single namespace.
// NOTE(review): this file mixes ES-module `export const` syntax with a
// CommonJS `module.exports` loop below; it presumably relies on the Babel
// CommonJS transform making `exports`/`module.exports` the same object —
// confirm against the build setup before converting to native ESM.
export const BinaryRelevance = require('./BinaryRelevance');
export const BinarySegmentation = require('./BinarySegmentation');
export const MulticlassSegmentation = require('./MulticlassSegmentation');
export const MetaLabeler = require('./MetaLabeler');
// NOTE(review): "CrossLangauge" is misspelled but looks like it matches the
// actual module file name — do not correct it here without renaming the file.
export const CrossLanguageModel = require('./CrossLangaugeModelClassifier');
export const ThresholdClassifier = require('./ThresholdClassifier');

// add a "classify and log" method to all classifiers, for demos:
// (the prototype-and-classify check skips any exported value that is not a
// classifier constructor, e.g. Babel's __esModule marker)
for (var classifierClass in module.exports) {
  if (module.exports[classifierClass].prototype && module.exports[classifierClass].prototype.classify)
    module.exports[classifierClass].prototype.classifyAndLog = function(sample) {
      console.log(sample+" is "+this.classify(sample));
    }
}
15 |
--------------------------------------------------------------------------------
/dist/features/NGramsFromArray.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | Object.defineProperty(exports, "__esModule", {
4 | value: true
5 | });
6 | exports["default"] = _default;
7 |
8 | /**
9 | * Convert an array of words/tokens to a set of n-grams, for a given n, possibly with a gap:
10 | */
11 | function _default(numOfWords, gap, grams, features) {
12 | for (var i = 0; i < numOfWords - 1 - (gap ? 1 : 0); ++i) {
13 | grams.unshift("[start]");
14 | grams.push("[end]");
15 | }
16 |
17 | for (var i = 0; i <= grams.length - numOfWords; ++i) {
18 | var sliceOfWords = grams.slice(i, i + numOfWords);
19 | if (gap) sliceOfWords[1] = "-";
20 | var feature = sliceOfWords.join(" ");
21 | features[feature.trim()] = 1;
22 | }
23 |
24 | for (var i = 0; i < numOfWords - 1 - (gap ? 1 : 0); ++i) {
25 | grams.pop();
26 | grams.shift();
27 | }
28 | }
29 |
30 | ;
--------------------------------------------------------------------------------
/dist/core/decisiontree/DecisionTreeDemo.js:
--------------------------------------------------------------------------------
"use strict";

var _DecisionTree = _interopRequireDefault(require("./DecisionTree"));

function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; }

console.log("Decision Tree demo start");

var classifier = new _DecisionTree["default"]({});

// BUG FIX: `dataset` was assigned without `var`, which throws a
// ReferenceError under "use strict" (implicit globals are forbidden),
// so this demo crashed before training.
var dataset = [{
  input: { a: 1, b: 0 },
  output: 0
}, {
  input: { a: 0, b: 1 },
  output: 0
}, {
  input: { a: 0, b: 0 },
  output: 1
}];

classifier.trainBatch(dataset);

// Classify with explanation verbosity 1 and 3 respectively:
console.dir(classifier.classify({
  'a': 0,
  'b': 0
},
/*explain=*/
1));
console.dir(classifier.classify({
  'a': 1,
  'b': 1
},
/*explain=*/
3));
console.log("Decision Tree demo end");
--------------------------------------------------------------------------------
/dist/core/winnow/WinnowHashDemo.js:
--------------------------------------------------------------------------------
"use strict";

/**
 * Demonstrates the winnow classification algorithm.
 *
 * @author Erel Segal-Halevi
 * @since 2013-07
 */
console.log("Winnow demo start");

var WinnowHash = require('./WinnowHash');

// Balanced initial weights, zero threshold, a margin of 1, no averaging.
var winnow = new WinnowHash({
  default_positive_weight: 1,
  default_negative_weight: 1,
  threshold: 0,
  do_averaging: false,
  margin: 1
});

// Online training on three samples; the label is 1 only when both are 0.
winnow.trainOnline({ 'a': 1, 'b': 0 }, 0);
winnow.trainOnline({ 'a': 0, 'b': 1 }, 0);
winnow.trainOnline({ 'a': 0, 'b': 0 }, 1);

// Classify with explanation verbosity 1 and 3 respectively:
console.dir(winnow.classify({ 'a': 0, 'b': 0 }, /*explain=*/1));
console.dir(winnow.classify({ 'a': 1, 'b': 1 }, /*explain=*/3));

console.log("Winnow demo end");
--------------------------------------------------------------------------------
/docs/.vuepress/public/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "App",
3 | "icons": [
4 | {
5 | "src": "\/android-icon-36x36.png",
6 | "sizes": "36x36",
7 | "type": "image\/png",
8 | "density": "0.75"
9 | },
10 | {
11 | "src": "\/android-icon-48x48.png",
12 | "sizes": "48x48",
13 | "type": "image\/png",
14 | "density": "1.0"
15 | },
16 | {
17 | "src": "\/android-icon-72x72.png",
18 | "sizes": "72x72",
19 | "type": "image\/png",
20 | "density": "1.5"
21 | },
22 | {
23 | "src": "\/android-icon-96x96.png",
24 | "sizes": "96x96",
25 | "type": "image\/png",
26 | "density": "2.0"
27 | },
28 | {
29 | "src": "\/android-icon-144x144.png",
30 | "sizes": "144x144",
31 | "type": "image\/png",
32 | "density": "3.0"
33 | },
34 | {
35 | "src": "\/android-icon-192x192.png",
36 | "sizes": "192x192",
37 | "type": "image\/png",
38 | "density": "4.0"
39 | }
40 | ]
41 | }
--------------------------------------------------------------------------------
/src/features/RegexpNormalizer.js:
--------------------------------------------------------------------------------
1 | /**
2 | * normalizes a sentence based on a list of regular expressions.
3 | * @param normalizations - an array of objects {source: /regexp/g, target: "target"}
4 | * @param sample - a string.
5 | * @return a new string, with all normalizations carried out.
6 | */
7 | export default function (normalizations) {
8 | return function (sample) {
9 | normalizations.forEach(function (normalization) {
10 | var matches = null;
11 | if (normalization.source instanceof RegExp) {
12 | if (!normalization.source.global) {
13 | console.warn("normalization source, " + normalization.source + ", is not global - skipping");
14 | return;
15 | }
16 | } else {
17 | normalization.source = new RegExp(normalization.source, "gi");
18 | }
19 | sample = sample.replace(normalization.source, normalization.target);
20 | //console.log(sample);
21 | });
22 | return sample;
23 | };
24 | };
25 |
--------------------------------------------------------------------------------
/src/features/NGramsOfLetters.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Extracts substrings of letters of a given size.
3 | */
4 |
5 | var PAD_CHAR = '#';
6 |
7 | /**
8 | * Add letter n-gram features to the given feature-vector.
9 | *
10 | * @param numOfLetters - a positive integer.
11 | * @param caseSensitive - boolean. if false, convert all to lower case.
12 | * @param sample - a string.
13 | * @param features an initial hash of features (optional).
14 | * @return a hash with all the different letter n-grams contained in the given sentence.
15 | */
16 | export default function (numOfLetters, caseSensitive) {
17 | return function (sample, features) {
18 | if (!caseSensitive) sample = sample.toLowerCase();
19 | for (var i = 0; i < numOfLetters - 1; ++i)
20 | sample = PAD_CHAR + sample + PAD_CHAR;
21 | for (var firstLetter = 0; firstLetter < sample.length - numOfLetters + 1; ++firstLetter) {
22 | var feature = sample.substr(firstLetter, numOfLetters);
23 | features[feature] = 1;
24 | }
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **Desktop (please complete the following information):**
27 | - OS: [e.g. iOS]
28 | - Browser [e.g. chrome, safari]
29 | - Version [e.g. 22]
30 |
31 | **Smartphone (please complete the following information):**
32 | - Device: [e.g. iPhone6]
33 | - OS: [e.g. iOS8.1]
34 | - Browser [e.g. stock browser, safari]
35 | - Version [e.g. 22]
36 |
37 | **Additional context**
38 | Add any other context about the problem here.
39 |
--------------------------------------------------------------------------------
/test/test_utils.js:
--------------------------------------------------------------------------------
1 | /*
2 | Module test_utils contains helpful routines for running test of existing classifiers,
3 | currently both of them are the copy from different modules
4 |
5 | */
6 | import PrecisionRecall from '../dist/utils/PrecisionRecall';
7 |
// Classify every sample in the dataset and accumulate the results into a
// PrecisionRecall statistics object, which is returned.
export function test(dataset, classifier) {
  var stats = new PrecisionRecall();
  dataset.forEach(function (sample) {
    var predicted = classifier.classify(sample.input);
    stats.addCasesHash(sample.output, predicted, true);
  });
  return stats;
}
/**
 * Compute the F1 score from a hash of counts.
 *
 * @param stats - hash with integer counts under keys 'TP', 'FP', 'FN'.
 * @param type_of_averaging - 0 for micro-averaging (the only implemented mode).
 * @return the F1 score in [0,1], or 0 when it is undefined (no true positives),
 *         or undefined for an unsupported averaging mode.
 */
export function F1_evaluation(stats, type_of_averaging) {
  if (type_of_averaging == 0) {
    if ((stats['TP'] == 0) || (stats['TP'] + stats['FP'] == 0) || (stats['TP'] + stats['FN'] == 0))
      return 0;
    var precision = stats['TP'] / (stats['TP'] + stats['FP']);
    var recall = stats['TP'] / (stats['TP'] + stats['FN']);
    // BUG FIX: F1 is the harmonic mean 2PR/(P+R); the factor of 2 was missing.
    var f1 = 2 * (precision * recall) / (precision + recall);
    return f1;
  }
}
--------------------------------------------------------------------------------
/test/generaterandom.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Generating random string with given number of words and generating random list
3 | * with given length with element from the given list
4 | */
5 |
/**
 * Generate a random string of `length` space-terminated words (default 10),
 * each made of 2-11 random alphanumeric characters.
 *
 * @param length - number of words (optional; defaults to 10 when falsy).
 * @return the generated string; every word is followed by a single space.
 */
export function random_string(length) {
  // BUG FIX: the alphabet read "...UVWXTZ" and "...ghiklm..." — it was
  // missing 'Y' and 'j' and repeated 'T'; it now covers all 62 alphanumerics.
  var chars = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
  length = length ? length : 10;
  var string = '';
  for (var i = 0; i < length; i++) {
    var word_length = Math.floor(Math.random() * 10 + 1);
    // note: <= keeps the historical behavior of word_length+1 characters
    for (var j = 0; j <= word_length; j++) {
      var randomNumber = Math.floor(Math.random() * chars.length);
      string += chars.charAt(randomNumber);
    }
    string += " ";
  }
  return string;
}
/**
 * Return a random list of 0-4 elements drawn (with repetition) from `list`.
 */
export function random_list_length(list) {
  // BUG FIX: this used `this.random_list(...)`, which throws when the
  // function is imported and called directly (`this` is undefined in ES
  // modules); call the sibling function by name instead.
  return random_list(Math.floor(Math.random() * 5), list);
}
/**
 * Build a list of `length` elements chosen uniformly (with repetition)
 * from the given `list`.
 */
export function random_list(length, list) {
  var picked = [];
  while (picked.length < length) {
    var index = Math.floor(Math.random() * list.length);
    picked.push(list[index]);
  }
  return picked;
}
--------------------------------------------------------------------------------
/src/core/svm/SvmPerfDemo.js:
--------------------------------------------------------------------------------
// simple demonstration of binary SVM, based on SVM-Perf

var SvmPerf = require('./SvmPerf');

// Four training points, two per class:
var trainSet = [
  { input: [0, 0], output: 0 },
  { input: [1, 1], output: 0 },
  { input: [0, 1], output: 1 },
  { input: [1, 2], output: 1 }
];

// the separating line goes through [0,0.5] and [1,1.5]. It is:
//   0.5+x-y = 0,  or equivalently:  2y-2x-1 = 0

var classifier = new SvmPerf({
  learn_args: "-c 20.0",
  model_file_prefix: "tempfiles/SvmPerfDemo",
  debug: false
});
classifier.trainBatch(trainSet);

// binary output:
console.dir(classifier.classify([0, 2])); // 1
console.dir(classifier.classify([1, 0])); // 0

console.dir(classifier.modelMap); // { '0': -1, '1': -2, '2': 2 }

// with explanations (verbosity 3):
console.dir(classifier.classify([0, 2], 3)); // 1
console.dir(classifier.classify([1, 0], 3)); // 0

// continuous (margin) output:
console.dir(classifier.classify([0, 2], 0, true)); // 3
console.dir(classifier.classify([1, 0], 0, true)); // -3
38 |
--------------------------------------------------------------------------------
/dist/features/RegexpNormalizer.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | Object.defineProperty(exports, "__esModule", {
4 | value: true
5 | });
6 | exports["default"] = _default;
7 |
8 | /**
9 | * normalizes a sentence based on a list of regular expressions.
10 | * @param normalizations - an array of objects {source: /regexp/g, target: "target"}
11 | * @param sample - a string.
12 | * @return a new string, with all normalizations carried out.
13 | */
14 | function _default(normalizations) {
15 | return function (sample) {
16 | normalizations.forEach(function (normalization) {
17 | var matches = null;
18 |
19 | if (normalization.source instanceof RegExp) {
20 | if (!normalization.source.global) {
21 | console.warn("normalization source, " + normalization.source + ", is not global - skipping");
22 | return;
23 | }
24 | } else {
25 | normalization.source = new RegExp(normalization.source, "gi");
26 | }
27 |
28 | sample = sample.replace(normalization.source, normalization.target); //console.log(sample);
29 | });
30 | return sample;
31 | };
32 | }
33 |
34 | ;
--------------------------------------------------------------------------------
/src/features/RegexpSplitter.js:
--------------------------------------------------------------------------------
1 | /**
2 | * splits sentences using a custom regular expression.
3 | * @param regexpString - a string
4 | * @param delimitersToInclude - a hash (set) of delimiters that will be added to the end of the previous sentence.
5 | * @param text - a string.
6 | * @return an array of parts (sentences).
7 | */
8 | export default function (regexpString, delimitersToInclude) {
9 | regexpString = "(" + regexpString + ")"; // to capture the delimiters
10 | var regexp = new RegExp(regexpString, "i");
11 | if (!delimitersToInclude) delimitersToInclude = {};
12 | return function (text) {
13 | var parts = text.split(regexp);
14 | var normalizedParts = [];
15 | for (var i = 0; i < parts.length; i += 2) {
16 | parts[i] = parts[i].trim();
17 | var part = parts[i];
18 | if (i + 1 < parts.length) {
19 | var delimiter = parts[i + 1];
20 | if (delimitersToInclude[delimiter])
21 | part += " " + delimiter;
22 | }
23 | if (part.length > 0)
24 | normalizedParts.push(part);
25 | }
26 | //console.log(text);
27 | //console.dir(normalizedParts);
28 | return normalizedParts;
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/dist/core/svm/SvmPerfDemo.js:
--------------------------------------------------------------------------------
"use strict";

// simple demonstration of binary SVM, based on SVM-Perf
var SvmPerf = require('./SvmPerf');

// Four training points, two per class.
var trainSet = [
  { input: [0, 0], output: 0 },
  { input: [1, 1], output: 0 },
  { input: [0, 1], output: 1 },
  { input: [1, 2], output: 1 }
];

// the separating line goes through [0,0.5] and [1,1.5]. It is:
//   0.5+x-y = 0,  or equivalently:  2y-2x-1 = 0

var classifier = new SvmPerf({
  learn_args: "-c 20.0",
  model_file_prefix: "tempfiles/SvmPerfDemo",
  debug: false
});
classifier.trainBatch(trainSet);

// binary output:
console.dir(classifier.classify([0, 2])); // 1
console.dir(classifier.classify([1, 0])); // 0

console.dir(classifier.modelMap); // { '0': -1, '1': -2, '2': 2 }

// with explanations (verbosity 3):
console.dir(classifier.classify([0, 2], 3)); // 1
console.dir(classifier.classify([1, 0], 3)); // 0

// continuous (margin) output:
console.dir(classifier.classify([0, 2], 0, true)); // 3
console.dir(classifier.classify([1, 0], 0, true)); // -3
--------------------------------------------------------------------------------
/dist/features/NGramsOfLetters.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | Object.defineProperty(exports, "__esModule", {
4 | value: true
5 | });
6 | exports["default"] = _default;
7 |
8 | /**
9 | * Extracts substrings of letters of a given size.
10 | */
11 | var PAD_CHAR = '#';
12 | /**
13 | * Add letter n-gram features to the given feature-vector.
14 | *
15 | * @param numOfLetters - a positive integer.
16 | * @param caseSensitive - boolean. if false, convert all to lower case.
17 | * @param sample - a string.
18 | * @param features an initial hash of features (optional).
19 | * @return a hash with all the different letter n-grams contained in the given sentence.
20 | */
21 |
22 | function _default(numOfLetters, caseSensitive) {
23 | return function (sample, features) {
24 | if (!caseSensitive) sample = sample.toLowerCase();
25 |
26 | for (var i = 0; i < numOfLetters - 1; ++i) {
27 | sample = PAD_CHAR + sample + PAD_CHAR;
28 | }
29 |
30 | for (var firstLetter = 0; firstLetter < sample.length - numOfLetters + 1; ++firstLetter) {
31 | var feature = sample.substr(firstLetter, numOfLetters);
32 | features[feature] = 1;
33 | }
34 | };
35 | }
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 |
8 | # Runtime data
9 | pids
10 | *.pid
11 | *.seed
12 | *.pid.lock
13 |
14 | # Directory for instrumented libs generated by jscoverage/JSCover
15 | lib-cov
16 |
17 | # Coverage directory used by tools like istanbul
18 | coverage
19 |
20 | # nyc test coverage
21 | .nyc_output
22 |
23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
24 | .grunt
25 |
26 | # Bower dependency directory (https://bower.io/)
27 | bower_components
28 |
29 | # node-waf configuration
30 | .lock-wscript
31 |
32 | # Compiled binary addons (https://nodejs.org/api/addons.html)
33 | build/Release
34 |
35 | # Dependency directories
36 | node_modules/
37 | jspm_packages/
38 |
39 | # TypeScript v1 declaration files
40 | typings/
41 |
42 | # Optional npm cache directory
43 | .npm
44 |
45 | # Optional eslint cache
46 | .eslintcache
47 |
48 | # Optional REPL history
49 | .node_repl_history
50 |
51 | # Output of 'npm pack'
52 | *.tgz
53 |
54 | # Yarn Integrity file
55 | .yarn-integrity
56 |
57 | # dotenv environment variables file
58 | .env
59 |
60 | # next.js build output
61 | .next
62 |
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 |
8 | # Runtime data
9 | pids
10 | *.pid
11 | *.seed
12 | *.pid.lock
13 |
14 | # Directory for instrumented libs generated by jscoverage/JSCover
15 | lib-cov
16 |
17 | # Coverage directory used by tools like istanbul
18 | coverage
19 |
20 | # nyc test coverage
21 | .nyc_output
22 |
23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
24 | .grunt
25 |
26 | # Bower dependency directory (https://bower.io/)
27 | bower_components
28 |
29 | # node-waf configuration
30 | .lock-wscript
31 |
32 | # Compiled binary addons (https://nodejs.org/api/addons.html)
33 | build/Release
34 |
35 | # Dependency directories
36 | node_modules/
37 | jspm_packages/
38 |
39 | # TypeScript v1 declaration files
40 | typings/
41 |
42 | # Optional npm cache directory
43 | .npm
44 |
45 | # Optional eslint cache
46 | .eslintcache
47 |
48 | # Optional REPL history
49 | .node_repl_history
50 |
51 | # Output of 'npm pack'
52 | *.tgz
53 |
54 | # Yarn Integrity file
55 | .yarn-integrity
56 |
57 | # dotenv environment variables file
58 | .env
59 |
60 | # next.js build output
61 | .next
62 |
--------------------------------------------------------------------------------
/test/utilsTest/ListTest.js:
--------------------------------------------------------------------------------
1 | import { isEqual } from "underscore";
2 | import { average, listembed, median, variance } from "../../dist/utils/list";
3 |
describe("List test function", function() {
  it("It should correctly calculate Variance", function() {
    variance([170, 300, 430, 470, 600]).should.be.equal(21704);
  });

  it("it should calculate average correctly", function() {
    average([1, 2, 3, 4, 5, 6, 7]).should.be.equal(4);
  });

  it("it should calculate median correctly", function() {
    median([3, 8, 9, 1, 5, 7, 9, 21]).should.be.equal(7.5);
  });

  it("it should know how to do embedding", function() {
    // Plain arrays are wrapped one level deeper; empty/missing input -> [[]].
    isEqual(listembed(["label"]), [["label"]]).should.equal(true);
    isEqual(listembed([]), [[]]).should.equal(true);
    isEqual(listembed(undefined), [[]]).should.equal(true);
    isEqual(listembed(null), [[]]).should.equal(true);
    // A hash with a scalar `classes` embeds once...
    isEqual(listembed({ classes: "label" }), ["label"]).should.equal(true);
    // ...and with an array `classes` embeds twice.
    isEqual(listembed({ classes: ["label"] }), [["label"]]).should.equal(true);
  });
});
39 |
--------------------------------------------------------------------------------
/dist/features/RegexpSplitter.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | Object.defineProperty(exports, "__esModule", {
4 | value: true
5 | });
6 | exports["default"] = _default;
7 |
8 | /**
9 | * splits sentences using a custom regular expression.
10 | * @param regexpString - a string
11 | * @param delimitersToInclude - a hash (set) of delimiters that will be added to the end of the previous sentence.
12 | * @param text - a string.
13 | * @return an array of parts (sentences).
14 | */
15 | function _default(regexpString, delimitersToInclude) {
16 | regexpString = "(" + regexpString + ")"; // to capture the delimiters
17 |
18 | var regexp = new RegExp(regexpString, "i");
19 | if (!delimitersToInclude) delimitersToInclude = {};
20 | return function (text) {
21 | var parts = text.split(regexp);
22 | var normalizedParts = [];
23 |
24 | for (var i = 0; i < parts.length; i += 2) {
25 | parts[i] = parts[i].trim();
26 | var part = parts[i];
27 |
28 | if (i + 1 < parts.length) {
29 | var delimiter = parts[i + 1];
30 | if (delimitersToInclude[delimiter]) part += " " + delimiter;
31 | }
32 |
33 | if (part.length > 0) normalizedParts.push(part);
34 | } //console.log(text);
35 | //console.dir(normalizedParts);
36 |
37 |
38 | return normalizedParts;
39 | };
40 | }
--------------------------------------------------------------------------------
/docs/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "neuro.js.org",
3 | "description": "Documentation site for Neuro",
4 | "version": "1.0.0",
  "author": "Turtuvshin Byambaa",
6 | "repository": {
7 | "type": "git",
8 | "url": "https://github.com/intelligo-systems/neuro.git"
9 | },
10 | "engines": {
11 | "node": ">=0.12"
12 | },
13 | "devDependencies": {
14 | "@vuepress/plugin-google-analytics": "1.3.0",
15 | "vuepress": "1.3.0",
16 | "vuepress-plugin-janitor": "1.0.0",
17 | "vuepress-plugin-reading-time": "0.1.1",
18 | "vuepress-plugin-rss": "2.0.0",
19 | "yaml-front-matter": "4.1.0"
20 | },
21 | "scripts": {
22 | "dev": "vuepress dev",
23 | "build": "vuepress build",
24 | "test": "mocha"
25 | },
26 | "contributors": [
27 | {
28 | "name": "Turtuvshin Byambaa",
29 | "email": "toroo.byamba@gmail.com"
30 | }
31 | ],
32 | "keywords": [
33 | "ai",
34 | "ai-bot",
35 | "artificial-intelligence",
36 | "bot",
37 | "chat",
38 | "chatbot",
39 | "classifier",
40 | "conversation",
41 | "framework",
42 | "intelligo",
43 | "intelligence",
44 | "neural network",
45 | "neural",
46 | "neuro",
47 | "network",
48 | "neural-networks",
49 | "machine-learning"
50 | ],
51 | "license": "MIT"
52 | }
53 |
--------------------------------------------------------------------------------
/test/featuresTest/FeatureLookupTableTest.js:
--------------------------------------------------------------------------------
1 | /**
2 | * a unit-test for feature lookup tables
3 | */
4 |
5 | import FeatureLookupTable from "../../dist/features/FeatureLookupTable";
6 |
var sample1 = { a: 111, b: 222, c: 333 };
var sample2 = { a: 1111, d: 4444, e: 5555 };
var sample3 = { c: 33333, e: 55555, g: 77777 };

describe("feature lookup table", function() {
  it("adds samples incrementally", function() {
    var table = new FeatureLookupTable();
    // Convert one sample at a time; the table grows as new keys appear.
    var arrays = [sample1, sample2, sample3].map(function(sample) {
      return table.hashToArray(sample);
    });
    arrays.forEach(function(array) {
      array.should.be.an.instanceOf(Array);
    });
    // Each array must round-trip back to its original hash.
    table.arrayToHash(arrays[0]).should.eql(sample1);
    table.arrayToHash(arrays[1]).should.eql(sample2);
    table.arrayToHash(arrays[2]).should.eql(sample3);
  });

  it("adds all samples together", function() {
    var table = new FeatureLookupTable();
    var arrays = table.hashesToArrays([sample1, sample2, sample3]);
    arrays.should.be.an.instanceOf(Array).and.have.lengthOf(3);
    table.arraysToHashes(arrays).should.eql([sample1, sample2, sample3]);
  });
});
44 |
--------------------------------------------------------------------------------
/dist/features/HypernymExtractor.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | Object.defineProperty(exports, "__esModule", {
4 | value: true
5 | });
6 | exports["default"] = _default;
7 |
8 | /**
9 | * HypernymExtractor - extracts hypernyms - words and phrases that are entailed by the given text.
10 | *
11 | * A hypernym is described by a regular expression, a feature string, and a confidence score.
12 | * For example: if regexp=/no (.*)/ and feature="without $1", then,
13 | * if the sample contains "no car", the extractor will add the feature "without car", with the given confidence score (usually a number from 0 to 1).
14 | *
15 | * Adds hypernym features to the given feature-vector.
16 | *
17 | * @param hypernyms - an array of objects {regexp: /regexp/g, feature: "feature", confidence: confidence}
18 | * @param sample - a string.
19 | * @param features an initial hash of features (optional). The hypernym features will be added to that array.
20 | */
21 | function _default(hypernyms) {
22 | return function (sample, features) {
23 | hypernyms.forEach(function (hypernym) {
24 | if (!(hypernym.regexp instanceof RegExp)) {
25 | hypernym.regexp = new RegExp(hypernym.regexp, "gi");
26 | }
27 |
28 | if (hypernym.regexp.test(sample)) {
29 | features[hypernym.feature] = hypernym.confidence;
30 | }
31 | });
32 | };
33 | }
34 |
35 | ;
--------------------------------------------------------------------------------
/src/features/HypernymExtractor.js:
--------------------------------------------------------------------------------
1 | /**
2 | * HypernymExtractor - extracts hypernyms - words and phrases that are entailed by the given text.
3 | *
4 | * A hypernym is described by a regular expression, a feature string, and a confidence score.
5 | * For example: if regexp=/no (.*)/ and feature="without $1", then,
6 | * if the sample contains "no car", the extractor will add the feature "without car", with the given confidence score (usually a number from 0 to 1).
7 | *
8 | * Adds hypernym features to the given feature-vector.
9 | *
10 | * @param hypernyms - an array of objects {regexp: /regexp/g, feature: "feature", confidence: confidence}
11 | * @param sample - a string.
12 | * @param features an initial hash of features (optional). The hypernym features will be added to that array.
13 | */
14 | export default function (hypernyms) {
15 | return function (sample, features) {
16 | hypernyms.forEach(function (hypernym) {
17 | if (!(hypernym.regexp instanceof RegExp)) {
18 | hypernym.regexp = new RegExp(hypernym.regexp, "gi");
19 | }
20 | if (hypernym.regexp.test(sample)) {
21 | features[hypernym.feature] = hypernym.confidence;
22 | }
23 | });
24 | };
25 | };
26 |
--------------------------------------------------------------------------------
/src/core/svm/SvmLinearDemo.js:
--------------------------------------------------------------------------------
// simple demonstration of binary SVM, based on LibLinear

var SvmLinear = require('./SvmLinear');
var partitions = require(__dirname+'/../../utils/partitions');

// the separating line goes through [0,0.5] and [1,1.5]:
//   0.5+x-y = 0,  or equivalently:  -1-2x+2y = 0
var dataset = [
  { input: [0, 0], output: 0 },
  { input: [1, 1], output: 0 },
  { input: [0, 1], output: 1 },
  { input: [1, 2], output: 1 }
];

var classifier = new SvmLinear({
  learn_args: "-c 20",
  model_file_prefix: "tempfiles/SvmLinearDemo",
  train_command: "liblinear_train",
  test_command: "liblinear_test",
  multiclass: false
});
classifier.trainBatch(dataset);

console.log("simple classification: ");
console.dir(classifier.classify([0,2])); // 1
console.dir(classifier.classify([1,0])); // 0

console.log("model: ");
console.dir(classifier.mapLabelToMapFeatureToWeight); // { '0': -1, '1': -2, '2': 2 }

// 2-fold cross-validation over a doubled dataset:
partitions.partitions(dataset.concat(dataset), 2, function(train, test, index) {
  console.log("fold: "+index)
  classifier.trainBatch(train)

  test.forEach(function(instance) {
    console.dir("Classify instance:")
    console.dir(instance)
    console.dir(classifier.classify(instance.input));
  });
});
/test/featuresTest/RegexpNormalizerTest.js:
--------------------------------------------------------------------------------
1 | /**
2 | * a unit-test for Regular Expression Normalizer.
3 | */
4 |
5 | import { RegexpNormalizer } from "../../dist/features";
6 |
describe("RegexpNormalizer", function() {
  // Helper: build a normalizer from [source, target] pairs.
  var makeNormalizer = function(pairs) {
    return RegexpNormalizer(
      pairs.map(function(pair) {
        return { source: pair[0], target: pair[1] };
      })
    );
  };

  it("normalizes simple strings", function() {
    var ren = makeNormalizer([
      ["can't", "cannot"],
      ["cannot", "can not"],
      ["won't", "will not"]
    ]);
    ren("I can't do it and I won't do it").should.eql(
      "I can not do it and I will not do it"
    );
  });

  it("normalizes regular expressions", function() {
    var ren = makeNormalizer([
      ["\\b(...+)est\\b", "$1"],
      ["\\b(...+)er\\b", "$1"]
    ]);
    ren("faster and highest").should.eql("fast and high");
  });

  it("normalizes numbers", function() {
    var ren = makeNormalizer([["\\b(\\d+)k\\b", "$1000"]]);
    ren("I want 7k dollars").should.eql("I want 7000 dollars");
    ren("I want 70k dollars").should.eql("I want 70000 dollars");
  });
});
51 |
--------------------------------------------------------------------------------
/dist/core/multilabel/index.js:
--------------------------------------------------------------------------------
"use strict";

// Compiled barrel module for the multi-label classifiers: re-exports each
// classifier and then decorates them with a demo helper (see loop at bottom).
Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.ThresholdClassifier = exports.CrossLanguageModel = exports.MetaLabeler = exports.MulticlassSegmentation = exports.BinarySegmentation = exports.BinaryRelevance = void 0;

var BinaryRelevance = require('./BinaryRelevance');

exports.BinaryRelevance = BinaryRelevance;

var BinarySegmentation = require('./BinarySegmentation');

exports.BinarySegmentation = BinarySegmentation;

var MulticlassSegmentation = require('./MulticlassSegmentation');

exports.MulticlassSegmentation = MulticlassSegmentation;

var MetaLabeler = require('./MetaLabeler');

exports.MetaLabeler = MetaLabeler;

// NOTE(review): "Langauge" looks misspelled, but presumably matches the
// actual file name on disk — confirm before renaming anything.
var CrossLanguageModel = require('./CrossLangaugeModelClassifier');

exports.CrossLanguageModel = CrossLanguageModel;

var ThresholdClassifier = require('./ThresholdClassifier'); // add a "classify and log" method to all classifiers, for demos:


exports.ThresholdClassifier = ThresholdClassifier;

// Attach classifyAndLog to every exported class that has a classify method.
for (var classifierClass in module.exports) {
  if (module.exports[classifierClass].prototype && module.exports[classifierClass].prototype.classify) module.exports[classifierClass].prototype.classifyAndLog = function (sample) {
    console.log(sample + " is " + this.classify(sample));
  };
}
--------------------------------------------------------------------------------
/test/featuresTest/RegexpSplitterTest.js:
--------------------------------------------------------------------------------
1 | /**
2 | * a unit-test for Regular Expression Splitter.
3 | */
4 |
5 | import { RegexpSplitter } from "../../dist/features";
6 |
describe("RegexpSplitter", function() {
  it("splits sentences without delimiter", function() {
    var res = RegexpSplitter("[.,;?!]|and");
    // Same expected split for every spacing variant of the input:
    var expected = ["Hi", "Who are you", "I am Intelligo Bot"];
    res("Hi. Who are you? I am Intelligo Bot.").should.eql(expected);
    res("Hi.Who are you?I am Intelligo Bot.").should.eql(expected);
    res(
      "Hi. Who are you? I am Intelligo Bot. "
    ).should.eql(expected);
  });
  it("splits sentences with delimiter", function() {
    // Keep "?" in the output segments, drop ".":
    var res = RegexpSplitter("[.,;?!]|and", {
      "?": true,
      ".": false
    });
    var expected = ["Hi", "Who are you ?", "I am Intelligo Bot"];
    res("Hi. Who are you? I am Intelligo Bot.").should.eql(expected);
    res("Hi.Who are you?I am Intelligo Bot.").should.eql(expected);
    res("Hi. Who are you? I am Intelligo Bot.").should.eql(expected);
  });
});
46 |
--------------------------------------------------------------------------------
/dist/core/svm/SvmLinearDemo.js:
--------------------------------------------------------------------------------
"use strict";

// simple demonstration of binary SVM, based on LibLinear
var SvmLinear = require('./SvmLinear');

var partitions = require(__dirname + '/../../utils/partitions');

// Four points, two per class; the classes are linearly separable.
var dataset = [
  { input: [0, 0], output: 0 },
  { input: [1, 1], output: 0 },
  { input: [0, 1], output: 1 },
  { input: [1, 2], output: 1 }
];
// the separating line goes through [0,0.5] and [1,1.5]. It is:
// 0.5+x-y = 0
// or: -1-2x+2y = 0

var classifier = new SvmLinear({
  learn_args: "-c 20",
  model_file_prefix: "tempfiles/SvmLinearDemo",
  train_command: "liblinear_train",
  test_command: "liblinear_test",
  multiclass: false
});
classifier.trainBatch(dataset);
console.log("simple classification: ");
console.dir(classifier.classify([0, 2])); // 1
console.dir(classifier.classify([1, 0])); // 0

console.log("model: ");
console.dir(classifier.mapLabelToMapFeatureToWeight); // { '0': -1, '1': -2, '2': 2 }

// 2-fold cross-validation over a doubled copy of the dataset.
partitions.partitions(dataset.concat(dataset), 2, function (train, test, index) {
  console.log("fold: " + index);
  classifier.trainBatch(train);
  test.forEach(function (instance) {
    console.dir("Classify instance:");
    console.dir(instance);
    console.dir(classifier.classify(instance.input));
  });
});
--------------------------------------------------------------------------------
/dist/index.js:
--------------------------------------------------------------------------------
"use strict";

// Compiled top-level entry point: aggregates classifiers, feature extractors,
// format helpers, and utilities into one default-exported object.
Object.defineProperty(exports, "__esModule", {
  value: true
});
exports["default"] = void 0;

var _EnhancedClassifier = _interopRequireDefault(require("./core/EnhancedClassifier"));

var _multilabel = _interopRequireDefault(require("./core/multilabel"));

var _NeuralNetwork = _interopRequireDefault(require("./core/neural/NeuralNetwork"));

var _SvmJs = _interopRequireDefault(require("./core/svm/SvmJs"));

var _SvmLinear = _interopRequireDefault(require("./core/svm/SvmLinear"));

var _SvmPerf = _interopRequireDefault(require("./core/svm/SvmPerf"));

var _WinnowHash = _interopRequireDefault(require("./core/winnow/WinnowHash"));

var _features = _interopRequireDefault(require("./features"));

var _formats = _interopRequireDefault(require("./formats"));

var _utils = _interopRequireDefault(require("./utils"));

// Babel helper: wrap a CommonJS module so it exposes a "default" property.
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; }

// Public API surface of the package.
var _default = {
  classifiers: {
    NeuralNetwork: _NeuralNetwork["default"],
    SvmJs: _SvmJs["default"],
    SvmLinear: _SvmLinear["default"],
    SvmPerf: _SvmPerf["default"],
    Winnow: _WinnowHash["default"],
    multilabel: _multilabel["default"],
    EnhancedClassifier: _EnhancedClassifier["default"]
  },
  features: _features["default"],
  formats: _formats["default"],
  utils: _utils["default"]
};
exports["default"] = _default;
--------------------------------------------------------------------------------
/src/formats/svmlight.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Small utility for writing a dataset in SVM-light format.
3 | *
4 | * @author Erel Segal-Halevi
5 | * @since 2013-09
6 | */
7 |
8 |
9 | /**
10 | * convert a single dataset to compact JSON format.
11 | * @param dataset an array of samples in the format {input: [value1, value2, ...], output: (0|1)}
12 | * @param bias if nonzero, add it at the beginning of the vector.
13 | * @param binarize if true, change output to -1/1. If false, leave output as it is
14 | */
15 | exports.toSvmLight = function(dataset, bias, binarize, firstFeatureNumber) {
16 | var lines = "";
17 | for (var i=0; i0? "\n": "") +
19 | (binarize? (dataset[i].output>0? "1": "-1"): dataset[i].output) + // in svm-light, the output comes first:
20 | featureArrayToFeatureString(dataset[i].input, bias, firstFeatureNumber)
21 | ;
22 | lines += line;
23 | };
24 | lines += "\n";
25 | return lines;
26 | }
27 |
28 |
29 |
30 | /**
31 | * convert an array of features to a single line in SVM-light format. The line starts with a space.
32 | */
/**
 * Convert an array of features to a single line in SVM-light format.
 * The line starts with a space. Zero/falsy feature values are omitted.
 * NOTE: the loop was reconstructed — the original source was truncated in
 * transit; behavior matches the compiled (dist) copy.
 */
function featureArrayToFeatureString(features, bias, firstFeatureNumber) {
	if (!Array.isArray(features))
		throw new Error("Expected an array, but got "+JSON.stringify(features))
	var line = (bias? " "+firstFeatureNumber+":"+bias: "");
	for (var feature = 0; feature < features.length; ++feature) {
		var value = features[feature];
		// Skip zero features; shift indices by one when a bias slot is used.
		if (value)
			line += " " + (feature + firstFeatureNumber + (bias ? 1 : 0)) + ":" + value.toPrecision(5);
	}
	return line;
}
21 | featureArrayToFeatureString(dataset[i].input, bias, firstFeatureNumber);
22 | lines += line;
23 | }
24 |
25 | ;
26 | lines += "\n";
27 | return lines;
28 | };
29 | /**
30 | * convert an array of features to a single line in SVM-light format. The line starts with a space.
31 | */
32 |
33 |
/**
 * Convert an array of features to a single line in SVM-light format.
 * The line starts with a space; zero/falsy values are omitted entirely.
 */
function featureArrayToFeatureString(features, bias, firstFeatureNumber) {
  if (!Array.isArray(features)) throw new Error("Expected an array, but got " + JSON.stringify(features));

  // Start with the optional bias slot, then append one "index:value" token
  // per nonzero feature (indices shift by one when a bias slot is present).
  var parts = bias ? [" " + firstFeatureNumber + ":" + bias] : [];
  var offset = firstFeatureNumber + (bias ? 1 : 0);

  features.forEach(function (value, index) {
    if (value) parts.push(" " + (index + offset) + ":" + value.toPrecision(5));
  });

  return parts.join("");
}
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "neuro.js",
  "description": "Neuro.js is a machine learning framework for building AI assistants and chat-bots.",
4 | "version": "0.1.7",
  "author": "Turtuvshin Byambaa",
6 | "homepage": "https://neuro.js.org",
7 | "repository": {
8 | "type": "git",
9 | "url": "https://github.com/intelligo-systems/neuro.git"
10 | },
11 | "engines": {
12 | "node": ">=0.12"
13 | },
14 | "dependencies": {
15 | "async": "3.1.1",
16 | "brain.js": "1.6.1",
17 | "graph-paths": "latest",
18 | "intelligo": "1.3.0",
19 | "languagemodel": "latest",
20 | "lodash": "4.17.15",
21 | "sprintf": "0.1.5",
22 | "svm": "0.1.1",
23 | "temp": "0.9.1",
24 | "underscore": "1.9.1",
25 | "wordsworth": "0.1.0"
26 | },
27 | "devDependencies": {
28 | "@babel/cli": "7.8.4",
29 | "@babel/core": "7.8.4",
30 | "@babel/preset-env": "7.8.4",
31 | "@babel/register": "7.8.3",
32 | "mocha": "6.2.2",
33 | "should": "13.2.3"
34 | },
35 | "scripts": {
36 | "start": "node index.js",
37 | "build": "babel src -d dist",
38 | "test": "mocha --require @babel/register"
39 | },
40 | "contributors": [
41 | {
42 | "name": "Turtuvshin Byambaa",
43 | "email": "toroo.byamba@gmail.com"
44 | }
45 | ],
46 | "keywords": [
47 | "ai",
48 | "ai-bot",
49 | "artificial-intelligence",
50 | "bot",
51 | "chat",
52 | "chatbot",
53 | "classifier",
54 | "conversation",
55 | "framework",
56 | "intelligo",
57 | "intelligence",
58 | "neural network",
59 | "neural",
60 | "network",
61 | "neural-networks",
62 | "machine-learning"
63 | ],
64 | "license": "MIT"
65 | }
66 |
--------------------------------------------------------------------------------
/src/utils/list.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Utilities for lists
3 | *
4 | * @author Vasily Konovalov
5 | */
6 | import {
7 | reduce,
8 | isObject,
9 | isArray,
10 | each,
11 | clone
12 | } from 'underscore';
13 |
14 | // Calculating the median of an array basically involves sorting the array and picking the middle number.
15 | // If it’s an even amount of numbers you take the two numbers in the middle and average them.
// Median of a numeric array. Note: sorts the caller's array in place.
export function median(values) {
    values.sort((a, b) => a - b); // numeric sort, not lexicographic
    const mid = Math.floor(values.length / 2);
    // Odd length: middle element; even length: mean of the two middle ones.
    return values.length % 2 !== 0
        ? values[mid]
        : (values[mid - 1] + values[mid]) / 2.0;
}
26 |
// Population variance of a numeric list: E[X^2] - (E[X])^2.
// FIX: 'sum', 'exp', 'sum2' and 'exp2' were assignments to undeclared
// variables — ES modules run in strict mode, so this threw a ReferenceError
// at runtime. They are now proper locals (native reduce; no underscore needed).
export function variance(list) {
    const sum = list.reduce((memo, num) => memo + num, 0);
    const mean = sum / list.length;
    const sum2 = list.reduce((memo, num) => memo + num * num, 0);
    const mean2 = sum2 / list.length;
    return mean2 - mean * mean;
}
38 |
// Arithmetic mean of a numeric list (NaN for an empty list, as 0/0).
export function average(list) {
    let total = 0;
    for (const num of list) {
        total += num;
    }
    return total / list.length;
}
45 |
46 | // @input - list
47 | // @output - embedded list
// @input - list
// @output - embedded list
// Normalize a label into array-of-arrays form:
//   null/undefined -> [[]]; {classes: X} -> continue with X;
//   flat list -> [list]; already-nested list -> unchanged.
export function listembed(label) {
    // Missing labels become a single empty inner list.
    if (label == null)
        return [
            []
        ]

    // A non-array object of the shape {classes: ...} is unwrapped first.
    if (isObject(label) && !isArray(label) && ('classes' in label))
        label = label.classes

    // Wrap once unless the first element shows it is already nested.
    return (label[0] instanceof Array) ? label : [label]
}
71 |
// Shallow-clone every sample of a dataset into a new array.
// FIX: 'set1' was an assignment to an undeclared variable — ES modules run
// in strict mode, so this threw a ReferenceError at runtime. Now a local.
export function clonedataset(set) {
    const set1 = []
    each(set, function (value) {
        set1.push(clone(value))
    })
    return set1
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # neuro.js
2 |
3 | [](https://www.npmjs.com/package/neuro.js)
4 | [](https://www.npmjs.com/package/neuro.js)
5 | [](https://github.com/intelligo-systems/neuro.js/blob/master/LICENSE)
6 | [](https://twitter.com/intent/tweet?text=Wow:&url=https%3A%2F%2Fgithub.com%2Fintelligo-systems%2Fintelligo)
7 |
Neuro.js is a machine learning framework for building AI assistants and chat-bots.
9 |
10 | [](https://nodei.co/npm/neuro.js/)
11 |
12 | | [Installation][] | [Documentation][] | [Contributors][] | [License][] |
13 | |---|---|---|---|
14 |
15 | # Installation
16 |
17 | ```
18 | npm install neuro.js --save
19 | ```
20 |
21 | ## Documentation
22 |
23 | To check out docs, visit [neuro.js.org](https://neuro.js.org).
24 |
25 |
26 | ## Contributors
27 |
You may contribute in several ways, such as creating new features, fixing bugs, improving documentation and examples,
or translating any document here into your language. [Find more information in CODE_OF_CONDUCT.md](.github/CODE_OF_CONDUCT.md).
30 | Contributors
31 |
32 | ## License
33 |
34 | > Copyright (C) 2019 Intelligo LLC.
35 | > neuro.js is open-sourced software licensed under the [MIT](https://opensource.org/licenses/MIT) license.
36 | > (See the [LICENSE](https://github.com/intelligo-systems/neuro.js/blob/master/LICENSE) file for the whole license text.)
37 |
38 | **[⬆ back to top](#neuro.js)**
39 |
40 | [Installation]:#installation
41 | [Documentation]:#documentation
42 | [Contributors]:#contributors
43 | [License]:#license
44 |
45 |
--------------------------------------------------------------------------------
/src/core/svm/SvmLinearMulticlassDemo.js:
--------------------------------------------------------------------------------
// simple demonstration of multiclass SVM, based on LibLinear

var SvmLinear = require('./SvmLinear');

// Two points per class (3, 4, 5); the classes are linearly separable.
var trainSet = [
  { input: [0, 0], output: 3 },
  { input: [1, 1], output: 3 },
  { input: [0, 1], output: 4 },
  { input: [1, 2], output: 4 },
  { input: [0, 2], output: 5 },
  { input: [1, 3], output: 5 }
];

// One separating line goes through [0,0.5] and [1,1.5]. It is:
// 0.5+x-y = 0
// or: -1-2x+2y = 0

//Another separating line goes through [0,1.5] and [1,2.5]. It is:
// 1.5+x-y = 0
//or: -3-2x+2y = 0

var classifier = new SvmLinear({
  learn_args: "-c 20",
  model_file_prefix: "tempfiles/SvmLinearMulticlassDemo",
  multiclass: true,
  debug: false
});
classifier.trainBatch(trainSet);

// The same four probe points, classified under four explain/score settings.
var testPoints = [[1, 0], [0, 1.3], [0, 1.7], [0, 3]]; // expected: 3, 4, 5, 5

console.log("simple classification: ");
testPoints.forEach(function (point) {
  console.dir(classifier.classify(point));
});

console.log("model: ");
console.dir(classifier.mapLabelToMapFeatureToWeight); // { '0': -1, '1': -2, '2': 2 }

console.log("explained classification: ");
testPoints.forEach(function (point) {
  console.dir(classifier.classify(point, 3));
});

console.log("classification with scores: ");
testPoints.forEach(function (point) {
  console.dir(classifier.classify(point, 0, true));
});

console.log("explained classification with scores: ");
testPoints.forEach(function (point) {
  console.dir(classifier.classify(point, 3, true));
});
--------------------------------------------------------------------------------
/src/core/multilabel/multilabelutils.js:
--------------------------------------------------------------------------------
1 | var _ = require("underscore")._;
2 | /**
3 | * A utility function, used by several multi-label classifiers.
4 | *
5 | * @param scoresVector [[label1,score1],[label2,score2],...]
6 | * @param explain (int) if >0, return explanation.
7 | * @param withScores (boolean) if true, return the original scores vector.
8 | * @param threshold if withScores is false, all labels with scores above this threshold will be returned.
9 | */
10 | module.exports = {
11 | // iterate the list and collect the second item from the every element of the list
12 | getvalue: function (list) {
13 | val = []
14 | for (elem in list)
15 | { val.push(list[elem][1]) }
16 | return val
17 | },
18 |
19 | normalizeClasses: function (expectedClasses) {
20 | if (!_(expectedClasses).isArray())
21 | expectedClasses = [expectedClasses];
22 |
23 | expectedClasses = expectedClasses.map(this.stringifyClass);
24 | expectedClasses.sort();
25 | return expectedClasses;
26 | },
27 |
28 | stringifyClass: function (aClass) {
29 | return (_(aClass).isString()? aClass: JSON.stringify(aClass));
30 | },
31 |
32 | stringifyIfNeeded: function (label) {
33 | return (typeof(label)==='string'? label: JSON.stringify(label));
34 | },
35 |
36 | normalizeOutputLabels: function(labels) {
37 | if (!Array.isArray(labels))
38 | labels = [labels];
39 | return labels.map(module.exports.stringifyIfNeeded);
40 | },
41 |
42 | mapScoresVectorToMultilabelResult: function(scoresVector, explain, withScores, threshold) {
43 | var results;
44 | if (withScores) {
45 | results = scoresVector;
46 | } else {
47 | results = [];
48 | scoresVector.forEach(function(pair) {
49 | if (pair[1]>=threshold)
50 | results.push(pair[0]);
51 | });
52 | }
53 | return explain>0? {
54 | classes: results,
55 | explanation: scoresVector.map(function(pair) {return pair[0]+": "+pair[1];})
56 | }:
57 | results;
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/dist/core/multilabel/multilabelutils.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | var _ = require("underscore")._;
4 | /**
5 | * A utility function, used by several multi-label classifiers.
6 | *
7 | * @param scoresVector [[label1,score1],[label2,score2],...]
8 | * @param explain (int) if >0, return explanation.
9 | * @param withScores (boolean) if true, return the original scores vector.
10 | * @param threshold if withScores is false, all labels with scores above this threshold will be returned.
11 | */
12 |
13 |
14 | module.exports = {
15 | // iterate the list and collect the second item from the every element of the list
16 | getvalue: function getvalue(list) {
17 | val = [];
18 |
19 | for (elem in list) {
20 | val.push(list[elem][1]);
21 | }
22 |
23 | return val;
24 | },
25 | normalizeClasses: function normalizeClasses(expectedClasses) {
26 | if (!_(expectedClasses).isArray()) expectedClasses = [expectedClasses];
27 | expectedClasses = expectedClasses.map(this.stringifyClass);
28 | expectedClasses.sort();
29 | return expectedClasses;
30 | },
31 | stringifyClass: function stringifyClass(aClass) {
32 | return _(aClass).isString() ? aClass : JSON.stringify(aClass);
33 | },
34 | stringifyIfNeeded: function stringifyIfNeeded(label) {
35 | return typeof label === 'string' ? label : JSON.stringify(label);
36 | },
37 | normalizeOutputLabels: function normalizeOutputLabels(labels) {
38 | if (!Array.isArray(labels)) labels = [labels];
39 | return labels.map(module.exports.stringifyIfNeeded);
40 | },
41 | mapScoresVectorToMultilabelResult: function mapScoresVectorToMultilabelResult(scoresVector, explain, withScores, threshold) {
42 | var results;
43 |
44 | if (withScores) {
45 | results = scoresVector;
46 | } else {
47 | results = [];
48 | scoresVector.forEach(function (pair) {
49 | if (pair[1] >= threshold) results.push(pair[0]);
50 | });
51 | }
52 |
53 | return explain > 0 ? {
54 | classes: results,
55 | explanation: scoresVector.map(function (pair) {
56 | return pair[0] + ": " + pair[1];
57 | })
58 | } : results;
59 | }
60 | };
--------------------------------------------------------------------------------
/dist/utils/list.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | Object.defineProperty(exports, "__esModule", {
4 | value: true
5 | });
6 | exports.median = median;
7 | exports.variance = variance;
8 | exports.average = average;
9 | exports.listembed = listembed;
10 | exports.clonedataset = clonedataset;
11 |
12 | var _underscore = require("underscore");
13 |
14 | /**
15 | * Utilities for lists
16 | *
17 | * @author Vasily Konovalov
18 | */
19 | // Calculating the median of an array basically involves sorting the array and picking the middle number.
20 | // If it’s an even amount of numbers you take the two numbers in the middle and average them.
function median(values) {
  // Numeric sort — note this mutates the caller's array in place.
  values.sort(function (a, b) {
    return a - b;
  });
  var half = Math.floor(values.length / 2);
  // Odd length: middle element; even length: mean of the two middle elements.
  if (values.length % 2) return values[half];else return (values[half - 1] + values[half]) / 2.0;
}
28 |
// Population variance of a numeric list: E[X^2] - (E[X])^2.
// FIX: 'sum', 'exp', 'sum2' and 'exp2' were assignments to undeclared
// variables; this file runs under "use strict", so calling variance threw a
// ReferenceError. They are now proper locals (underscore no longer needed).
function variance(list) {
  var sum = 0;
  var sum2 = 0;

  for (var i = 0; i < list.length; i++) {
    sum += list[i];
    sum2 += list[i] * list[i];
  }

  var mean = sum / list.length;
  return sum2 / list.length - mean * mean;
}
40 |
// Arithmetic mean of a numeric list (NaN for an empty list, as 0/0).
function average(list) {
  var sum = (0, _underscore.reduce)(list, function (memo, num) {
    return memo + num;
  }, 0);
  return sum / list.length;
} // @input - list
// @output - embedded list

48 |
49 |
// Normalize a label into array-of-arrays form:
//   null/undefined -> [[]]; {classes: X} -> continue with X;
//   flat list -> [list]; already-nested list -> unchanged.
function listembed(label) {
  if (label === null || label == undefined || typeof label == 'undefined') return [[]]; // if (typeof label != 'undefined')
  // else
  // {

  if ((0, _underscore.isObject)(label) && !(0, _underscore.isArray)(label)) // if ('classes' in JSON.parse(label))
    if ('classes' in label) label = label.classes;
  if (!(label[0] instanceof Array)) return [label];else return label; // }
  // else
  // {
  // return [label]
  // }
}
63 |
// Shallow-clone every sample of a dataset into a new array.
// FIX: 'set1' was an assignment to an undeclared variable; this file runs
// under "use strict", so calling clonedataset threw a ReferenceError.
function clonedataset(set) {
  var set1 = [];
  (0, _underscore.each)(set, function (value, key, list) {
    set1.push((0, _underscore.clone)(value));
  });
  return set1;
}
--------------------------------------------------------------------------------
/dist/core/svm/SvmLinearMulticlassDemo.js:
--------------------------------------------------------------------------------
"use strict";

// simple demonstration of multiclass SVM, based on LibLinear
var SvmLinear = require('./SvmLinear');

// Two points per class (3, 4, 5); the classes are linearly separable.
var trainSet = [
  { input: [0, 0], output: 3 },
  { input: [1, 1], output: 3 },
  { input: [0, 1], output: 4 },
  { input: [1, 2], output: 4 },
  { input: [0, 2], output: 5 },
  { input: [1, 3], output: 5 }
];
// One separating line goes through [0,0.5] and [1,1.5]. It is:
// 0.5+x-y = 0
// or: -1-2x+2y = 0
//Another separating line goes through [0,1.5] and [1,2.5]. It is:
// 1.5+x-y = 0
//or: -3-2x+2y = 0

var classifier = new SvmLinear({
  learn_args: "-c 20",
  model_file_prefix: "tempfiles/SvmLinearMulticlassDemo",
  multiclass: true,
  debug: false
});
classifier.trainBatch(trainSet);

// The same four probe points, classified under four explain/score settings.
var testPoints = [[1, 0], [0, 1.3], [0, 1.7], [0, 3]]; // expected: 3, 4, 5, 5

console.log("simple classification: ");
testPoints.forEach(function (point) {
  console.dir(classifier.classify(point));
});

console.log("model: ");
console.dir(classifier.mapLabelToMapFeatureToWeight); // { '0': -1, '1': -2, '2': 2 }

console.log("explained classification: ");
testPoints.forEach(function (point) {
  console.dir(classifier.classify(point, 3));
});

console.log("classification with scores: ");
testPoints.forEach(function (point) {
  console.dir(classifier.classify(point, 0, true));
});

console.log("explained classification with scores: ");
testPoints.forEach(function (point) {
  console.dir(classifier.classify(point, 3, true));
});
--------------------------------------------------------------------------------
/test/classifiersTest/multilabel/ClassifierWithSplitterTest.js:
--------------------------------------------------------------------------------
1 | /**
2 | * a unit-test for multi-label classifier with input-splitter (sentence splitter)
3 | */
4 |
5 | import { EnhancedClassifier, multilabel, Winnow } from "../../../dist/core";
6 | import { NGramsOfWords, RegexpSplitter } from "../../../dist/features";
7 |
describe.skip("baseline - classifier without a splitter", function() {
  it("should not classify long sentencs", function() {
    var classifier = new EnhancedClassifier({
      classifierType: multilabel.BinaryRelevance.bind(this, {
        binaryClassifierType: Winnow.bind(this, { retrain_count: 10 })
      }),
      featureExtractor: NGramsOfWords(1),
      inputSplitter: null // no splitting: each sentence is one sample
    });

    classifier.trainBatch([
      { input: "I want aa", output: "A" },
      { input: "I want bb", output: "B" },
      { input: "I want cc", output: "C" }
    ]);

    classifier.classify("I want aa").should.eql(["A"]);
    classifier.classify("I want bb").should.eql(["B"]);
    classifier.classify("I want cc").should.eql(["C"]);
    // Without a splitter the combined sentence is NOT decomposed:
    classifier
      .classify("I want aa, I want bb, and I want cc")
      .should.not.eql(["A", "B", "C"]);
  });
});

describe.skip("classifier with a splitter", function() {
  it("should classify long sentencs", function() {
    var classifier = new EnhancedClassifier({
      classifierType: multilabel.BinaryRelevance.bind(this, {
        binaryClassifierType: Winnow.bind(this, { retrain_count: 3 })
      }),
      featureExtractor: NGramsOfWords(1),
      inputSplitter: RegexpSplitter("[.,;?!]|and")
    });

    classifier.trainBatch([
      { input: "I want aa", output: "A" }, // train on single class
      { input: "I want bb", output: "B" }, // train on array with single class (same effect)
      { input: "I want cc", output: "C" } // train on structured class, that will be stringified to "{C:c}".
    ]);

    classifier.classify("I want aa").should.eql(["A"]);
    classifier.classify("I want bb").should.eql(["B"]);
    classifier.classify("I want cc").should.eql(["C"]);
    // With the splitter, each clause is classified separately:
    classifier
      .classify("I want aa, I want bb, and I want cc")
      .should.eql(["A", "B", "C"]);
  });
});
61 |
--------------------------------------------------------------------------------
/src/utils/unseen_correlation.js:
--------------------------------------------------------------------------------
1 | /*
2 | Correlation between unseen words and False Negative
3 |
4 | The assumption is that previously unseen word mostly might cause false negative type of mistake.
5 | Module does cross-validation on the given dataset, in the test utterances where there is
6 | unseen words and false negative mistake the the dict is build, where the key is a word and
7 | the value is the list of false negative mistakes.
8 |
9 | @author Vasily Konovalov
10 | */
11 |
12 | var _ = require('underscore')._;
13 | var fs = require('fs');
14 | var partitions = require('./partitions');
15 | var trainAndTest = require('./trainAndTest').trainAndTest;
16 | var trainAndTest_hash= require('./trainAndTest').trainAndTest_hash;
17 |
// Lower-case and trim a sentence; undefined input maps to the empty string.
function normalizer(sentence) {
	if (typeof sentence == 'undefined')
		return "";
	return sentence.toLowerCase().trim();
}
26 |
// Tokenize every sample's 'input' field and return the unique vocabulary.
// FIX: 'vocabulary' was an implicit global — repeated calls shared (and
// clobbered) state through the global object; now a proper local.
function tokenizedataset(dataset, tokenize)
{
	var vocabulary = []
	for (var sample in dataset)
	{
		// NOTE(review): samples are objects, so .length is usually undefined
		// and this guard passes; presumably it filters empty entries — confirm.
		if (dataset[sample].length!=0)
		{
			var words = tokenize(normalizer(dataset[sample]['input']));
			vocabulary = vocabulary.concat(words);
		}
	}
	return _.uniq(vocabulary);
}
40 |
// Split a string into word and punctuation tokens, dropping '' and ' '.
// FIX: 'pattern' was an implicit global; now a proper local.
// NOTE(review): the alternative \"" matches a literal «""» pair — it looks
// like a typo for a single escaped quote; confirm against callers before
// changing the pattern itself.
module.exports.tokenize = function(str)
{
	var pattern = new RegExp(/(\w+|\!|\'|\"")/i);
	str = str.split(pattern)
	return _.without(str,'',' ')
}
47 | /*
48 | @params dataset - dataset to estimate the correlation
49 | @params classifier - classifier to estimate false negative mistakes.
50 |
51 | */
/*
 Build a map word -> list of false-negative explanations, over 5-fold CV.
 @params dataset - dataset to estimate the correlation
 @params classifier - classifier to estimate false negative mistakes.
 FIX: 'unseen_correlation', 'unseen_vocabulary', 'seen_vocabulary' and
 'unseen_words' were implicit globals — state leaked between calls through
 the global object. All are now declared locally; behavior is unchanged.
*/
module.exports.unseen_correlation = function(dataset, classifier, tokenize) {
	var unseen_correlation = {}

	partitions.partitions(dataset, 5, function(trainSet, testSet, index) {
		// NOTE(review): unseen_vocabulary is computed but never read — kept
		// for parity with the original; confirm it can be dropped.
		var unseen_vocabulary = tokenizedataset(testSet, tokenize)
		var seen_vocabulary = tokenizedataset(trainSet, tokenize)
		var stats = trainAndTest_hash(classifier, trainSet, testSet, 5);

		_.each(stats['data'], function(report, key, list){
			if (report['explanations']['FN'].length > 0)
			{
				// Words of this test utterance never seen during training:
				var unseen_words = _.difference(tokenize(normalizer(report['input'])), seen_vocabulary)
				_.each(unseen_words, function(word, key, list) {
					if (!(word in unseen_correlation))
					{
						unseen_correlation[word] = []
					}
					unseen_correlation[word].push(report['explanations']['FN'])
				})
			}
		})
	})
	return unseen_correlation
}
76 |
--------------------------------------------------------------------------------
/dist/utils/unseen_correlation.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | /*
4 | Correlation between unseen words and False Negative
5 |
6 | The assumption is that previously unseen word mostly might cause false negative type of mistake.
7 | Module does cross-validation on the given dataset, in the test utterances where there is
8 | unseen words and false negative mistake the the dict is build, where the key is a word and
9 | the value is the list of false negative mistakes.
10 |
11 | @author Vasily Konovalov
12 | */
13 | var _ = require('underscore')._;
14 |
15 | var fs = require('fs');
16 |
17 | var partitions = require('./partitions');
18 |
19 | var trainAndTest = require('./trainAndTest').trainAndTest;
20 |
21 | var trainAndTest_hash = require('./trainAndTest').trainAndTest_hash;
22 |
/**
 * Normalizes a sentence for tokenization: lower-cases it and strips
 * surrounding whitespace. An undefined sentence normalizes to "".
 * @param sentence - the string to normalize (may be undefined).
 * @return the normalized string.
 */
function normalizer(sentence) {
  if (typeof sentence == 'undefined') return "";
  return sentence.toLowerCase().trim();
}
30 |
/**
 * Builds the unique vocabulary of a dataset.
 * @param dataset - collection of samples; each sample is expected to carry an 'input' string.
 * @param tokenize - function(string) -> array of tokens.
 * @return array of unique tokens appearing in the dataset inputs.
 */
function tokenizedataset(dataset, tokenize) {
  // 'var' added: this file is in strict mode, so the original assignment to
  // the undeclared 'vocabulary' threw a ReferenceError at runtime.
  var vocabulary = [];

  for (var sample in dataset) {
    if (dataset[sample].length != 0) {
      var words = tokenize(normalizer(dataset[sample]['input']));
      vocabulary = vocabulary.concat(words);
    }
  }

  return _.uniq(vocabulary);
}
43 |
/**
 * Splits a string into word/punctuation tokens.
 * @param str - the string to tokenize.
 * @return array of tokens, with empty and single-space fragments removed.
 */
module.exports.tokenize = function (str) {
  // 'var' added: this file is in strict mode, so the original assignment to
  // the undeclared 'pattern' threw a ReferenceError at runtime.
  // NOTE(review): the alternative \"" matches TWO double-quote characters;
  // a single \" was probably intended — confirm before changing the regexp.
  var pattern = /(\w+|\!|\'|\"")/i;
  var parts = str.split(pattern);
  return _.without(parts, '', ' ');
};
49 | /*
50 | @params dataset - dataset to estimate the correlation
51 | @params classifier - classifier to estimate false negative mistakes.
52 |
53 | */
54 |
55 |
/**
 * Estimates the correlation between unseen words and false-negative mistakes
 * via 5-fold cross-validation.
 * @param dataset - dataset to estimate the correlation.
 * @param classifier - classifier to estimate false negative mistakes.
 * @param tokenize - tokenizer function used to extract words.
 * @return hash mapping each unseen word to a list of FN explanation lists.
 */
module.exports.unseen_correlation = function (dataset, classifier, tokenize) {
  // 'var' added throughout: this file is in strict mode, so the original
  // assignments to undeclared identifiers threw ReferenceErrors at runtime.
  // The unused 'unseen_vocabulary' local was removed.
  var unseen_correlation = {};
  partitions.partitions(dataset, 5, function (trainSet, testSet, index) {
    var seen_vocabulary = tokenizedataset(trainSet, tokenize);
    var stats = trainAndTest_hash(classifier, trainSet, testSet, 5);

    _.each(stats['data'], function (report, key, list) {
      // only utterances that produced false-negative mistakes are relevant
      if (report['explanations']['FN'].length > 0) {
        // words of this utterance that never appeared in the train set
        var unseen_words = _.difference(tokenize(normalizer(report['input'])), seen_vocabulary);

        _.each(unseen_words, function (word, key, list) {
          if (!(word in unseen_correlation)) {
            unseen_correlation[word] = [];
          }

          unseen_correlation[word].push(report['explanations']['FN']);
        });
      }
    });
  });
  return unseen_correlation;
};
--------------------------------------------------------------------------------
/test/utilsTest/PartitionsTest.js:
--------------------------------------------------------------------------------
1 | /**
2 | * a unit-test for Partitions unit (creating partitions for train and test)
3 | */
4 |
5 | import _, { isEqual } from "underscore";
6 | import { partitions } from "../../dist/utils";
7 |
// Cross-validation partition tests: verify fold sizes and, most importantly,
// that train and test sets never overlap.
describe("partitions", function() {
  it("partitions_consistent_by_fold", function() {
    var dataset = [1, 3, 5, 7, 9, 11, 13];
    var data = partitions.partitions_consistent_by_fold(dataset, 2, 1);
    // isEqual returns a boolean; the should-chain asserts it is true.
    isEqual(data, {
      train: [1, 3, 5, 13],
      test: [7, 9, 11]
    }).should.be.true;
  });

  // skipped: partitions_hash_fold is currently not exercised
  it.skip("partitions_hash_fold", function() {
    var dataset = {
      label1: [1, 3, 5, 7, 9, 11, 13],
      label2: [0, 2, 4, 6, 8, 10, 12]
    };
    var data = partitions.partitions_hash_fold(dataset, 2, 1);
    isEqual(data["test"], [7, 9, 11, 6, 8, 10]).should.be.true;
    var data = partitions.partitions_hash_fold(dataset, 3, 2);
    isEqual(data["test"], [9, 11, 8, 10]).should.be.true;
  });

  it("partition hash", function() {
    var dataset = {
      label1: [1, 3, 5, 7, 9, 11, 13],
      label2: [0, 2, 4, 6, 8, 10, 12]
    };
    // each fold takes 3 of 7 items per label: 6 test / 4 train (rounded) overall
    partitions.partitions_hash(dataset, 2, function(train, test, index) {
      test.should.have.lengthOf(6);
      train.should.have.lengthOf(4);
    });
  });

  // A dummy dataset with 10 documents:
  var dataset = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
  it("creates 5 partitions, with a test-set of 2 in each", function() {
    var numOfPartitions = 0;
    partitions.partitions(dataset, 5, function(train, test, index) {
      //console.log("\t"+index+": "+train+" / "+test);
      train.should.have.lengthOf(8);
      test.should.have.lengthOf(2);
      _(test)
        .intersection(train)
        .should.have.lengthOf(0); // most important test - make sure there is no leak from train to test!
      numOfPartitions++;
    });
    numOfPartitions.should.equal(5);
  });
  it("creates 3 partitions, with a test-set of 3 in each", function() {
    var numOfPartitions = 0;
    partitions.partitions(dataset, 3, function(train, test, index) {
      //console.log("\t"+index+": "+train+" / "+test);
      train.should.have.lengthOf(7);
      test.should.have.lengthOf(3);
      _(test)
        .intersection(train)
        .should.have.lengthOf(0); // most important test - make sure there is no leak from train to test!
      numOfPartitions++;
    });
    numOfPartitions.should.equal(3);
  });
});
69 |
--------------------------------------------------------------------------------
/test/classifiersTest/WinnowExampleTest.js:
--------------------------------------------------------------------------------
1 | import { classifiers, features as _features } from "../../index";
2 |
// First, define our base classifier type (a multi-label classifier based on winnow):
// bind(0, options) pre-binds the options argument; the first argument is the
// 'this' value, which these constructor functions ignore.
var TextClassifier = classifiers.multilabel.BinaryRelevance.bind(0, {
  binaryClassifierType: classifiers.Winnow.bind(0, {
    retrain_count: 10
  })
});
9 |
// Define a feature extractor (a function that takes a sample and add features to a given features set):
// every space-separated word of the input becomes a binary feature.
var WordExtractor = function(input, features) {
  for (const word of input.split(" ")) {
    features[word] = 1;
  }
};
16 |
describe("winnow classifier", function() {
  it("works with a feature-extractor", function() {
    // Initialize a classifier with a feature extractor:
    var intentClassifier = new classifiers.EnhancedClassifier({
      classifierType: TextClassifier,
      featureExtractor: WordExtractor
    });

    // Train and test:
    intentClassifier.trainBatch([
      {
        input: "I want an apple",
        output: "apl"
      },
      {
        input: "I want a banana",
        output: "bnn"
      },
      {
        input: "I want chips",
        output: "cps"
      }
    ]);
    intentClassifier
      .classify("I want an apple and a banana")
      .sort()
      .should.eql(["apl", "bnn"]);
    // Without a normalizer, the upper-case sentence shares no features with
    // the training data, so no label is recognized:
    intentClassifier
      .classify("I WANT AN APPLE AND A BANANA")
      .sort()
      .should.eql([]); // case sensitive
  });
});
50 |
describe("winnow classifier", function() {
  it("works with a case-normalizer", function() {
    // Initialize a classifier with a feature extractor and a case normalizer.
    // 'var' added: this file is an ES module (strict mode), so assigning to
    // the undeclared 'intentClassifier' threw a ReferenceError at runtime.
    var intentClassifier = new classifiers.EnhancedClassifier({
      classifierType: TextClassifier,
      normalizer: _features.LowerCaseNormalizer,
      featureExtractor: WordExtractor
    });

    //Train and test:
    intentClassifier.trainBatch([
      {
        input: "I want an apple",
        output: "apl"
      },
      {
        input: "I want a banana",
        output: "bnn"
      },
      {
        input: "I want chips",
        output: "cps"
      }
    ]);

    intentClassifier
      .classify("I want an apple and a banana")
      .sort()
      .should.eql(["apl", "bnn"]);
    intentClassifier
      .classify("I WANT AN APPLE AND A BANANA")
      .sort()
      .should.eql(["apl", "bnn"]); // case insensitive
  });
});
86 |
--------------------------------------------------------------------------------
/src/features/index.js:
--------------------------------------------------------------------------------
1 | export const NGramsOfLetters = require("./NGramsOfLetters");
2 | export const Hypernyms = require("./HypernymExtractor");
3 | export const FeatureLookupTable = require("./FeatureLookupTable");
4 | export const LowerCaseNormalizer = require("./LowerCaseNormalizer");
5 | export const RegexpNormalizer = require("./RegexpNormalizer");
6 | export const RegexpSplitter = require("./RegexpSplitter");
7 |
8 | /**
9 | * CollectionOfExtractors - combines the features from several feature extractors.
10 | * @param extractors - an array of other feature extractors.
11 | * @param sample - a string.
12 | * @param features an initial hash of features (optional).
13 | * @return a hash with all features generated from the sample by the different extractors
14 | */
15 | export function CollectionOfExtractors(extractors) {
16 | return function(sample, features) {
17 | for (var i=0; i 0 ? 1 : -1);
32 | });
33 | return this.base.train(data, labels, this.opts);
34 | },
35 |
36 | /**
37 | * @param features - a feature-value hash.
38 | * @param explain - int - if positive, an "explanation" field, with the given length, will be added to the result.
39 | * @param continuous_output if true, return the net classification score. If false [default], return 0 or 1.
40 | * @return the binary classification - 0 or 1.
41 | */
42 | classify: function(features, explain, continuous_output) {
43 | var score = this.base.marginOne(features);
44 | var classification = continuous_output ? score : score > 0 ? 1 : 0;
45 |
46 | if (explain > 0) {
47 | var f = this.base.b;
48 |
49 | // if the linear kernel was used and w was computed and stored,
50 | // (i.e. the svm has fully finished training)
51 | // the internal class variable usew_ will be set to true.
52 | var explanations = [];
53 | if (this.base.usew_) {
54 | var w = this.base.w;
55 | for (var j = 0; j < this.base.D; j++) {
56 | explanations[j] = {
57 | feature: j,
58 | value: features[j],
59 | weight: w[j],
60 | relevance: features[j] * w[j]
61 | };
62 | }
63 | } else {
64 | // explanations not supported.
65 | //for(var i=0;i 0 ? 1 : -1);
29 | });
30 | return this.base.train(data, labels, this.opts);
31 | },
32 |
33 | /**
34 | * @param features - a feature-value hash.
35 | * @param explain - int - if positive, an "explanation" field, with the given length, will be added to the result.
36 | * @param continuous_output if true, return the net classification score. If false [default], return 0 or 1.
37 | * @return the binary classification - 0 or 1.
38 | */
39 | classify: function classify(features, explain, continuous_output) {
40 | var score = this.base.marginOne(features);
41 | var classification = continuous_output ? score : score > 0 ? 1 : 0;
42 |
43 | if (explain > 0) {
44 | var f = this.base.b; // if the linear kernel was used and w was computed and stored,
45 | // (i.e. the svm has fully finished training)
46 | // the internal class variable usew_ will be set to true.
47 |
48 | var explanations = [];
49 |
50 | if (this.base.usew_) {
51 | var w = this.base.w;
52 |
53 | for (var j = 0; j < this.base.D; j++) {
54 | explanations[j] = {
55 | feature: j,
56 | value: features[j],
57 | weight: w[j],
58 | relevance: features[j] * w[j]
59 | };
60 | }
61 | } else {// explanations not supported.
62 | //for(var i=0;i frontmatter.date <= new Date(),
30 | count: 20
31 | }
32 | ]
33 | ],
34 | head: [
35 | [
36 | 'link',
37 | { rel: 'apple-touch-icon', sizes: '57x57', href: '/apple-icon-57x57.png' }
38 | ],
39 | [
40 | 'link',
41 | { rel: 'apple-touch-icon', sizes: '60x60', href: '/apple-icon-60x60.png' }
42 | ],
43 | [
44 | 'link',
45 | { rel: 'apple-touch-icon', sizes: '72x72', href: '/apple-icon-72x72.png' }
46 | ],
47 | [
48 | 'link',
49 | { rel: 'apple-touch-icon', sizes: '76x76', href: '/apple-icon-76x76.png' }
50 | ],
51 | [
52 | 'link',
53 | {
54 | rel: 'apple-touch-icon',
55 | sizes: '114x114',
56 | href: '/apple-icon-114x114.png'
57 | }
58 | ],
59 | [
60 | 'link',
61 | {
62 | rel: 'apple-touch-icon',
63 | sizes: '120x120',
64 | href: '/apple-icon-120x120.png'
65 | }
66 | ],
67 | [
68 | 'link',
69 | {
70 | rel: 'apple-touch-icon',
71 | sizes: '144x144',
72 | href: '/apple-icon-144x144.png'
73 | }
74 | ],
75 | [
76 | 'link',
77 | {
78 | rel: 'apple-touch-icon',
79 | sizes: '152x152',
80 | href: '/apple-icon-152x152.png'
81 | }
82 | ],
83 | [
84 | 'link',
85 | {
86 | rel: 'apple-touch-icon',
87 | sizes: '180x180',
88 | href: '/apple-icon-180x180.png'
89 | }
90 | ],
91 | [
92 | 'link',
93 | {
94 | rel: 'icon',
95 | type: 'image/png',
96 | sizes: '192x192',
97 | href: '/android-icon-192x192.png'
98 | }
99 | ],
100 | [
101 | 'link',
102 | {
103 | rel: 'icon',
104 | type: 'image/png',
105 | sizes: '32x32',
106 | href: '/favicon-32x32.png'
107 | }
108 | ],
109 | [
110 | 'link',
111 | {
112 | rel: 'icon',
113 | type: 'image/png',
114 | sizes: '96x96',
115 | href: '/favicon-96x96.png'
116 | }
117 | ],
118 | [
119 | 'link',
120 | {
121 | rel: 'icon',
122 | type: 'image/png',
123 | sizes: '16x16',
124 | href: '/favicon-16x16.png'
125 | }
126 | ],
127 | ['link', { rel: 'manifest', href: '/manifest.json' }],
128 | ['meta', { name: 'msapplication-TileColor', content: '#ffffff' }],
129 | [
130 | 'meta',
131 | { name: 'msapplication-TileImage', content: '/ms-icon-144x144.png' }
132 | ],
133 | ['meta', { name: 'theme-color', content: '#ffffff' }]
134 | ]
135 | }
136 |
--------------------------------------------------------------------------------
/dist/features/index.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | Object.defineProperty(exports, "__esModule", {
4 | value: true
5 | });
6 | exports.CollectionOfExtractors = CollectionOfExtractors;
7 | exports.NGramsFromArray = NGramsFromArray;
8 | exports.NGramsOfWords = NGramsOfWords;
9 | exports.call = call;
10 | exports.normalize = normalize;
11 | exports.RegexpSplitter = exports.RegexpNormalizer = exports.LowerCaseNormalizer = exports.FeatureLookupTable = exports.Hypernyms = exports.NGramsOfLetters = void 0;
12 |
13 | var NGramsOfLetters = require("./NGramsOfLetters");
14 |
15 | exports.NGramsOfLetters = NGramsOfLetters;
16 |
17 | var Hypernyms = require("./HypernymExtractor");
18 |
19 | exports.Hypernyms = Hypernyms;
20 |
21 | var FeatureLookupTable = require("./FeatureLookupTable");
22 |
23 | exports.FeatureLookupTable = FeatureLookupTable;
24 |
25 | var LowerCaseNormalizer = require("./LowerCaseNormalizer");
26 |
27 | exports.LowerCaseNormalizer = LowerCaseNormalizer;
28 |
29 | var RegexpNormalizer = require("./RegexpNormalizer");
30 |
31 | exports.RegexpNormalizer = RegexpNormalizer;
32 |
33 | var RegexpSplitter = require("./RegexpSplitter");
34 | /**
35 | * CollectionOfExtractors - combines the features from several feature extractors.
36 | * @param extractors - an array of other feature extractors.
37 | * @param sample - a string.
38 | * @param features an initial hash of features (optional).
39 | * @return a hash with all features generated from the sample by the different extractors
40 | */
41 |
42 |
43 | exports.RegexpSplitter = RegexpSplitter;
44 |
/**
 * CollectionOfExtractors - combines the features from several feature extractors.
 * @param extractors - an array of other feature extractors.
 * @return a feature extractor that applies every extractor in order,
 *         accumulating all features into the same hash.
 */
function CollectionOfExtractors(extractors) {
  return function (sample, features) {
    extractors.forEach(function (extractor) {
      extractor(sample, features);
    });
  };
}
52 |
53 | ;
54 | /**
55 | * Convert an array of words/tokens to a set of n-grams, for a given n, possibly with a gap:
56 | */
57 |
/**
 * Convert an array of words/tokens to a set of n-grams, for a given n,
 * possibly with a gap.
 * @param numOfWords - the n in n-gram.
 * @param gap - if truthy, the middle token of each gram is replaced by "-".
 * @param grams - the array of tokens (left unchanged from the caller's view).
 * @param features - hash to which each n-gram is added as a key with value 1.
 */
function NGramsFromArray(numOfWords, gap, grams, features) {
  // Pad a working copy with sentence-boundary markers instead of
  // temporarily mutating (and later restoring) the caller's array.
  var padCount = numOfWords - 1 - (gap ? 1 : 0);
  var padded = grams.slice();
  for (var p = 0; p < padCount; ++p) {
    padded.unshift("[start]");
    padded.push("[end]");
  }

  var lastStart = padded.length - numOfWords;
  for (var start = 0; start <= lastStart; ++start) {
    var gram = padded.slice(start, start + numOfWords);
    if (gap) gram[1] = "-";
    features[gram.join(" ").trim()] = 1;
  }
}
76 |
/**
 * NGramsOfWords - builds a feature extractor that emits word n-grams.
 * @param numOfWords - the n in n-gram.
 * @param gap - if truthy, the middle word of each gram is replaced by "-".
 * @return function(sample, features) adding the n-gram features of sample.
 */
function NGramsOfWords(numOfWords, gap) {
  return function (sample, features) {
    // keep only the non-empty words after splitting on whitespace/punctuation
    var words = sample.split(/[ \t,;:.!?]/).filter(Boolean);
    NGramsFromArray(numOfWords, gap, words, features);
  };
}
86 | /**
87 | * Call the given featureExtractor on the given sample, and return the result.
88 | * Used for testing.
89 | */
90 |
91 |
/**
 * Call the given featureExtractor on the given sample, and return the
 * resulting features hash. Used for testing.
 */
function call(featureExtractor, sample) {
  var collected = {};
  featureExtractor(sample, collected);
  return collected;
}
97 | /**
98 | * If the input is a featureExtractor, return it as is.
99 | *
100 | * If it is an array of featureExtractors, convert it to a CollectionOfExtractors.
101 | *
102 | */
103 |
104 |
/**
 * If the input is a featureExtractor, return it as is.
 * If it is an array of featureExtractors, convert it to a CollectionOfExtractors.
 * Falsy input (null/undefined) is returned unchanged.
 */
function normalize(featureExtractorOrArray) {
  if (!featureExtractorOrArray) {
    return featureExtractorOrArray;
  }
  if (Array.isArray(featureExtractorOrArray)) {
    return new CollectionOfExtractors(featureExtractorOrArray);
  }
  return featureExtractorOrArray;
}
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at toroo.byamba@gmail.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 |
73 | [homepage]: https://www.contributor-covenant.org
74 |
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 |
--------------------------------------------------------------------------------
/test/classifiersTest/NeuralWithNormalizerTest.js:
--------------------------------------------------------------------------------
1 | /**
2 | * a unit-test for Enhanced Classifier
3 | */
4 |
5 | import { EnhancedClassifier, NeuralNetwork } from "../../dist/core";
6 | import {
7 | LowerCaseNormalizer,
8 | NGramsOfWords,
9 | RegexpNormalizer
10 | } from "../../dist/features";
11 |
// Baseline: without a normalizer, morphological variants ("cheapest",
// "watch es") share no features with the trained sample and are therefore
// NOT recognized as spam.
describe("baseline - classifier without a normalizer", function() {
  it("errs on non-normalized sentencs", function() {
    var spamClassifier = new EnhancedClassifier({
      classifierType: NeuralNetwork,
      featureExtractor: NGramsOfWords(1),
      normalizer: null
    });

    spamClassifier.trainBatch([
      {
        input: "cheaper watches",
        output: [1]
      },
      {
        input: "",
        output: [0]
      }
    ]);

    spamClassifier.classify("cheaper watches").should.be.above(0.8); // high number (spam)
    spamClassifier.classify("cheapest watch es").should.be.below(0.2); // low number (not spam) - the variant words were not normalized
    spamClassifier.classify("cheapless clocks").should.be.below(0.2); // low number (not spam)
  });
});
36 |
// With a suffix-stripping normalizer, "cheapest watch es" normalizes to the
// trained sample "cheap watches" and is now recognized as spam.
describe("classifier with a single normalizer", function() {
  it("classifies sentences correctly", function() {
    var spamClassifier = new EnhancedClassifier({
      classifierType: NeuralNetwork,
      featureExtractor: NGramsOfWords(1),
      normalizer: RegexpNormalizer([
        {
          source: "er\\b",
          target: ""
        },
        {
          source: "est\\b",
          target: ""
        },
        {
          source: " es\\b",
          target: "es"
        }
      ])
    });

    spamClassifier.trainBatch([
      {
        input: "cheaper watches",
        output: [1]
      },
      {
        input: "",
        output: [0]
      }
    ]);

    spamClassifier.classify("cheaper watches").should.be.above(0.8); // high number (spam)
    spamClassifier.classify("cheapest watch es").should.be.above(0.8); // high number (spam) - normalized to the trained sample
    spamClassifier.classify("cheapless clocks").should.be.below(0.2); // low number (not spam)
  });
});
74 |
// An array of normalizers is applied in order: lower-casing first, then the
// suffix-stripping regexps — so mixed-case training input still matches.
describe("classifier with an array of normalizers", function() {
  it("classifies sentences correctly", function() {
    var spamClassifier = new EnhancedClassifier({
      classifierType: NeuralNetwork,
      featureExtractor: NGramsOfWords(1),
      normalizer: [
        LowerCaseNormalizer,
        RegexpNormalizer([
          {
            source: "er\\b",
            target: ""
          }
        ]),
        RegexpNormalizer([
          {
            source: "est\\b",
            target: ""
          }
        ]),
        RegexpNormalizer([
          {
            source: " es\\b",
            target: "es"
          }
        ])
      ]
    });

    spamClassifier.trainBatch([
      {
        input: "ChEaPeR WaTcHeS",
        output: [1]
      },
      {
        input: "",
        output: [0]
      }
    ]);

    spamClassifier.classify("cheaper watches").should.be.above(0.8); // high number (spam)
    spamClassifier.classify("cheapest watch es").should.be.above(0.8); // high number (spam)
    spamClassifier.classify("cheapless clocks").should.be.below(0.2); // low number (not spam)
  });
});
119 |
--------------------------------------------------------------------------------
/test/classifiersTest/multilabel/MulticlassSegmentationBayesTest.js:
--------------------------------------------------------------------------------
1 | /**
2 | * a unit-test for Multi-Label classification in the multiclass segmentation method
3 | */
4 |
5 | import { Bayesian, multilabel } from "../../../src/core";
6 | import { NGramsOfWords } from "../../../src/features";
7 | import "../../sorted";
8 |
// A multi-label classifier that segments the input and classifies each
// segment with a Bayesian multiclass classifier (relative probabilities)
// over word unigram features. bind pre-binds the options argument.
var MulticlassSegmentationBayes = multilabel.MulticlassSegmentation.bind(this, {
  multiclassClassifierType: Bayesian.bind(this, {
    calculateRelativeProbabilities: true
  }),
  featureExtractor: NGramsOfWords(1)
});
15 |
// MulticlassSegmentationBayes is now in repair
// (the whole suite is skipped until the classifier is fixed)
describe.skip("Multi-Label MCS Classifier Trained on Single-class inputs", function() {
  var classifier = new MulticlassSegmentationBayes();
  classifier.trainBatch([
    { input: "I want aa", output: "A" },
    { input: "I want bb", output: "B" },
    { input: "I want cc", output: "C" }
  ]);

  it("classifies 1-class samples", function() {
    classifier.classify("I want aa").should.eql(["A"]);
    classifier.classify("I want bb").should.eql(["B"]);
    classifier.classify("I want cc").should.eql(["C"]);
  });

  it("classifies 2-class samples", function() {
    classifier
      .classify("I want aa bb")
      .sorted()
      .should.eql(["A", "B"]);
    classifier
      .classify("I want bb cc")
      .sorted()
      .should.eql(["B", "C"]);
    classifier
      .classify("I want cc aa")
      .sorted()
      .should.eql(["A", "C"]);
  });

  it("classifies 2-class samples with a redundant word", function() {
    classifier
      .classify("I want aa and bb")
      .sorted()
      .should.eql(["A", "B"]);
    classifier
      .classify("I want bb and cc")
      .sorted()
      .should.eql(["B", "C"]);
    classifier
      .classify("I want cc and aa")
      .sorted()
      .should.eql(["A", "C"]);
  });

  it("classifies 3-class samples", function() {
    classifier
      .classify("I want cc and aa and bb")
      .sorted()
      .should.eql(["A", "B", "C"]);
  });

  // TODO: fix this case
  // it('classifies 0-class samples', function() {
  // classifier.classify("I want nothing").should.eql([]);
  // });
});
73 |
74 | /*describe('Multi-Label MCS Classifier Trained on two-class inputs', function() {
75 | var classifier = new MulticlassSegmentationBayes();
76 | classifier.trainBatch([
77 | {input: {I:1 , want:1 , aa:1 , bb:1 }, output: ['A','B']}, // train on array with classes
78 | {input: {I:1 , want:1 , bb:1 , cc:1 }, output: ['B','C']}, // train on array with classes
79 | {input: {I:1 , want:1 , cc:1 , dd:1 }, output: [{C:1, D:1}]}, // train on set of classes
80 | {input: {I:1 , want:1 , dd:1 , aa:1 }, output: [{D:1, A:1}]}, // train on set of classes
81 | ]);
82 |
83 | it('classifies 1-class samples', function() {
84 | classifier.classify({I:1 , want:1 , aa:1 }).should.eql(['A']);
85 | //classifier.classify({I:1 , want:1 , bb:1 }).should.eql(['B']);
86 | //classifier.classify({I:1 , want:1 , cc:1 }).should.eql(['C']);
87 | //classifier.classify({I:1 , want:1 , dd:1 }).should.eql(['D']);
88 | });
89 |
90 | it('classifies 2-class samples', function() {
91 | classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }).should.eql(['A','B']);
92 | classifier.classify({I:1 , want:1 , bb:1 , and:1 , cc:1 }).should.eql(['B','C']);
93 | //classifier.classify({I:1 , want:1 , cc:1 , and:1 , dd:1 }).should.eql(['C','D']);
94 | //classifier.classify({I:1 , want:1 , dd:1 , and:1 , aa:1 }).should.eql(['D','A']);
95 | });
96 | });
97 |
98 |
99 | */
100 |
--------------------------------------------------------------------------------
/test/classifiersTest/WinnowTest.js:
--------------------------------------------------------------------------------
1 | /**
2 | * a unit-test for winnow classifier
3 | */
4 |
5 | import { Winnow } from "../../dist/core";
6 |
// A Winnow binary classifier, pre-configured: 10 retraining passes,
// no weight averaging, margin of 1. bind pre-binds the options argument.
var WinnowClassifier = Winnow.bind(this, {
  retrain_count: 10,
  do_averaging: false,
  margin: 1
});
12 |
describe("winnow classifier", function() {
  it("supports online training", function() {
    var classifier = new WinnowClassifier();
    // after a single negative example, everything classifies as 0
    classifier.trainOnline(
      {
        a: 1,
        b: 0
      },
      0
    );
    classifier
      .classify({
        a: 1,
        b: 0
      })
      .should.equal(0);
    classifier
      .classify({
        a: 0,
        b: 0
      })
      .should.equal(0);
    classifier
      .classify({
        a: 0,
        b: 1
      })
      .should.equal(0);
    classifier
      .classify({
        a: 1,
        b: 1
      })
      .should.equal(0);

    // after a positive example on feature b, b alone classifies as 1
    classifier.trainOnline(
      {
        a: 0,
        b: 1
      },
      1
    );
    classifier
      .classify({
        a: 1,
        b: 0
      })
      .should.equal(0);
    classifier
      .classify({
        a: 0,
        b: 1
      })
      .should.equal(1);
  });

  it("supports batch and online training", function() {
    var dataset = [
      {
        input: {
          a: 1,
          b: 0
        },
        output: 0
      },
      {
        input: {
          a: 0,
          b: 1
        },
        output: 1
      }
    ];
    //console.log("batch: ");
    var classifierBatch = new WinnowClassifier();
    classifierBatch.trainBatch(dataset);
    //console.dir(classifierBatch);

    // replaying the dataset retrain_count+1 times online must yield a
    // classifier identical to the batch-trained one
    //console.log("online: ");
    var classifierOnline = new WinnowClassifier();
    for (var i = 0; i <= classifierBatch.retrain_count; ++i)
      for (var d = 0; d < dataset.length; ++d)
        classifierOnline.trainOnline(dataset[d].input, dataset[d].output);
    //console.dir(classifierOnline);

    classifierOnline.should.eql(classifierBatch);
  });

  it("supports continuous output", function() {
    var classifier = new WinnowClassifier();
    classifier.trainOnline(
      {
        a: 1,
        b: 0
      },
      0
    );
    classifier.trainOnline(
      {
        a: 0,
        b: 1
      },
      1
    );
    // third argument (continuous_output=true) returns the raw score
    // instead of the thresholded 0/1 classification
    classifier
      .classify(
        {
          a: 1,
          b: 0
        },
        0,
        true
      )
      .should.be.below(0);
    classifier
      .classify(
        {
          a: 0,
          b: 1
        },
        0,
        true
      )
      .should.be.above(0);
  });

  it("explains its decisions", function() {
    var classifier = new WinnowClassifier();
    classifier.trainOnline(
      {
        a: 1,
        b: 0
      },
      0
    );
    // the second argument is the requested explanation length
    classifier
      .classify(
        {
          a: 0,
          b: 0
        },
        /*explain=*/ 1
      )
      .should.have.property("explanation")
      .with.lengthOf(1);
    classifier
      .classify(
        {
          a: 0,
          b: 0
        },
        /*explain=*/ 3
      )
      .should.have.property("explanation")
      .with.lengthOf(3);
  });
});
170 |
--------------------------------------------------------------------------------
/src/core/svm/svmcommon.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Utilities common to SVM wrappers
3 | */
4 |
5 | var temp = require('temp')
6 | , fs = require('fs')
7 | , svmlight = require('../../formats/svmlight')
8 | , _ = require('underscore')._
9 |
10 | /**
11 | * Writes the given dataset to a file in svm-light format.
12 | * @return the file name.
13 | */
/**
 * Writes the given dataset to a file in svm-light format.
 * @param dataset - the dataset to serialize.
 * @param bias - bias term passed to the svm-light serializer.
 * @param binarize - whether to binarize feature values.
 * @param model_file_prefix - if given, write to "<prefix>.learn"; otherwise a temp file is created.
 * @param default_file_prefix - name prefix for the temporary file when no model prefix is given.
 * @param firstFeatureNumber - number assigned to the first feature in the svm-light output.
 * @return the file name written to.
 */
module.exports.writeDatasetToFile = function(dataset, bias, binarize, model_file_prefix, default_file_prefix, firstFeatureNumber) {
	var learnFile, fd;
	if (model_file_prefix) {
		learnFile = model_file_prefix + ".learn";
		fd = fs.openSync(learnFile, 'w');
	} else {
		// no explicit prefix: serialize into a fresh temporary file
		var tempFile = temp.openSync({prefix: default_file_prefix + "-", suffix: ".learn"});
		learnFile = tempFile.path;
		fd = tempFile.fd;
	}

	fs.writeSync(fd, svmlight.toSvmLight(dataset, bias, binarize, firstFeatureNumber));
	fs.closeSync(fd);
	return learnFile;
}
29 |
/**
 * A utility that classifies a given sample (given as a feature-value map) using a model (given as a feature-weight map).
 * @param modelMap a map {feature_i: weight_i, ....} (i >= 0; 0 is the weight of the bias, if exists).
 * @param bias if nonzero, added at the beginning of features.
 * @param features a map {feature_i: value_i, ....} (i >= 1)
 * @param explain (int) if positive, generate explanation about the classification.
 * @param continuous_output (boolean) if true, return a score; if false, return 0 or 1.
 * @param featureLookupTable if not null, used for creating meaningful explanations.
 * @returns if explain<=0: the classification value; otherwise: {classification, explanation}.
 * @throws Error if the computed score is NaN (e.g. non-numeric weights or values).
 */
module.exports.classifyWithModelMap = function (modelMap, bias, features, explain, continuous_output, featureLookupTable) {
	var explanations = [];
	var result = 0;

	// The bias weight (if any) lives at index 0 of the model map:
	if (bias && modelMap[0]) {
		var biasWeight = modelMap[0];
		var biasRelevance = bias * biasWeight;
		result = biasRelevance;
		if (explain > 0) explanations.push(
			{
				feature: 'bias',
				value: bias,
				weight: biasWeight,
				relevance: biasRelevance,
			}
		);
	}

	for (var feature in features) {
		// When a bias exists, model indices are shifted by 1 relative to feature indices:
		var featureInModelMap = parseInt(feature, 10) + (bias ? 1 : 0);
		if (featureInModelMap in modelMap) {
			var weight = modelMap[featureInModelMap];
			var value = features[feature];
			var relevance = weight * value;
			result += relevance;

			if (explain > 0) explanations.push(
				{
					feature: featureLookupTable ? (featureLookupTable.numberToFeature(feature) || "?") : feature,
					value: value,
					weight: weight,
					relevance: relevance,
				}
			);
		}
	}

	if (!continuous_output)
		result = (result > 0 ? 1 : 0);
	if (Number.isNaN(result)) {
		console.dir(explanations);
		// JSON.stringify the sample too - string-concatenating an object yields the useless "[object Object]":
		throw new Error("result is NaN when classifying " + JSON.stringify(features) + " with " + JSON.stringify(modelMap))
	}
	if (explain > 0) {
		// Most influential features first; zero-relevance features are dropped:
		explanations.sort(function(a, b) { return Math.abs(b.relevance) - Math.abs(a.relevance) });
		explanations = explanations.filter(function(e) { return e.relevance != 0 });

		// explanations.splice(explain, explanations.length-explain); // "explain" is the max length of explanation.

		if (!this.detailed_explanations) {
			// Compact form: [feature, relevance] pairs, sorted by descending relevance.
			// (Stable ascending sort + reverse preserves the previous _.sortBy(...).reverse() tie order.)
			explanations = explanations.map(function(e) {
				return [e.feature, e.relevance];
			});
			explanations = explanations.sort(function(a, b) { return a[1] - b[1] }).reverse()
		}
		return {
			classification: result,
			explanation: explanations
		};
	} else {
		return result;
	}
}
108 |
109 |
--------------------------------------------------------------------------------
/dist/core/svm/svmcommon.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | /**
4 | * Utilities common to SVM wrappers
5 | */
6 | var temp = require('temp'),
7 | fs = require('fs'),
8 | svmlight = require('../../formats/svmlight'),
9 | _ = require('underscore')._;
/**
 * Writes the given dataset to a file in svm-light format.
 * @return the file name.
 */
// NOTE(review): generated (Babel) build artifact of src/core/svm/svmcommon.js.
// Apply fixes to the source file and rebuild; do not edit this file by hand.


module.exports.writeDatasetToFile = function (dataset, bias, binarize, model_file_prefix, default_file_prefix, firstFeatureNumber) {
  if (model_file_prefix) {
    // Deterministic location requested by the caller:
    var learnFile = model_file_prefix + ".learn";
    var fd = fs.openSync(learnFile, 'w');
  } else {
    // No prefix - write to a unique temporary file instead:
    var tempFile = temp.openSync({
      prefix: default_file_prefix + "-",
      suffix: ".learn"
    });
    var learnFile = tempFile.path;
    var fd = tempFile.fd;
  }

  var datasetSvmlight = svmlight.toSvmLight(dataset, bias, binarize, firstFeatureNumber);
  fs.writeSync(fd, datasetSvmlight);
  fs.closeSync(fd);
  return learnFile;
};
/**
 * A utility that classifies a given sample (given as a feature-value map) using a model (given as a feature-weight map).
 * @param modelMap a map {feature_i: weight_i, ....} (i >= 0; 0 is the weight of the bias, if exists).
 * @param bias if nonzero, added at the beginning of features.
 * @param features a map {feature_i: value_i, ....} (i >= 1)
 * @param explain (int) if positive, generate explanation about the classification.
 * @param continuous_output (boolean) if true, return a score; if false, return 0 or 1.
 * @param featureLookupTable if not null, used for creating meaningful explanations.
 * @returns a classification value.
 */
// NOTE(review): generated (Babel) build artifact of src/core/svm/svmcommon.js.
// Apply fixes to the source file and rebuild; do not edit this file by hand.


module.exports.classifyWithModelMap = function (modelMap, bias, features, explain, continuous_output, featureLookupTable) {
  if (explain > 0) var explanations = [];
  var result = 0;

  // The bias weight (if any) lives at index 0 of the model map:
  if (bias && modelMap[0]) {
    var weight = modelMap[0];
    var relevance = bias * modelMap[0];
    result = relevance;
    if (explain > 0) explanations.push({
      feature: 'bias',
      value: bias,
      weight: weight,
      relevance: relevance
    });
  }

  for (var feature in features) {
    // When a bias exists, model indices are shifted by 1 relative to feature indices:
    var featureInModelMap = parseInt(feature) + (bias ? 1 : 0);

    if (featureInModelMap in modelMap) {
      var weight = modelMap[featureInModelMap];
      var value = features[feature];
      var relevance = weight * value;
      result += relevance;
      if (explain > 0) explanations.push({
        feature: featureLookupTable ? featureLookupTable.numberToFeature(feature) || "?" : feature,
        value: value,
        weight: weight,
        relevance: relevance
      });
    }
  }

  if (!continuous_output) result = result > 0 ? 1 : 0;

  if (_.isNaN(result)) {
    console.dir(explanations);
    throw new Error("result is NaN when classifying " + features + " with " + JSON.stringify(modelMap));
  }

  if (explain > 0) {
    // Most influential features first; zero-relevance features are dropped:
    explanations.sort(function (a, b) {
      return Math.abs(b.relevance) - Math.abs(a.relevance);
    });

    var explanations = _.filter(explanations, function (num) {
      return num.relevance != 0;
    }); // explanations.splice(explain, explanations.length-explain); // "explain" is the max length of explanation.


    if (!this.detailed_explanations) {
      // var sprintf = require('sprintf').sprintf;
      explanations = explanations.map(function (e) {
        // return sprintf("%s%+1.2f", e.feature, e.relevance);
        return [e.feature, e.relevance];
      });
      explanations = _.sortBy(explanations, function (num) {
        return num[1];
      }).reverse();
    }

    return {
      classification: result,
      explanation: explanations
    };
  } else {
    return result;
  }
};
--------------------------------------------------------------------------------
/test/classifiersTest/multilabel/MetaLabelerLanguageModelTest.js:
--------------------------------------------------------------------------------
1 | /**
2 | * a unit-test for Multi-Label classification in the Meta-Labeler method,
3 | * with Cross-Language-Model as the underlying ranker.
4 | */
5 |
6 | import { multilabel, Winnow } from "../../../dist/core";
7 | import "../../sorted";
8 |
// Label counter: one-vs-rest Winnow ensemble (predicts HOW MANY labels apply).
var BinaryRelevanceWinnow = multilabel.BinaryRelevance.bind(this, {
  binaryClassifierType: Winnow.bind(this, {
    promotion: 1.5,
    demotion: 0.5,
    margin: 1,
    retrain_count: 10
  })
});

// Label ranker: cross-language model (predicts WHICH labels are most relevant).
var CrossLanguageModelClassifier = multilabel.CrossLanguageModel.bind(this, {
  smoothingCoefficient: 0.9,
  // Each label is represented by a single feature carrying its own name:
  labelFeatureExtractor: function(string, features) {
    if (!features) features = {};
    features[string] = 1;
    return features;
  }
});

// The classifier under test: CLIR ranker + Winnow-based counter.
var MetaLabelerLanguageModel = multilabel.MetaLabeler.bind(this, {
  rankerType: CrossLanguageModelClassifier,
  counterType: BinaryRelevanceWinnow
});

var dataset = [
  { input: { I: 1, want: 1, aa: 1 }, output: "A" }, // train on single class
  { input: { I: 1, want: 1, bb: 1 }, output: ["B"] }, // train on array with single class (same effect)
  { input: { I: 1, want: 1, cc: 1 }, output: "C" }
];
37 |
// Batch training: all three single-label samples at once.
describe("CLIR Meta-Labeler batch-trained on Single-class inputs", function() {
  var classifierBatch = new MetaLabelerLanguageModel();
  classifierBatch.trainBatch(dataset);

  var classifier = classifierBatch;

  it("classifies 1-class samples", function() {
    classifier.classify({ I: 1, want: 1, aa: 1 }).should.eql(["A"]);
    classifier.classify({ I: 1, want: 1, bb: 1 }).should.eql(["B"]);
    classifier.classify({ I: 1, want: 1, cc: 1 }).should.eql(["C"]);
  });

  it("knows its classes", function() {
    classifier.getAllClasses().should.eql(["A", "B", "C"]);
  });

  it("explains its decisions", function() {
    // A sample mixing two trained labels - the explanation should expose
    // both the ranker's and the counter's reasoning:
    var ab = classifier.classify(
      { I: 1, want: 1, aa: 1, and: 1, bb: 1 },
      /*explain=*/ 3
    );
    ab.should.have.property("explanation").with.property("ranking");
    ab.should.have.property("explanation").with.property("counting");
  });
});
63 |
// Batch training where every sample carries exactly two labels; the counter
// should learn to emit two labels per prediction.
describe("CLIR Meta-Labeler batch-trained on two-class inputs", function() {
  var classifier = new MetaLabelerLanguageModel();
  classifier.trainBatch([
    { input: { I: 1, want: 1, aa: 1, bb: 1 }, output: ["A", "B"] }, // train on array with classes
    { input: { I: 1, want: 1, bb: 1, cc: 1 }, output: ["B", "C"] }, // train on array with classes
    { input: { I: 1, want: 1, cc: 1, dd: 1 }, output: ["C", "D"] },
    { input: { I: 1, want: 1, dd: 1, aa: 1 }, output: ["D", "A"] }
  ]);

  it("classifies 2-class samples", function() {
    classifier
      .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 })
      .sorted()
      .should.eql(["A", "B"]);
    classifier
      .classify({ I: 1, want: 1, bb: 1, and: 1, cc: 1 })
      .sorted()
      .should.eql(["B", "C"]);
    classifier
      .classify({ I: 1, want: 1, cc: 1, and: 1, dd: 1 })
      .sorted()
      .should.eql(["C", "D"]);
    classifier
      .classify({ I: 1, want: 1, dd: 1, and: 1, aa: 1 })
      .sorted()
      .should.eql(["A", "D"]);
  });

  it("explains its decisions", function() {
    // classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }, /*explain=*/1).should.have.property('explanation').with.property('ranking').with.lengthOf(4);
    classifier
      .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }, /*explain=*/ 1)
      .should.have.property("explanation")
      .with.property("ranking");
    // classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }, /*explain=*/3).should.have.property('explanation').with.property('counting').with.lengthOf(1);
    classifier
      .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }, /*explain=*/ 3)
      .should.have.property("explanation")
      .with.property("counting");
  });
});
105 |
--------------------------------------------------------------------------------
/test/classifiersTest/SvmMulticlassTest.js:
--------------------------------------------------------------------------------
1 | /**
2 | * a unit-test for SvmLinear classifier (a wrapper for LibLinear), as a multi-class classifier.
3 | */
4 |
5 | import { EnhancedClassifier, SvmLinear } from "../../dist/core";
6 | import { FeatureLookupTable } from "../../dist/features";
7 |
// The whole suite is skipped when the external liblinear binary is missing.
if (!SvmLinear.isInstalled()) {
  console.warn("liblinear_train not found - SvmMulticlass tests skipped.");
} else {
  // NOTE(review): other test files use `SvmLinear.bind(this, ...)`; binding `0`
  // works the same (bound `this` is ignored under `new`), but consider
  // aligning on one convention.
  var SvmClassifier = SvmLinear.bind(0, {
    multiclass: true,
    learn_args: "-c 20.0"
  });

  describe(
    "SVM-LibLinear multiclass" + " with numeric features and numeric labels",
    function() {
      // Three linearly-separable classes along the line y - x:
      var trainSet = [
        {
          input: [0, 0],
          output: 3
        },
        {
          input: [1, 1],
          output: 3
        },

        {
          input: [0, 1],
          output: 4
        },
        {
          input: [1, 2],
          output: 4
        },

        {
          input: [0, 2],
          output: 5
        },
        {
          input: [1, 3],
          output: 5
        }
      ];

      var classifier = new SvmClassifier();
      classifier.trainBatch(trainSet);

      it("supports multi-class output", function() {
        classifier.classify([1, 0]).should.equal(3);
        classifier.classify([0, 1.3]).should.equal(4);
        classifier.classify([0, 1.7]).should.equal(5);
        classifier.classify([0, 3]).should.equal(5);
      });

      it("explains its decisions", function() {
        classifier
          .classify([1, 0], 3)
          .should.have.property("explanation")
          .with.lengthOf(3);
        classifier
          .classify([0, 2], 5)
          .should.have.property("explanation")
          .with.lengthOf(3);
      });

      it("supports classification with scores", function() {
        // withScores=true returns [label, score] pairs, best first:
        classifier.classify([1, 0], 0, true).should.have.lengthOf(3);
        classifier.classify([0, 1.3], 0, true)[0].should.have.lengthOf(2);
        classifier.classify([0, 1.7], 0, true)[0][0].should.equal(5);
        classifier.classify([0, 3], 0, true)[0][1].should.be.within(2.5, 3.5);
      });
    }
  );

  // NOTE(review): SvmClassifierStringFeatures is defined but never used in
  // this file - dead code, or a missing "string features" suite? Confirm.
  var SvmClassifierStringFeatures = EnhancedClassifier.bind(this, {
    classifierType: SvmClassifier,
    featureLookupTable: new FeatureLookupTable()
  });

  var SvmClassifierStringLabels = EnhancedClassifier.bind(this, {
    classifierType: SvmClassifier,
    labelLookupTable: new FeatureLookupTable()
  });

  describe(
    "SVM-LibLinear multiclass" + " with numeric features and string labels",
    function() {
      // Same geometry as above, with string labels mapped through a lookup table:
      var trainSet = [
        {
          input: [0, 0],
          output: "a"
        },
        {
          input: [1, 1],
          output: "a"
        },

        {
          input: [0, 1],
          output: "b"
        },
        {
          input: [1, 2],
          output: "b"
        },

        {
          input: [0, 2],
          output: "c"
        },
        {
          input: [1, 3],
          output: "c"
        }
      ];

      var classifier = new SvmClassifierStringLabels();
      classifier.trainBatch(trainSet);

      it("supports multi-class output", function() {
        classifier.classify([1, 0]).should.equal("a");
        classifier.classify([0, 1.3]).should.equal("b");
        classifier.classify([0, 1.7]).should.equal("c");
        classifier.classify([0, 3]).should.equal("c");
      });

      it("explains its decisions", function() {
        classifier
          .classify([1, 0], 3)
          .should.have.property("explanation")
          .with.lengthOf(3);
        classifier
          .classify([0, 2], 5)
          .should.have.property("explanation")
          .with.lengthOf(3);
      });

      it("supports classification with scores", function() {
        classifier.classify([1, 0], 0, true).should.have.lengthOf(3);
        classifier.classify([0, 1.3], 0, true)[0].should.have.lengthOf(2);
        classifier.classify([0, 1.7], 0, true)[0][0].should.equal("c"); // must be the first!
        classifier.classify([0, 3], 0, true)[0][1].should.be.within(2.5, 3.5);
      });
    }
  );
}
150 |
--------------------------------------------------------------------------------
/test/classifiersTest/SvmTest.js:
--------------------------------------------------------------------------------
1 | /**
2 | * a unit-test for SvmLinear classifier (a wrapper for LibLinear) and SvmPerf classifier.
3 | */
4 |
5 | import { EnhancedClassifier, SvmLinear, SvmPerf } from "../../dist/core";
6 | import { FeatureLookupTable } from "../../dist/features";
7 |
/**
 * Shared mocha suite, run once per SVM backend (SvmPerf / SvmLinear).
 * Exercises binary classification with numeric features (raw arrays) and
 * with string features (mapped through a FeatureLookupTable).
 *
 * @param name suite-name prefix (e.g. "SVM-Perf").
 * @param SvmClassifier a binary classifier constructor, pre-bound with options.
 */
function test(name, SvmClassifier) {
  describe(name + " with numeric features", function() {
    var trainSet = [
      {
        input: [0, 0],
        output: 0
      },
      {
        input: [1, 1],
        output: 0
      },
      {
        input: [0, 1],
        output: 1
      },
      {
        input: [1, 2],
        output: 1
      }
    ];

    var classifier = new SvmClassifier();
    classifier.trainBatch(trainSet);

    it("finds the maximal margin separator", function() {
      // the max-margin separating line goes through [0,0.5] and [1,1.5]. It is:
      // 0.5+x-y = 0
      // or: 2y-2x-1 = 0
      //classifier.modelMap.should.eql({ '0': -1, '1': -2, '2': 2 }); // the LibLinear algorithm is not accurate:
      var modelWeights = classifier.getModelWeights();

      modelWeights[0].should.be.within(-1.5, -0.5);
      modelWeights[1].should.be.within(-2.5, -1.5);
      modelWeights[2].should.be.within(1.5, 2.5);
    });

    it("supports binary output", function() {
      classifier.classify([0, 2]).should.eql(1);
      classifier.classify([1, 0]).should.eql(0);
    });

    // it('explains its decisions', function() {
    // classifier.classify([0,2], 2).should.have.property("explanation").with.lengthOf(2);
    // classifier.classify([1,0], 3).should.have.property("explanation").with.lengthOf(3);
    // })

    it("supports continuous output", function() {
      classifier.classify([0, 2], 0, true).should.be.within(2.5, 3.5); // should equal 3, but it is not accurate enough
      classifier.classify([1, 0], 0, true).should.be.within(-3.5, -2.5); // should equal -3, but it is not accurate enough
    });
  });

  // Wraps the classifier so that hash-of-string-features inputs are converted
  // to numeric arrays via a fresh FeatureLookupTable:
  var SvmClassifierStringFeatures = EnhancedClassifier.bind(this, {
    classifierType: SvmClassifier,
    featureLookupTable: new FeatureLookupTable()
  });

  describe(name + " with string features", function() {
    var trainSet = [
      {
        input: {
          a: 0,
          b: 0
        },
        output: 0
      },
      {
        input: {
          a: 1,
          b: 1
        },
        output: 0
      },
      {
        input: {
          a: 0,
          b: 1
        },
        output: 1
      },
      {
        input: {
          a: 1,
          b: 2
        },
        output: 1
      }
    ];

    var classifier = new SvmClassifierStringFeatures();
    classifier.trainBatch(trainSet);

    it("supports binary output", function() {
      classifier
        .classify({
          a: 0,
          b: 2
        })
        .should.eql(1);
      classifier
        .classify({
          a: 1,
          b: 0
        })
        .should.eql(0);
    });

    // it('explains its classifications', function() {
    // classifier.classify({a:0, b:2}, 2).should.have.property("explanation").with.lengthOf(2);
    // classifier.classify({a:1, b:0}, 3).should.have.property("explanation").with.lengthOf(3);
    // })

    it("supports continuous output", function() {
      classifier
        .classify(
          {
            a: 0,
            b: 2
          },
          0,
          true
        )
        .should.be.above(0);
      classifier
        .classify(
          {
            a: 1,
            b: 0
          },
          0,
          true
        )
        .should.be.below(0);
    });
  });
} // end of function
144 |
// Run the shared suite against each installed external SVM backend; a missing
// backend skips its suite with a warning instead of failing.
if (SvmPerf.isInstalled())
  test(
    "SVM-Perf",
    SvmPerf.bind(this, {
      learn_args: "-c 20.0"
    })
  );
else console.warn("svm_perf_learn not found - SvmPerf tests skipped.");

if (SvmLinear.isInstalled())
  test(
    "SVM-LibLinear",
    SvmLinear.bind(this, {
      learn_args: "-c 20.0",
      multiclass: false
    })
  );
else console.warn("liblinear_train not found - SvmLinear tests skipped.");
163 |
--------------------------------------------------------------------------------
/src/features/FeatureLookupTable.js:
--------------------------------------------------------------------------------
/**
 * FeatureLookupTable - a table for converting features to numbers and vice versa
 */
// NOTE(review): this type is declared with `class`, but the rest of this file
// does `FeatureLookupTable.prototype = {...}`. A class's .prototype property is
// non-writable, so that assignment is a silent no-op in sloppy mode and the
// prototype methods are never attached when this source runs natively (the
// Babel-built dist version uses a plain function, which is why tests pass).
// Consider Object.assign(FeatureLookupTable.prototype, {...}) instead - confirm.
class FeatureLookupTable {
	constructor() {
		// index -> feature name; slot 0 is reserved for `undefined`
		this.featureIndexToFeatureName = [undefined];
		// feature name -> index; the inverse of featureIndexToFeatureName
		this.featureNameToFeatureIndex = { undefined: 0 };
	}
}
10 |
11 | FeatureLookupTable.prototype = {
12 |
	// add a single feature, if it does not exist
	// (existing features keep their index, so numbering stays stable over time)
	addFeature: function(feature) {
		if (!(feature in this.featureNameToFeatureIndex)) {
			var newIndex = this.featureIndexToFeatureName.length;
			this.featureIndexToFeatureName.push(feature);
			this.featureNameToFeatureIndex[feature] = newIndex;
		}
	},
21 |
	// add all features in the given hash or array
	// - array: each VALUE is treated as a feature name
	// - object: each KEY is treated as a feature name
	addFeatures: function(hash) {
		if (hash instanceof Array) {
			for (var index in hash)
				this.addFeature(hash[index]);
		} else if (hash instanceof Object) {
			for (var feature in hash)
				this.addFeature(feature);
		}
		else throw new Error("FeatureLookupTable.addFeatures expects a hash or an array, but got: "+JSON.stringify(hash));
	},
33 |
34 | // add all features in all hashes in the given array
35 | addFeaturess: function(hashes) {
36 | for (var i=0; i= 0 which represents a number of labels.
16 | * The MetaLabeler returns the C most relevant labels from the list returned by the ranker.
17 | *
18 | * @param opts
19 | * rankerType (mandatory) - the type of the multi-class classifier used for ranking the labels.
20 | * counterType (mandatory) - the type of the multi-class classifier used for selecting the number of labels.
21 | */
class MetaLabeler {
	/**
	 * @param opts must supply both rankerType and counterType - constructors of
	 *        the underlying multi-class classifiers (see the file header).
	 * @throws Error if either constructor is missing (after dumping opts for debugging).
	 */
	constructor(opts) {
		// Validate both required constructor options, in a fixed order:
		for (const required of ["rankerType", "counterType"]) {
			if (!opts[required]) {
				console.dir(opts);
				throw new Error("opts." + required + " not found");
			}
		}
		this.ranker = new opts.rankerType();
		this.counter = new opts.counterType();
	}
}
36 |
/*
 * NOTE(review): MetaLabeler is declared with `class`, whose .prototype property
 * is non-writable - a plain `MetaLabeler.prototype = {...}` assignment is a
 * silent no-op in sloppy mode, so these methods were never attached when this
 * source ran natively. Object.assign extends the existing prototype object
 * instead (and also keeps the `constructor` property intact).
 */
Object.assign(MetaLabeler.prototype, {

	/**
	 * Tell the classifier that the given sample belongs to the given classes.
	 *
	 * @param sample a document.
	 * @param labels an array whose VALUES are classes.
	 */
	trainOnline: function(sample, labels) {
		// The ranker is just trained by the given set of relevant labels:
		this.ranker.trainOnline(sample, labels);

		// The counter is trained by the *number* of relevant labels:
		var labelCount = (Array.isArray(labels)? labels: Object.keys(labels)).length;
		this.counter.trainOnline(sample, labelCount);
	},

	/**
	 * Train the classifier with all the given documents.
	 *
	 * @param dataset
	 *            an array with objects of the format:
	 *            {input: sample1, output: [class11, class12...]}
	 */
	trainBatch : function(dataset) {
		// The ranker is just trained by the given set of labels relevant to each sample:
		this.ranker.trainBatch(dataset);

		// The counter is trained by the *number* of labels relevant to each sample:
		var labelCountDataset = dataset.map(function(datum) {
			var labelCount = (Array.isArray(datum.output)? datum.output.length: 1);
			return {
				input: datum.input,
				output: labelCount
			};
		});
		this.counter.trainBatch(labelCountDataset);
	},

	/**
	 * Use the model trained so far to classify a new sample.
	 *
	 * @param sample a document.
	 * @param explain - int - if positive, an "explanation" field, with the given length, will be added to the result.
	 *
	 * @return an array whose VALUES are classes; or, if explain>0, an object
	 *         {classes, explanation: {ranking, counting}}.
	 */
	classify: function(sample, explain) {
		// Rank all known labels by relevance to the sample:
		var rankedLabelsWithExplain = this.ranker.classify(sample, explain, /*withScores=*/true);
		var rankedLabels = (explain>0? rankedLabelsWithExplain.classes: rankedLabelsWithExplain);

		// Predict how many of the top-ranked labels to keep:
		var labelCountWithExplain = this.counter.classify(sample, explain, /*withScores=*/true);
		var labelCount = (explain>0? labelCountWithExplain.classes[0][0]: labelCountWithExplain[0][0]);
		if (typeof labelCount === 'string') labelCount = parseInt(labelCount, 10);

		// Pick the labelCount most relevant labels from the list returned by the ranker:
		var positiveLabelsWithScores = rankedLabels.slice(0, labelCount);

		// The ranker may return plain labels or [label, score] pairs; strip the scores:
		var positiveLabels = positiveLabelsWithScores;
		if (positiveLabelsWithScores.length != 0 && Array.isArray(positiveLabelsWithScores[0]))
			positiveLabels = positiveLabelsWithScores.map(function(labelWithScore) {return labelWithScore[0]});

		return (explain>0? {
			classes: positiveLabels,
			explanation: {
				ranking: rankedLabelsWithExplain.explanation,
				counting: labelCountWithExplain.explanation
			}
		}:
		positiveLabels)
	},

	getAllClasses: function() {
		return this.ranker.getAllClasses();
	},

	// TODO(review): serialization is not implemented - a trained MetaLabeler
	// cannot currently be saved or restored through toJSON/fromJSON.
	toJSON : function() {
	},

	fromJSON : function(json) {
	},

	/**
	 * Link to a FeatureLookupTable from a higher level in the hierarchy (typically from an EnhancedClassifier), used ONLY for generating meaningful explanations.
	 */
	setFeatureLookupTable: function(featureLookupTable) {
		if (this.ranker.setFeatureLookupTable)
			this.ranker.setFeatureLookupTable(featureLookupTable);
		if (this.counter.setFeatureLookupTable)
			this.counter.setFeatureLookupTable(featureLookupTable);
	},
});
130 |
131 |
132 | module.exports = MetaLabeler;
133 |
--------------------------------------------------------------------------------
/dist/features/FeatureLookupTable.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }
4 |
5 | /**
6 | * FeatureLookupTable - a table for converting features to numbers and vice versa
7 | */
8 | var FeatureLookupTable = function FeatureLookupTable() {
9 | _classCallCheck(this, FeatureLookupTable);
10 |
11 | this.featureIndexToFeatureName = [undefined];
12 | this.featureNameToFeatureIndex = {
13 | undefined: 0
14 | };
15 | };
16 |
// NOTE(review): generated (Babel) build artifact - edit the source file, not this one.
// (Here the prototype assignment works because FeatureLookupTable was compiled
// to a plain function; the native `class` source would silently ignore it.)
FeatureLookupTable.prototype = {
  // add a single feature, if it does not exist
  addFeature: function addFeature(feature) {
    if (!(feature in this.featureNameToFeatureIndex)) {
      var newIndex = this.featureIndexToFeatureName.length;
      this.featureIndexToFeatureName.push(feature);
      this.featureNameToFeatureIndex[feature] = newIndex;
    }
  },
  // add all features in the given hash or array
  addFeatures: function addFeatures(hash) {
    if (hash instanceof Array) {
      for (var index in hash) {
        this.addFeature(hash[index]);
      }
    } else if (hash instanceof Object) {
      for (var feature in hash) {
        this.addFeature(feature);
      }
    } else throw new Error("FeatureLookupTable.addFeatures expects a hash or an array, but got: " + JSON.stringify(hash));
  },
  // add all features in all hashes in the given array
  addFeaturess: function addFeaturess(hashes) {
    for (var i = 0; i < hashes.length; ++i) {
      this.addFeatures(hashes[i]);
    }
  },

  /**
   * Convert the given feature to a numeric index.
   */
  featureToNumber: function featureToNumber(feature) {
    this.addFeature(feature);
    return this.featureNameToFeatureIndex[feature];
  },
  numberToFeature: function numberToFeature(number) {
    return this.featureIndexToFeatureName[number];
  },

  /**
   * Convert the given hash of features to a numeric array, using 0 for padding.
   * If some features in the hash do not exist - they will be added.
   * @param hash any hash, for example, {a: 111, b: 222, c: 333}
   * @return a matching array, based on the current feature table. For example: [0, 111, 222, 0, 333]
   * @note some code borrowed from Heather Arthur: https://github.com/harthur/brain/blob/master/lib/lookup.js
   */
  hashToArray: function hashToArray(hash) {
    this.addFeatures(hash);
    var array = [];

    for (var featureIndex = 0; featureIndex < this.featureIndexToFeatureName.length; ++featureIndex) {
      array[featureIndex] = 0;
    }

    if (hash instanceof Array) {
      // array input: values are feature names, marked present with `true`
      for (var i in hash) {
        array[this.featureNameToFeatureIndex[hash[i]]] = true;
      }
    } else if (hash instanceof Object) {
      for (var feature in hash) {
        array[this.featureNameToFeatureIndex[feature]] = hash[feature];
      }
    } else throw new Error("Unsupported type: " + JSON.stringify(hash));

    return array;
  },

  /**
   * Convert all the given hashes of features to numeric arrays, using 0 for padding.
   * If some features in some of the hashes do not exist - they will be added.
   * @param hashes an array of hashes, for example, [{a: 111, b: 222}, {a: 11, c: 33}, ...]
   * @return an array of matching arrays, based on the current feature table. For example: [[111, 222], [11, 0, 33]]
   */
  hashesToArrays: function hashesToArrays(hashes) {
    this.addFeaturess(hashes);
    var arrays = [];

    for (var i = 0; i < hashes.length; ++i) {
      arrays[i] = [];

      for (var feature in this.featureNameToFeatureIndex) {
        arrays[i][this.featureNameToFeatureIndex[feature]] = hashes[i][feature] || 0;
      }
    }

    return arrays;
  },

  /**
   * Convert the given numeric array to a hash of features, ignoring zero values.
   * @note some code borrowed from Heather Arthur: https://github.com/harthur/brain/blob/master/lib/lookup.js
   */
  arrayToHash: function arrayToHash(array) {
    var hash = {};

    for (var feature in this.featureNameToFeatureIndex) {
      if (array[this.featureNameToFeatureIndex[feature]]) hash[feature] = array[this.featureNameToFeatureIndex[feature]];
    }

    return hash;
  },

  /**
   * Convert the given numeric arrays to array of hashes of features, ignoring zero values.
   */
  arraysToHashes: function arraysToHashes(arrays) {
    var hashes = [];

    for (var i = 0; i < arrays.length; ++i) {
      hashes[i] = this.arrayToHash(arrays[i]);
    }

    return hashes;
  },
  toJSON: function toJSON() {
    return {
      featureIndexToFeatureName: this.featureIndexToFeatureName,
      featureNameToFeatureIndex: this.featureNameToFeatureIndex
    };
  },
  fromJSON: function fromJSON(json) {
    this.featureIndexToFeatureName = json.featureIndexToFeatureName;
    this.featureNameToFeatureIndex = json.featureNameToFeatureIndex;
  }
};
module.exports = FeatureLookupTable;
--------------------------------------------------------------------------------
/test/classifiersTest/multilabel/MetaLabelerWinnowTest.js:
--------------------------------------------------------------------------------
1 | /**
2 | * a unit-test for Multi-Label classification in the Meta-Labeler method,
3 | * with Modified Balanced Winnow as the underlying binary classifier.
4 | */
5 |
6 | import { multilabel, Winnow } from "../../../dist/core";
7 | import "../../sorted";
8 |
var retrain_count = 10;
// One-vs-rest Winnow ensemble, used here as BOTH the ranker and the counter:
var BinaryRelevanceWinnow = multilabel.BinaryRelevance.bind(this, {
  binaryClassifierType: Winnow.bind(this, {
    promotion: 1.5,
    demotion: 0.5,
    margin: 1,
    retrain_count: retrain_count
  })
});

var MetaLabelerWinnow = multilabel.MetaLabeler.bind(this, {
  rankerType: BinaryRelevanceWinnow,
  counterType: BinaryRelevanceWinnow
});

var dataset = [
  { input: { I: 1, want: 1, aa: 1 }, output: "A" }, // train on single class
  { input: { I: 1, want: 1, bb: 1 }, output: ["B"] }, // train on array with single class (same effect)
  { input: { I: 1, want: 1, cc: 1 }, output: [{ C: "c" }] } // train on structured class, that will be stringified to "{C:c}".
];
29 |
// Batch training on the three single-label fixtures above.
describe("Meta-Labeler batch-trained on Single-class inputs", function() {
  var classifierBatch = new MetaLabelerWinnow();
  classifierBatch.trainBatch(dataset);

  var classifier = classifierBatch;
  it("classifies 1-class samples", function() {
    classifier.classify({ I: 1, want: 1, aa: 1 }).should.eql(["A"]);
    classifier.classify({ I: 1, want: 1, bb: 1 }).should.eql(["B"]);
    classifier.classify({ I: 1, want: 1, cc: 1 }).should.eql(['{"C":"c"}']);
  });

  it("knows its classes", function() {
    classifier.getAllClasses().should.eql(["A", "B", '{"C":"c"}']);
  });

  it("explains its decisions", function() {
    // Mixed sample: the explanation should expose both the ranker's and the
    // counter's reasoning:
    var ab = classifier.classify(
      { I: 1, want: 1, aa: 1, and: 1, bb: 1 },
      /*explain=*/ 3
    );
    //console.dir(ab);
    ab.should.have.property("explanation").with.property("ranking");
    ab.should.have.property("explanation").with.property("counting");
  });
});
55 |
// Online training: feed the same fixtures one-by-one, repeated retrain_count+1
// times to match the batch classifier's internal retraining.
describe("Meta-Labeler online-trained on Single-class inputs", function() {
  var classifierOnline = new MetaLabelerWinnow();
  for (var i = 0; i <= retrain_count; ++i)
    for (var d = 0; d < dataset.length; ++d)
      classifierOnline.trainOnline(dataset[d].input, dataset[d].output);

  var classifier = classifierOnline;
  it("classifies 1-class samples", function() {
    classifier.classify({ I: 1, want: 1, aa: 1 }).should.eql(["A"]);
    classifier.classify({ I: 1, want: 1, bb: 1 }).should.eql(["B"]);
    classifier.classify({ I: 1, want: 1, cc: 1 }).should.eql(['{"C":"c"}']);
  });

  it("knows its classes", function() {
    classifier.getAllClasses().should.eql(["A", "B", '{"C":"c"}']);
  });

  it("explains its decisions", function() {
    // classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }, /*explain=*/1).should.have.property('explanation').with.property('ranking').with.lengthOf(3);
    classifier
      .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }, /*explain=*/ 1)
      .should.have.property("explanation")
      .with.property("ranking");
    // classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }, /*explain=*/3).should.have.property('explanation').with.property('counting').with.lengthOf(1);
    classifier
      .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }, /*explain=*/ 3)
      .should.have.property("explanation")
      .with.property("counting");
  });
});
86 |
describe("Meta-Labeler batch-trained on two-class inputs", function() {
	// Every training sample carries exactly two labels, so the counter
	// should learn to select the top TWO ranked labels.
	const classifier = new MetaLabelerWinnow();
	classifier.trainBatch([
		{ input: { I: 1, want: 1, aa: 1, bb: 1 }, output: ["A", "B"] },
		{ input: { I: 1, want: 1, bb: 1, cc: 1 }, output: ["B", "C"] },
		{ input: { I: 1, want: 1, cc: 1, dd: 1 }, output: ["C", "D"] },
		{ input: { I: 1, want: 1, dd: 1, aa: 1 }, output: ["D", "A"] }
	]);

	it("classifies 2-class samples", function() {
		// Pairs of [sample, expected labels (sorted)].
		const expectations = [
			[{ I: 1, want: 1, aa: 1, and: 1, bb: 1 }, ["A", "B"]],
			[{ I: 1, want: 1, bb: 1, and: 1, cc: 1 }, ["B", "C"]],
			[{ I: 1, want: 1, cc: 1, and: 1, dd: 1 }, ["C", "D"]],
			[{ I: 1, want: 1, dd: 1, and: 1, aa: 1 }, ["A", "D"]]
		];
		expectations.forEach(function(pair) {
			classifier.classify(pair[0]).sorted().should.eql(pair[1]);
		});
	});

	it("explains its decisions", function() {
		const sample = { I: 1, want: 1, aa: 1, and: 1, bb: 1 };
		classifier
			.classify(sample, /*explain=*/ 1)
			.should.have.property("explanation")
			.with.property("ranking");
		classifier
			.classify(sample, /*explain=*/ 3)
			.should.have.property("explanation")
			.with.property("counting");
	});
});
128 |
--------------------------------------------------------------------------------
/dist/utils/partitions.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | /**
4 | * Utilities for partitioning datasets of documents for training and testing.
5 | *
6 | * @author Erel Segal-haLevi
7 | * @since 2013-06
8 | */
9 | var _ = require("underscore")._;
10 | /**
11 | * Create a single partition of the given dataset.
12 | *
13 | * @param dataset an array.
14 | * @param testSetStart an index into the array.
15 | * @param testSetCount int - the num of samples in the test set, starting from testSetStart.
 * @return an object {train: trainSet, test: testSet}
17 | */
18 |
19 |
20 | exports.partition = function (dataset, testSetStart, testSetCount) {
21 | var datasetclone = JSON.parse(JSON.stringify(dataset));
22 | var testSet = datasetclone.splice(testSetStart, testSetCount);
23 | var trainSet = datasetclone; // without the test-set
24 |
25 | return {
26 | train: trainSet,
27 | test: testSet
28 | };
29 | };
30 | /**
31 | * Create several different partitions of the given dataset to train and test.
32 | * Useful for cross-validation.
33 | *
34 | * @param dataset any array.
35 | * @param numOfPartitions number of different partitions to generate.
36 | * @param callback a function to call for each partition.
37 | *
38 | * @return an object: {train: [array-for-train], test: [array-for-test]}
39 | * @note code adapted from Heather Arthur: https://github.com/harthur/classifier/blob/master/test/cross-validation/cross-validate.js
40 | */
41 |
42 |
43 | exports.partitions = function (dataset, numOfPartitions, callback) {
44 | var shuffledDataset = _.shuffle(dataset);
45 |
46 | var testSetCount = dataset.length / numOfPartitions;
47 |
48 | for (var iPartition = 0; iPartition < numOfPartitions; ++iPartition) {
49 | var testSetStart = iPartition * testSetCount;
50 | var partition = exports.partition(dataset, testSetStart, testSetCount);
51 | callback(partition.train, partition.test, iPartition);
52 | }
53 | };
54 | /**
55 | * Create several different partitions of the given dataset to train and test without doing shuffling
56 | * Useful for cross-validation in Threshold classifier.
57 | *
58 | */
59 |
60 |
61 | exports.partitions_consistent_by_fold = function (dataset, numOfPartitions, partitionIndex) {
62 | if (!_.isArray(dataset)) throw new Error("dataset is not an array");
63 | if (_.isUndefined(numOfPartitions)) throw new Error("numOfPartitions " + numOfPartitions);
64 | if (_.isUndefined(partitionIndex)) throw new Error("partitionIndex " + partitionIndex);
65 | var testSetCount = dataset.length / numOfPartitions;
66 | var result = {
67 | 'train': [],
68 | 'test': []
69 | };
70 |
71 | for (var iPartition = 0; iPartition < numOfPartitions; ++iPartition) {
72 | var testSetStart = iPartition * testSetCount;
73 | var partition = exports.partition(dataset, testSetStart, testSetCount);
74 |
75 | if (iPartition == partitionIndex) {
76 | result['train'] = partition.train;
77 | result['test'] = partition.test;
78 | }
79 | }
80 |
81 | return result;
82 | };
83 |
84 | exports.partitions_consistent = function (dataset, numOfPartitions, callback) {
85 | var testSetCount = dataset.length / numOfPartitions;
86 |
87 | for (var iPartition = 0; iPartition < numOfPartitions; ++iPartition) {
88 | var testSetStart = iPartition * testSetCount;
89 | var partition = exports.partition(dataset, testSetStart, testSetCount);
90 | callback(partition.train, partition.test, iPartition);
91 | }
92 | };
93 |
94 | exports.partitions_reverese = function (dataset, numOfPartitions, callback) {
95 | var testSetCount = dataset.length / numOfPartitions;
96 |
97 | for (var iPartition = 0; iPartition < numOfPartitions; ++iPartition) {
98 | var testSetStart = iPartition * testSetCount;
99 | var partition = exports.partition(dataset, testSetStart, testSetCount);
100 | callback(partition.test, partition.train, iPartition);
101 | }
102 | };
103 |
/**
 * Cross-validation folds for a column-oriented dataset: a hash mapping each
 * key to a parallel array of per-sample values.
 * NOTE(review): assumes every array in datasetor has the same length as the
 * array of the first key — TODO confirm with callers.
 *
 * For each fold, the callback receives:
 *  - train: row-oriented — train[i] is an array collecting the i-th remaining
 *    value of each column, in Object key order;
 *  - test: a flat array concatenating each column's test slice;
 *  - iPartition: the fold index.
 */
exports.partitions_hash = function (datasetor, numOfPartitions, callback) {
  // Fold size is derived from the first column's length.
  var count = datasetor[Object.keys(datasetor)[0]].length;
  var testSetCount = Math.floor(count / numOfPartitions);

  for (var iPartition = 0; iPartition < numOfPartitions; ++iPartition) {
    var testSetStart = iPartition * testSetCount;
    // Deep-copy per fold: splice below mutates the columns.
    var dataset = JSON.parse(JSON.stringify(datasetor));
    var test = [];
    var train = [];

    // One (initially empty) row per remaining sample.
    _(count - testSetCount).times(function (n) {
      train.push([]);
    });

    _.each(dataset, function (value, key, list) {
      // Remove this column's test slice and append it to the flat test array.
      test = test.concat(value.splice(testSetStart, testSetCount));

      // Pivot the remaining column values into rows: key1 is the row index.
      _.each(value, function (elem, key1, list1) {
        train[key1].push(elem);
      }, this);
    }, this);

    callback(train, test, iPartition);
  }
};
129 |
130 | exports.partitions_hash_fold = function (datasetor, numOfPartitions, fold) {
131 | var count = datasetor[Object.keys(datasetor)[0]].length;
132 | var testSetCount = Math.floor(count / numOfPartitions);
133 | var testSetStart = fold * testSetCount; // var dataset = JSON.parse(JSON.stringify(datasetor))
134 |
135 | var test = [];
136 | var train = [];
137 |
138 | _(count - testSetCount).times(function (n) {
139 | train.push([]);
140 | });
141 |
142 | _.each(datasetor, function (value, key, list) {
143 | test = test.concat(value.splice(testSetStart, testSetCount));
144 |
145 | _.each(value, function (elem, key1, list1) {
146 | train[key1].push(elem);
147 | }, this);
148 | }, this);
149 |
150 | return {
151 | "train": train,
152 | "test": test
153 | };
154 | };
--------------------------------------------------------------------------------
/dist/core/multilabel/MetaLabeler.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
// Babel helper: reject calling a class constructor without `new`.
function _classCallCheck(instance, Constructor) {
  if (instance instanceof Constructor) {
    return;
  }
  throw new TypeError("Cannot call a class as a function");
}
4 |
5 | var hash = require("../../utils/hash");
6 |
7 | var sprintf = require("sprintf").sprintf;
8 |
9 | var _ = require("underscore")._;
10 | /**
11 | * MetaLabeler - Multi-label classifier, based on:
12 | *
13 | * Tang Lei, Rajan Suju, Narayanan Vijay K.. Large scale multi-label classification via metalabeler in Proceedings of the 18th international conference on World wide webWWW '09(New York, NY, USA):211-220ACM 2009.
14 | * http://www.citeulike.org/user/erelsegal-halevi/article/4860265
15 | *
16 | * A MetaLabeler uses two multi-class classifiers to create a single multi-label classifier. One is called "ranker" and the other is called "counter".
17 | *
18 | * The MetaLabeler assigns labels to a sample in the following two stages:
19 | * - Stage 1: Ranking. The sample is sent to the "ranker", which returns all available labels ordered from the most relevant to the least relevant.
20 | * - Stage 2: Counting. The sample is sent to the "counter", which returns integer C >= 0 which represents a number of labels.
21 | * The MetaLabeler returns the C most relevant labels from the list returned by the ranker.
22 | *
23 | * @param opts
24 | * rankerType (mandatory) - the type of the multi-class classifier used for ranking the labels.
25 | * counterType (mandatory) - the type of the multi-class classifier used for selecting the number of labels.
26 | */
27 |
28 |
// Constructor: instantiates the two underlying multi-class classifiers.
// Both opts.rankerType and opts.counterType are mandatory constructor types.
var MetaLabeler = function MetaLabeler(opts) {
  _classCallCheck(this, MetaLabeler);

  // Validate mandatory options; dump the options that were actually given
  // before failing, to ease debugging.
  var problem = null;
  if (!opts.rankerType) {
    problem = "opts.rankerType not found";
  } else if (!opts.counterType) {
    problem = "opts.counterType not found";
  }

  if (problem !== null) {
    console.dir(opts);
    throw new Error(problem);
  }

  this.ranker = new opts.rankerType();
  this.counter = new opts.counterType();
};
45 |
MetaLabeler.prototype = {
  /**
   * Tell the classifier that the given sample belongs to the given labels.
   *
   * @param sample a document (hash of feature-value pairs).
   * @param labels an array (or hash) whose VALUES are the relevant classes.
   */
  trainOnline: function trainOnline(sample, labels) {
    // The ranker is trained on the set of relevant labels itself:
    this.ranker.trainOnline(sample, labels);

    // The counter is trained on the *number* of relevant labels:
    var labelCount = (Array.isArray(labels) ? labels : Object.keys(labels)).length;
    this.counter.trainOnline(sample, labelCount);
  },

  /**
   * Train the classifier with all the given documents.
   *
   * @param dataset an array of objects of the format:
   *        {input: sample1, output: [class11, class12...]}
   */
  trainBatch: function trainBatch(dataset) {
    // The ranker is trained on the label set of each sample:
    this.ranker.trainBatch(dataset);

    // The counter is trained on the *number* of labels of each sample;
    // a non-array output counts as a single label.
    var labelCountDataset = dataset.map(function (datum) {
      var labelCount = Array.isArray(datum.output) ? datum.output.length : 1;
      return {
        input: datum.input,
        output: labelCount
      };
    });
    this.counter.trainBatch(labelCountDataset);
  },

  /**
   * Use the model trained so far to classify a new sample: the ranker orders
   * all labels by relevance, then the counter decides how many of the
   * top-ranked labels to return.
   *
   * @param sample a document.
   * @param explain int - if positive, an "explanation" field, with the given length, will be added to the result.
   * @return an array whose VALUES are classes, or, when explaining,
   *         {classes: [...], explanation: {ranking, counting}}.
   */
  classify: function classify(sample, explain) {
    // Stage 1: rank all labels, most relevant first (with scores).
    var rankedLabelsWithExplain = this.ranker.classify(sample, explain,
    /*withScores=*/
    true);
    var rankedLabels = explain > 0 ? rankedLabelsWithExplain.classes : rankedLabelsWithExplain;

    // Stage 2: predict how many labels are relevant.
    var labelCountWithExplain = this.counter.classify(sample, explain,
    /*withScores=*/
    true);
    var labelCount = explain > 0 ? labelCountWithExplain.classes[0][0] : labelCountWithExplain[0][0];
    // The counter's classes are label *counts*, but may come back as strings.
    // FIX: always pass the radix to parseInt.
    if (_.isString(labelCount)) labelCount = parseInt(labelCount, 10);

    // Pick the labelCount most relevant labels from the ranker's list,
    // stripping the [label, score] pairs down to labels when present.
    // FIX: the original used a brace-less nested `if` that re-declared
    // `positiveLabels` with `var` inside the branch.
    var positiveLabelsWithScores = rankedLabels.slice(0, labelCount);
    var positiveLabels = positiveLabelsWithScores;
    if (positiveLabelsWithScores.length != 0 && _.isArray(positiveLabelsWithScores[0])) {
      positiveLabels = positiveLabelsWithScores.map(function (labelWithScore) {
        return labelWithScore[0];
      });
    }

    return explain > 0 ? {
      classes: positiveLabels,
      explanation: {
        ranking: rankedLabelsWithExplain.explanation,
        counting: labelCountWithExplain.explanation
      }
    } : positiveLabels;
  },

  /** @return the list of all classes known to the ranker. */
  getAllClasses: function getAllClasses() {
    return this.ranker.getAllClasses();
  },

  // Serialization is not implemented. NOTE(review): toJSON/fromJSON are
  // intentionally empty stubs here — confirm whether persistence is needed.
  toJSON: function toJSON() {},
  fromJSON: function fromJSON(json) {},

  /**
   * Link to a FeatureLookupTable from a higher level in the hierarchy
   * (typically from an EnhancedClassifier), used ONLY for generating
   * meaningful explanations. Forwarded to both sub-classifiers when they
   * support it.
   */
  setFeatureLookupTable: function setFeatureLookupTable(featureLookupTable) {
    if (this.ranker.setFeatureLookupTable) this.ranker.setFeatureLookupTable(featureLookupTable);
    if (this.counter.setFeatureLookupTable) this.counter.setFeatureLookupTable(featureLookupTable);
  }
};
module.exports = MetaLabeler;
--------------------------------------------------------------------------------
/dist/core/decisiontree/DecisionTree.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | /* Implementation of Decision Tree classifier, ID3 implementation
4 | the code based on https://github.com/bugless/nodejs-decision-tree-id3/blob/master/lib/decision-tree.js
5 | */
6 | var _ = require('underscore');
7 |
/**
 * Decision-tree (ID3) classifier constructor.
 * @param opts currently unused; accepted (and defaulted to {}) for
 *        interface compatibility with the other classifiers.
 */
function DecisionTree(opts) {
  opts = opts || {};
}
11 |
DecisionTree.prototype = {
  /** Serialize the trained model: the model is simply the root node. */
  toJSON: function toJSON() {
    return this.root;
  },

  /** Restore a tree previously produced by toJSON(). */
  fromJSON: function fromJSON(json) {
    this.root = json;
  },

  /**
   * Recursively build an ID3 decision tree.
   *
   * @param dataset array of {input: {feature: value...}, output: label} samples.
   * @param features array of feature names still available for splitting.
   * @return the root node of the (sub)tree.
   */
  createTree: function createTree(dataset, features) {
    var targets = _.unique(_.pluck(dataset, 'output'));

    // Leaf: every remaining sample has the same output.
    if (targets.length == 1) {
      return {
        type: "result",
        val: targets[0],
        name: targets[0],
        alias: targets[0] + this.randomTag()
      };
    }

    // Leaf: no features left to split on - predict the most common output.
    if (features.length == 0) {
      // BUG FIX: the original called mostCommon() on the already-uniqued
      // `targets` list, where every value occurs exactly once, so "most
      // common" degenerated to an arbitrary value. Count the raw outputs.
      var topTarget = this.mostCommon(_.pluck(dataset, 'output'));
      return {
        type: "result",
        val: topTarget,
        name: topTarget,
        alias: topTarget + this.randomTag()
      };
    }

    // Inner node: split on the feature with the highest information gain.
    var bestFeature = this.maxGain(dataset, features);

    var remainingFeatures = _.without(features, bestFeature);

    var possibleValues = _.unique(_.pluck(_.pluck(dataset, 'input'), bestFeature));

    var node = {
      name: bestFeature,
      alias: bestFeature + this.randomTag()
    };
    node.type = "feature";
    // One child branch per observed value of the chosen feature.
    node.vals = _.map(possibleValues, function (v) {
      var _newS = dataset.filter(function (x) {
        return x['input'][bestFeature] == v;
      });

      var child_node = {
        name: v,
        alias: v + this.randomTag(),
        type: "feature_value"
      };
      child_node.child = this.createTree(_newS, remainingFeatures);
      return child_node;
    }, this);
    return node;
  },

  /** @return the element of l with the highest number of occurrences in l. */
  mostCommon: function mostCommon(l) {
    return _.sortBy(l, function (a) {
      return this.count(a, l);
    }, this).reverse()[0];
  },

  /** @return how many times a occurs in l (strict equality). */
  count: function count(a, l) {
    return _.filter(l, function (b) {
      return b === a;
    }).length;
  },

  /** @return a pseudo-random suffix used to keep node aliases unique. */
  randomTag: function randomTag() {
    return "_r" + Math.round(Math.random() * 1000000).toString();
  },

  /**
   * Collect the feature names appearing in the dataset's inputs.
   * @return an array of unique feature names.
   */
  extractFeatures: function extractFeatures(dataset) {
    var features = [];

    for (var record in dataset) {
      for (var key in dataset[record]['input']) {
        features.push(key);
      }
    }

    // PERF FIX: the original returned one copy of each feature per sample,
    // which only made maxGain() recompute identical gains over and over
    // (the resulting tree is unchanged: _.without removes all copies and
    // _.max breaks ties by first occurrence).
    return _.unique(features);
  },

  /**
   * Information gain of splitting the dataset on the given feature:
   * entropy of the whole set minus the size-weighted entropies of the
   * per-value subsets.
   */
  gain: function gain(dataset, feature) {
    var attrVals = _.unique(_.pluck(_.pluck(dataset, 'input'), feature));

    var setEntropy = this.entropy(_.pluck(dataset, 'output'));

    var setSize = _.size(dataset);

    var entropies = attrVals.map(function (n) {
      var subset = dataset.filter(function (x) {
        return x['input'][feature] === n;
      });
      return subset.length / setSize * this.entropy(_.pluck(subset, 'output'));
    }, this);
    var sumOfEntropies = entropies.reduce(function (a, b) {
      return a + b;
    }, 0);
    return setEntropy - sumOfEntropies;
  },

  /** Shannon entropy (base 2) of the given list of values. */
  entropy: function entropy(vals) {
    var uniqueVals = _.unique(vals);

    var probs = uniqueVals.map(function (x) {
      return this.prob(x, vals);
    }, this);
    var logVals = probs.map(function (p) {
      return -p * this.log2(p);
    }, this);
    return logVals.reduce(function (a, b) {
      return a + b;
    }, 0);
  },

  /** Empirical probability of val within vals. */
  prob: function prob(val, vals) {
    var instances = _.filter(vals, function (x) {
      return x === val;
    }).length;

    var total = vals.length;
    return instances / total;
  },

  /** Base-2 logarithm. */
  log2: function log2(n) {
    return Math.log(n) / Math.log(2);
  },

  /** @return the feature with the highest information gain on the dataset. */
  maxGain: function maxGain(dataset, features) {
    return _.max(features, function (e) {
      return this.gain(dataset, e);
    }, this);
  },

  /** Stored for interface compatibility; not used by this classifier. */
  setFeatureLookupTable: function setFeatureLookupTable(featureLookupTable) {
    this.featureLookupTable = featureLookupTable;
  },

  /**
   * Batch training: build the ID3 tree from the whole dataset.
   *
   * @param dataset an array of samples of the form
   *        {input: {feature1: value1...}, output: label}
   */
  trainBatch: function trainBatch(dataset) {
    var features = this.extractFeatures(dataset);
    this.root = this.createTree(dataset, features);
  },

  /**
   * Classify a single sample by walking the tree from the root.
   *
   * @param features a SINGLE sample (a hash of feature-value pairs).
   * @return the label stored at the leaf the sample reaches.
   */
  classify: function classify(features) {
    // BUG FIX: `root` was assigned without `var`; under this file's
    // "use strict" that throws "ReferenceError: root is not defined"
    // on the very first call.
    var root = this.root;

    while (root.type !== "result") {
      var attr = root.name;
      var sampleVal = features[attr];

      var childNode = _.detect(root.vals, function (x) {
        return x.name == sampleVal;
      });

      if (childNode) {
        root = childNode.child;
      } else {
        // Unseen feature value: fall back to the first branch.
        root = root.vals[0].child;
      }
    }

    return root.val;
  }
};
module.exports = DecisionTree;
--------------------------------------------------------------------------------