├── docs ├── CNAME ├── .vuepress │ ├── public │ │ ├── logo.png │ │ ├── favicon.ico │ │ ├── apple-icon.png │ │ ├── favicon-16x16.png │ │ ├── favicon-32x32.png │ │ ├── favicon-96x96.png │ │ ├── ms-icon-70x70.png │ │ ├── apple-icon-57x57.png │ │ ├── apple-icon-60x60.png │ │ ├── apple-icon-72x72.png │ │ ├── apple-icon-76x76.png │ │ ├── ms-icon-144x144.png │ │ ├── ms-icon-150x150.png │ │ ├── ms-icon-310x310.png │ │ ├── android-icon-36x36.png │ │ ├── android-icon-48x48.png │ │ ├── android-icon-72x72.png │ │ ├── android-icon-96x96.png │ │ ├── apple-icon-114x114.png │ │ ├── apple-icon-120x120.png │ │ ├── apple-icon-144x144.png │ │ ├── apple-icon-152x152.png │ │ ├── apple-icon-180x180.png │ │ ├── android-icon-144x144.png │ │ ├── android-icon-192x192.png │ │ ├── apple-icon-precomposed.png │ │ ├── browserconfig.xml │ │ └── manifest.json │ └── config.js ├── README.md ├── .gitignore └── package.json ├── src ├── core │ ├── .gitignore │ ├── svm │ │ ├── SvmJsDemo.js │ │ ├── SvmPerfDemo.js │ │ ├── SvmLinearDemo.js │ │ ├── SvmLinearMulticlassDemo.js │ │ ├── SvmJs.js │ │ └── svmcommon.js │ ├── neural │ │ └── NeuralNetwork.js │ ├── winnow │ │ └── WinnowHashDemo.js │ └── multilabel │ │ ├── index.js │ │ ├── multilabelutils.js │ │ ├── BinaryRelevanceDemo.js │ │ └── MetaLabeler.js ├── features │ ├── LowerCaseNormalizer.js │ ├── README.md │ ├── NGramsOfWords.js │ ├── NGramsFromArray.js │ ├── RegexpNormalizer.js │ ├── NGramsOfLetters.js │ ├── RegexpSplitter.js │ ├── HypernymExtractor.js │ ├── index.js │ └── FeatureLookupTable.js ├── formats │ ├── index.js │ ├── json.js │ ├── tsv.js │ ├── svmlight.js │ └── arff.js ├── utils │ ├── index.js │ ├── hamming.js │ ├── list.js │ ├── unseen_correlation.js │ └── partitions.js └── index.js ├── test ├── mocha.opts ├── tempfiles │ └── .gitignore ├── wordcounts.js ├── sorted.js ├── utilsTest │ ├── HammingDistanceTest.js │ ├── ListTest.js │ └── PartitionsTest.js ├── test_utils.js ├── generaterandom.js ├── featuresTest │ ├── FeatureLookupTableTest.js │ ├── 
RegexpNormalizerTest.js │ ├── RegexpSplitterTest.js │ └── FeatureExtractorTest.js └── classifiersTest │ ├── multilabel │ ├── ClassifierWithSplitterTest.js │ ├── MulticlassSegmentationBayesTest.js │ ├── MetaLabelerLanguageModelTest.js │ ├── MetaLabelerSvmTest.js │ └── MetaLabelerWinnowTest.js │ ├── WinnowExampleTest.js │ ├── NeuralWithSpellCheckerTest.js │ ├── SvmJsTest.js │ ├── NeuralWithFeatureExtractorTest.js │ ├── NeuralWithNormalizerTest.js │ ├── WinnowTest.js │ ├── SvmMulticlassTest.js │ └── SvmTest.js ├── .babelrc ├── renovate.json ├── .github ├── ISSUE_TEMPLATE │ ├── custom.md │ ├── feature_request.md │ └── bug_report.md └── FUNDING.yml ├── index.js ├── dist ├── features │ ├── LowerCaseNormalizer.js │ ├── NGramsOfWords.js │ ├── NGramsFromArray.js │ ├── RegexpNormalizer.js │ ├── NGramsOfLetters.js │ ├── RegexpSplitter.js │ ├── HypernymExtractor.js │ ├── index.js │ └── FeatureLookupTable.js ├── utils │ ├── index.js │ ├── hamming.js │ ├── list.js │ ├── unseen_correlation.js │ └── partitions.js ├── formats │ ├── index.js │ ├── json.js │ ├── tsv.js │ ├── svmlight.js │ └── arff.js ├── core │ ├── svm │ │ ├── SvmJsDemo.js │ │ ├── SvmPerfDemo.js │ │ ├── SvmLinearDemo.js │ │ ├── SvmLinearMulticlassDemo.js │ │ ├── SvmJs.js │ │ └── svmcommon.js │ ├── neural │ │ └── NeuralNetwork.js │ ├── decisiontree │ │ ├── DecisionTreeDemo.js │ │ └── DecisionTree.js │ ├── winnow │ │ └── WinnowHashDemo.js │ └── multilabel │ │ ├── index.js │ │ ├── multilabelutils.js │ │ ├── BinaryRelevanceDemo.js │ │ └── MetaLabeler.js └── index.js ├── .gitignore ├── package.json ├── README.md └── CODE_OF_CONDUCT.md /docs/CNAME: -------------------------------------------------------------------------------- 1 | neuro.js.org -------------------------------------------------------------------------------- /src/core/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/test/mocha.opts: -------------------------------------------------------------------------------- 1 | --recursive 2 | -------------------------------------------------------------------------------- /test/tempfiles/.gitignore: -------------------------------------------------------------------------------- 1 | /Svm*.* -------------------------------------------------------------------------------- /.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": ["@babel/preset-env"] 3 | } -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "config:base" 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /docs/.vuepress/public/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/logo.png -------------------------------------------------------------------------------- /docs/.vuepress/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/favicon.ico -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon.png -------------------------------------------------------------------------------- /docs/.vuepress/public/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/favicon-16x16.png 
-------------------------------------------------------------------------------- /docs/.vuepress/public/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/favicon-32x32.png -------------------------------------------------------------------------------- /docs/.vuepress/public/favicon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/favicon-96x96.png -------------------------------------------------------------------------------- /docs/.vuepress/public/ms-icon-70x70.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/ms-icon-70x70.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-57x57.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-57x57.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-60x60.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-72x72.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-72x72.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-76x76.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-76x76.png -------------------------------------------------------------------------------- /docs/.vuepress/public/ms-icon-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/ms-icon-144x144.png -------------------------------------------------------------------------------- /docs/.vuepress/public/ms-icon-150x150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/ms-icon-150x150.png -------------------------------------------------------------------------------- /docs/.vuepress/public/ms-icon-310x310.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/ms-icon-310x310.png -------------------------------------------------------------------------------- /docs/.vuepress/public/android-icon-36x36.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/android-icon-36x36.png -------------------------------------------------------------------------------- /docs/.vuepress/public/android-icon-48x48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/android-icon-48x48.png -------------------------------------------------------------------------------- /docs/.vuepress/public/android-icon-72x72.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/android-icon-72x72.png -------------------------------------------------------------------------------- /docs/.vuepress/public/android-icon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/android-icon-96x96.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-114x114.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-114x114.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-120x120.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-144x144.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-152x152.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-180x180.png 
-------------------------------------------------------------------------------- /docs/.vuepress/public/android-icon-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/android-icon-144x144.png -------------------------------------------------------------------------------- /docs/.vuepress/public/android-icon-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/android-icon-192x192.png -------------------------------------------------------------------------------- /docs/.vuepress/public/apple-icon-precomposed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intelligo-mn/neuro/HEAD/docs/.vuepress/public/apple-icon-precomposed.png -------------------------------------------------------------------------------- /src/features/LowerCaseNormalizer.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Normalizes a sentence by converting it to lower case. 3 | */ 4 | export default function (sample) { 5 | return sample.toLowerCase(); 6 | }; 7 | -------------------------------------------------------------------------------- /src/formats/index.js: -------------------------------------------------------------------------------- 1 | export const arff = require("./arff"); 2 | export const json = require("./json"); 3 | export const tsv = require("./tsv"); 4 | export const svmlight = require("./svmlight"); 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom issue template 3 | about: Describe this issue template's purpose here. 
4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | export const classifiers = require('./dist/core'); 2 | export const features = require('./dist/features'); 3 | export const formats = require('./dist/formats'); 4 | export const utils = require('./dist/utils'); 5 | -------------------------------------------------------------------------------- /src/features/README.md: -------------------------------------------------------------------------------- 1 | This folder should contain several kinds of feature extractors. 2 | 3 | A feature extractor is a function that takes an input object, and returns a features object for that object, for use in training and/or classification. 4 | -------------------------------------------------------------------------------- /docs/.vuepress/public/browserconfig.xml: -------------------------------------------------------------------------------- 1 | 2 | #ffffff -------------------------------------------------------------------------------- /dist/features/LowerCaseNormalizer.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports["default"] = _default; 7 | 8 | /** 9 | * Normalizes a sentence by converting it to lower case. 
10 | */ 11 | function _default(sample) { 12 | return sample.toLowerCase(); 13 | } 14 | 15 | ; -------------------------------------------------------------------------------- /src/utils/index.js: -------------------------------------------------------------------------------- 1 | var trainAndTest = require("./trainAndTest"); 2 | module.exports = { 3 | hash: require("./hash"), 4 | partitions: require("./partitions"), 5 | PrecisionRecall: require("./PrecisionRecall"), 6 | test: trainAndTest.test, 7 | compare: trainAndTest.compare, 8 | hammingDistance: require("./hamming").hammingDistance, 9 | }; 10 | -------------------------------------------------------------------------------- /dist/utils/index.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | var trainAndTest = require("./trainAndTest"); 4 | 5 | module.exports = { 6 | hash: require("./hash"), 7 | partitions: require("./partitions"), 8 | PrecisionRecall: require("./PrecisionRecall"), 9 | test: trainAndTest.test, 10 | compare: trainAndTest.compare, 11 | hammingDistance: require("./hamming").hammingDistance 12 | }; -------------------------------------------------------------------------------- /test/wordcounts.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Simple calculation of word-counts in a sentence. 3 | * @param sentence 4 | * @return a hash {word1: count1, word2: count2,...} 5 | * words are separated by spaces. 6 | */ 7 | export default function (sentence) { 8 | return sentence.split(' ').reduce(function (counts, word) { 9 | counts[word] = (counts[word] || 0) + 1; 10 | return counts; 11 | }, {}); 12 | } -------------------------------------------------------------------------------- /src/formats/json.js: -------------------------------------------------------------------------------- 1 | /** 2 | * convert a single dataset to compact JSON format. 
3 | * @param dataset an array of samples in the format {input: {feature1: xxx, feature2: yyy, ...}, output: [1,2,3]} 4 | */ 5 | export function toJSON(dataset) { 6 | json = "["; 7 | for (var i=0; i0? "\n, ": "\n ")+ 10 | JSON.stringify(dataset[i])); 11 | } 12 | json += "\n]\n"; 13 | return json; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /src/utils/hamming.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Calculate Hamming distance between two sets 3 | * @param a, b - arrays 4 | * @return number of elements in a-b plus number of elements in b-a 5 | */ 6 | export function hammingDistance(a, b) { 7 | var d = 0; 8 | for (var i = 0; i < a.length; ++i) { 9 | if (b.indexOf(a[i]) < 0) d++; 10 | } 11 | for (var i = 0; i < b.length; ++i) { 12 | if (a.indexOf(b[i]) < 0) d++; 13 | } 14 | return d; 15 | } 16 | -------------------------------------------------------------------------------- /src/features/NGramsOfWords.js: -------------------------------------------------------------------------------- 1 | /** 2 | * NGramExtractor - extracts sequences of words in a text as its features. 
3 | */ 4 | 5 | import NGramsFromArray from './NGramsFromArray'; 6 | export default function (numOfWords, gap) { 7 | return function (sample, features) { 8 | var words = sample.split(/[ \t,;:.!?]/).filter(function (a) { 9 | return !!a 10 | }); // all non-empty words 11 | NGramsFromArray(numOfWords, gap, words, features); 12 | }; 13 | }; 14 | -------------------------------------------------------------------------------- /dist/formats/index.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports.svmlight = exports.tsv = exports.json = exports.arff = void 0; 7 | 8 | var arff = require("./arff"); 9 | 10 | exports.arff = arff; 11 | 12 | var json = require("./json"); 13 | 14 | exports.json = json; 15 | 16 | var tsv = require("./tsv"); 17 | 18 | exports.tsv = tsv; 19 | 20 | var svmlight = require("./svmlight"); 21 | 22 | exports.svmlight = svmlight; -------------------------------------------------------------------------------- /test/sorted.js: -------------------------------------------------------------------------------- 1 | /** 2 | * This unit adds a non-intrusive property "sorted" to the Array prototype. 3 | * 4 | * It is used only for testing, when the order of the output array is not important. For example: 5 | * 6 | * classifier.classify("I want aa bb").sorted().should.eql(['A','B']); 7 | * 8 | * @author Erel Segal-Halevi 9 | * @since 2013-09-09 10 | */ 11 | 12 | Object.defineProperty(Array.prototype, 'sorted', { 13 | value: function() { this.sort(); return this; } 14 | }); 15 | -------------------------------------------------------------------------------- /src/formats/tsv.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Small utility for writing a dataset in tab-separated-values format. 
3 | * 4 | * @author Erel Segal-Halevi 5 | * @since 2013-08 6 | */ 7 | 8 | 9 | /** 10 | * Write the dataset, one sample per line, with the given separator between sample and output. 11 | */ 12 | exports.toTSV = function(dataset, separator) { 13 | if (!separator) { 14 | separator="\t"; 15 | } 16 | dataset.forEach(function(sample) { 17 | console.log(JSON.stringify(sample.input)+separator+"["+sample.output+"]"); 18 | }); 19 | }; 20 | -------------------------------------------------------------------------------- /dist/formats/json.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports.toJSON = toJSON; 7 | 8 | /** 9 | * convert a single dataset to compact JSON format. 10 | * @param dataset an array of samples in the format {input: {feature1: xxx, feature2: yyy, ...}, output: [1,2,3]} 11 | */ 12 | function toJSON(dataset) { 13 | json = "["; 14 | 15 | for (var i = 0; i < dataset.length; ++i) { 16 | json += (i > 0 ? "\n, " : "\n ") + JSON.stringify(dataset[i]); 17 | } 18 | 19 | json += "\n]\n"; 20 | return json; 21 | } -------------------------------------------------------------------------------- /dist/formats/tsv.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | /** 4 | * Small utility for writing a dataset in tab-separated-values format. 5 | * 6 | * @author Erel Segal-Halevi 7 | * @since 2013-08 8 | */ 9 | 10 | /** 11 | * Write the dataset, one sample per line, with the given separator between sample and output. 
12 | */ 13 | exports.toTSV = function (dataset, separator) { 14 | if (!separator) { 15 | separator = "\t"; 16 | } 17 | 18 | dataset.forEach(function (sample) { 19 | console.log(JSON.stringify(sample.input) + separator + "[" + sample.output + "]"); 20 | }); 21 | }; -------------------------------------------------------------------------------- /dist/utils/hamming.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports.hammingDistance = hammingDistance; 7 | 8 | /** 9 | * Calculate Hamming distance between two sets 10 | * @param a, b - arrays 11 | * @return number of elements in a-b plus number of elements in b-a 12 | */ 13 | function hammingDistance(a, b) { 14 | var d = 0; 15 | 16 | for (var i = 0; i < a.length; ++i) { 17 | if (b.indexOf(a[i]) < 0) d++; 18 | } 19 | 20 | for (var i = 0; i < b.length; ++i) { 21 | if (a.indexOf(b[i]) < 0) d++; 22 | } 23 | 24 | return d; 25 | } -------------------------------------------------------------------------------- /test/utilsTest/HammingDistanceTest.js: -------------------------------------------------------------------------------- 1 | #!mocha 2 | 3 | /** 4 | * a unit-test for Multi-Label classification 5 | */ 6 | 7 | import { hammingDistance } from "../../dist/utils"; 8 | 9 | describe("Hamming distance", function() { 10 | it("calculates hamming distance", function() { 11 | hammingDistance([], []).should.equal(0); 12 | hammingDistance(["a"], []).should.equal(1); 13 | hammingDistance([], ["a"]).should.equal(1); 14 | hammingDistance(["a"], ["a"]).should.equal(0); 15 | hammingDistance(["a"], ["b"]).should.equal(2); 16 | hammingDistance(["a", "b"], ["b", "c"]).should.equal(2); 17 | }); 18 | }); 19 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These 
are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: turtuvshin 5 | open_collective: intelligo 6 | ko_fi: turtuvshin 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /src/features/NGramsFromArray.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Convert an array of words/tokens to a set of n-grams, for a given n, possibly with a gap: 3 | */ 4 | export default function (numOfWords, gap, grams, features) { 5 | for (var i = 0; i < numOfWords - 1 - (gap ? 1 : 0); ++i) { 6 | grams.unshift("[start]"); 7 | grams.push("[end]"); 8 | } 9 | for (var i = 0; i <= grams.length - numOfWords; ++i) { 10 | let sliceOfWords = grams.slice(i, i + numOfWords); 11 | if (gap) sliceOfWords[1] = "-"; 12 | let feature = sliceOfWords.join(" "); 13 | features[feature.trim()] = 1; 14 | } 15 | for (var i = 0; i < numOfWords - 1 - (gap ? 1 : 0); ++i) { 16 | grams.pop(); 17 | grams.shift(); 18 | } 19 | }; -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. 
I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /src/core/svm/SvmJsDemo.js: -------------------------------------------------------------------------------- 1 | // simple demonstration of SVM 2 | 3 | var SvmJs = require('./SvmJs'); 4 | 5 | var svm = new SvmJs({C: 1.0}); 6 | 7 | var traindata = [ 8 | {input: [0,0], output: 0}, 9 | {input: [0,1], output: 0}, 10 | {input: [1,0], output: 1}, 11 | {input: [1,1], output: 1}, 12 | ]; 13 | 14 | svm.trainBatch(traindata); 15 | 16 | console.dir(svm.classify([0,2])); // 0 17 | console.dir(svm.classify([1,3])); // 1 18 | 19 | // explain: 20 | console.dir(svm.classify([0,2], 3)); // 0 21 | console.dir(svm.classify([1,3], 3)); // 1 22 | 23 | 24 | //continuous output: 25 | console.dir(svm.classify([0,2], 0, true)); // -1 26 | console.dir(svm.classify([1,3], 0, true)); // 1 27 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | home: true 3 | heroImage: /logo.png 4 | heroText: Neuro 5 | tagline: Neuro.js is machine learning framework for building AI assistants and chat-bots. 6 | actionText: Get Started → 7 | actionLink: /learn/ 8 | features: 9 | - title: ML with Javascript 10 | details: Neuro is a library for developing and training ML models in JavaScript, and deploying in browser or on Node.js 11 | - title: Awesome Feature 12 | details: Neuro is a supports Multi label classification, online learning, real-time classification. 
13 | - title: Simplicity & performance 14 | details: Everyone should have access to simple machine learning. Practical machine learning should be simple. 15 | footer: MIT Licensed | Powered by Intelligo Systems 16 | --- -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | import EnhancedClassifier from "./core/EnhancedClassifier"; 2 | import multilabel from "./core/multilabel"; 3 | import NeuralNetwork from "./core/neural/NeuralNetwork"; 4 | import SvmJs from "./core/svm/SvmJs"; 5 | import SvmLinear from "./core/svm/SvmLinear"; 6 | import SvmPerf from "./core/svm/SvmPerf"; 7 | import Winnow from "./core/winnow/WinnowHash"; 8 | import features from "./features"; 9 | import formats from "./formats"; 10 | import utils from "./utils"; 11 | 12 | export default { 13 | classifiers: { 14 | NeuralNetwork, 15 | SvmJs, 16 | SvmLinear, 17 | SvmPerf, 18 | Winnow, 19 | multilabel, 20 | EnhancedClassifier 21 | }, 22 | features, 23 | formats, 24 | utils 25 | }; 26 | -------------------------------------------------------------------------------- /src/core/neural/NeuralNetwork.js: -------------------------------------------------------------------------------- 1 | /** 2 | * A wrapper for Heather Arthur's brain.js package: https://github.com/harthur/brain 3 | * 4 | * @author Erel Segal-haLevi 5 | * @since 2013-09-29 6 | */ 7 | 8 | var NeuralNetwork = require('brain.js').NeuralNetwork; 9 | 10 | NeuralNetwork.prototype.trainOnline = function () {throw new Error("NeuralNetwork does not support online training");}; 11 | NeuralNetwork.prototype.train = function(dataset) { 12 | dataset.forEach(function(datum) { 13 | if (!Array.isArray(datum.output) && !(datum.output instanceof Object)){ 14 | datum.output = [datum.output]; 15 | } 16 | }); 17 | this.train(dataset); 18 | }; 19 | NeuralNetwork.prototype.classify = NeuralNetwork.prototype.run; 20 | 21 | module.exports 
= NeuralNetwork; 22 | -------------------------------------------------------------------------------- /dist/core/svm/SvmJsDemo.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | // simple demonstration of SVM 4 | var SvmJs = require('./SvmJs'); 5 | 6 | var svm = new SvmJs({ 7 | C: 1.0 8 | }); 9 | var traindata = [{ 10 | input: [0, 0], 11 | output: 0 12 | }, { 13 | input: [0, 1], 14 | output: 0 15 | }, { 16 | input: [1, 0], 17 | output: 1 18 | }, { 19 | input: [1, 1], 20 | output: 1 21 | }]; 22 | svm.trainBatch(traindata); 23 | console.dir(svm.classify([0, 2])); // 0 24 | 25 | console.dir(svm.classify([1, 3])); // 1 26 | // explain: 27 | 28 | console.dir(svm.classify([0, 2], 3)); // 0 29 | 30 | console.dir(svm.classify([1, 3], 3)); // 1 31 | //continuous output: 32 | 33 | console.dir(svm.classify([0, 2], 0, true)); // -1 34 | 35 | console.dir(svm.classify([1, 3], 0, true)); // 1 -------------------------------------------------------------------------------- /dist/features/NGramsOfWords.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports["default"] = _default; 7 | 8 | var _NGramsFromArray = _interopRequireDefault(require("./NGramsFromArray")); 9 | 10 | function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; } 11 | 12 | /** 13 | * NGramExtractor - extracts sequences of words in a text as its features. 
14 | */ 15 | function _default(numOfWords, gap) { 16 | return function (sample, features) { 17 | var words = sample.split(/[ \t,;:.!?]/).filter(function (a) { 18 | return !!a; 19 | }); // all non-empty words 20 | 21 | (0, _NGramsFromArray["default"])(numOfWords, gap, words, features); 22 | }; 23 | } 24 | 25 | ; -------------------------------------------------------------------------------- /src/core/winnow/WinnowHashDemo.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Demonstrates the winnow classification algorithm. 3 | * 4 | * @author Erel Segal-Halevi 5 | * @since 2013-07 6 | */ 7 | 8 | console.log("Winnow demo start"); 9 | var Winnow = require('./WinnowHash'); 10 | 11 | var classifier = new Winnow({ 12 | default_positive_weight: 1, 13 | default_negative_weight: 1, 14 | threshold: 0, 15 | do_averaging: false, 16 | margin: 1, 17 | }); 18 | 19 | classifier.trainOnline({'a': 1, 'b': 0}, 0); 20 | classifier.trainOnline({'a': 0, 'b': 1}, 0); 21 | classifier.trainOnline({'a': 0, 'b': 0}, 1); 22 | 23 | console.dir(classifier.classify({'a': 0, 'b': 0}, /*explain=*/1)); 24 | console.dir(classifier.classify({'a': 1, 'b': 1}, /*explain=*/3)); 25 | 26 | console.log("Winnow demo end"); 27 | -------------------------------------------------------------------------------- /dist/core/neural/NeuralNetwork.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | /** 4 | * A wrapper for Heather Arthur's brain.js package: https://github.com/harthur/brain 5 | * 6 | * @author Erel Segal-haLevi 7 | * @since 2013-09-29 8 | */ 9 | var NeuralNetwork = require('brain.js').NeuralNetwork; 10 | 11 | NeuralNetwork.prototype.trainOnline = function () { 12 | throw new Error("NeuralNetwork does not support online training"); 13 | }; 14 | 15 | NeuralNetwork.prototype.train = function (dataset) { 16 | dataset.forEach(function (datum) { 17 | if (!Array.isArray(datum.output) && !(datum.output 
instanceof Object)) { 18 | datum.output = [datum.output]; 19 | } 20 | }); 21 | this.train(dataset); 22 | }; 23 | 24 | NeuralNetwork.prototype.classify = NeuralNetwork.prototype.run; 25 | module.exports = NeuralNetwork; -------------------------------------------------------------------------------- /src/core/multilabel/index.js: -------------------------------------------------------------------------------- 1 | export const BinaryRelevance = require('./BinaryRelevance'); 2 | export const BinarySegmentation = require('./BinarySegmentation'); 3 | export const MulticlassSegmentation = require('./MulticlassSegmentation'); 4 | export const MetaLabeler = require('./MetaLabeler'); 5 | export const CrossLanguageModel = require('./CrossLangaugeModelClassifier'); 6 | export const ThresholdClassifier = require('./ThresholdClassifier'); 7 | 8 | // add a "classify and log" method to all classifiers, for demos: 9 | for (var classifierClass in module.exports) { 10 | if (module.exports[classifierClass].prototype && module.exports[classifierClass].prototype.classify) 11 | module.exports[classifierClass].prototype.classifyAndLog = function(sample) { 12 | console.log(sample+" is "+this.classify(sample)); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /dist/features/NGramsFromArray.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports["default"] = _default; 7 | 8 | /** 9 | * Convert an array of words/tokens to a set of n-grams, for a given n, possibly with a gap: 10 | */ 11 | function _default(numOfWords, gap, grams, features) { 12 | for (var i = 0; i < numOfWords - 1 - (gap ? 
1 : 0); ++i) { 13 | grams.unshift("[start]"); 14 | grams.push("[end]"); 15 | } 16 | 17 | for (var i = 0; i <= grams.length - numOfWords; ++i) { 18 | var sliceOfWords = grams.slice(i, i + numOfWords); 19 | if (gap) sliceOfWords[1] = "-"; 20 | var feature = sliceOfWords.join(" "); 21 | features[feature.trim()] = 1; 22 | } 23 | 24 | for (var i = 0; i < numOfWords - 1 - (gap ? 1 : 0); ++i) { 25 | grams.pop(); 26 | grams.shift(); 27 | } 28 | } 29 | 30 | ; -------------------------------------------------------------------------------- /dist/core/decisiontree/DecisionTreeDemo.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | var _DecisionTree = _interopRequireDefault(require("./DecisionTree")); 4 | 5 | function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; } 6 | 7 | console.log("Decision Tree demo start"); 8 | var classifier = new _DecisionTree["default"]({}); 9 | dataset = [{ 10 | input: { 11 | a: 1, 12 | b: 0 13 | }, 14 | output: 0 15 | }, { 16 | input: { 17 | a: 0, 18 | b: 1 19 | }, 20 | output: 0 21 | }, { 22 | input: { 23 | a: 0, 24 | b: 0 25 | }, 26 | output: 1 27 | }]; 28 | classifier.trainBatch(dataset); 29 | console.dir(classifier.classify({ 30 | 'a': 0, 31 | 'b': 0 32 | }, 33 | /*explain=*/ 34 | 1)); 35 | console.dir(classifier.classify({ 36 | 'a': 1, 37 | 'b': 1 38 | }, 39 | /*explain=*/ 40 | 3)); 41 | console.log("Decision Tree demo end"); -------------------------------------------------------------------------------- /dist/core/winnow/WinnowHashDemo.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | /** 4 | * Demonstrates the winnow classification algorithm. 
5 | * 6 | * @author Erel Segal-Halevi 7 | * @since 2013-07 8 | */ 9 | console.log("Winnow demo start"); 10 | 11 | var Winnow = require('./WinnowHash'); 12 | 13 | var classifier = new Winnow({ 14 | default_positive_weight: 1, 15 | default_negative_weight: 1, 16 | threshold: 0, 17 | do_averaging: false, 18 | margin: 1 19 | }); 20 | classifier.trainOnline({ 21 | 'a': 1, 22 | 'b': 0 23 | }, 0); 24 | classifier.trainOnline({ 25 | 'a': 0, 26 | 'b': 1 27 | }, 0); 28 | classifier.trainOnline({ 29 | 'a': 0, 30 | 'b': 0 31 | }, 1); 32 | console.dir(classifier.classify({ 33 | 'a': 0, 34 | 'b': 0 35 | }, 36 | /*explain=*/ 37 | 1)); 38 | console.dir(classifier.classify({ 39 | 'a': 1, 40 | 'b': 1 41 | }, 42 | /*explain=*/ 43 | 3)); 44 | console.log("Winnow demo end"); -------------------------------------------------------------------------------- /docs/.vuepress/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "App", 3 | "icons": [ 4 | { 5 | "src": "\/android-icon-36x36.png", 6 | "sizes": "36x36", 7 | "type": "image\/png", 8 | "density": "0.75" 9 | }, 10 | { 11 | "src": "\/android-icon-48x48.png", 12 | "sizes": "48x48", 13 | "type": "image\/png", 14 | "density": "1.0" 15 | }, 16 | { 17 | "src": "\/android-icon-72x72.png", 18 | "sizes": "72x72", 19 | "type": "image\/png", 20 | "density": "1.5" 21 | }, 22 | { 23 | "src": "\/android-icon-96x96.png", 24 | "sizes": "96x96", 25 | "type": "image\/png", 26 | "density": "2.0" 27 | }, 28 | { 29 | "src": "\/android-icon-144x144.png", 30 | "sizes": "144x144", 31 | "type": "image\/png", 32 | "density": "3.0" 33 | }, 34 | { 35 | "src": "\/android-icon-192x192.png", 36 | "sizes": "192x192", 37 | "type": "image\/png", 38 | "density": "4.0" 39 | } 40 | ] 41 | } -------------------------------------------------------------------------------- /src/features/RegexpNormalizer.js: -------------------------------------------------------------------------------- 1 | /** 2 | * 
normalizes a sentence based on a list of regular expressions. 3 | * @param normalizations - an array of objects {source: /regexp/g, target: "target"} 4 | * @param sample - a string. 5 | * @return a new string, with all normalizations carried out. 6 | */ 7 | export default function (normalizations) { 8 | return function (sample) { 9 | normalizations.forEach(function (normalization) { 10 | var matches = null; 11 | if (normalization.source instanceof RegExp) { 12 | if (!normalization.source.global) { 13 | console.warn("normalization source, " + normalization.source + ", is not global - skipping"); 14 | return; 15 | } 16 | } else { 17 | normalization.source = new RegExp(normalization.source, "gi"); 18 | } 19 | sample = sample.replace(normalization.source, normalization.target); 20 | //console.log(sample); 21 | }); 22 | return sample; 23 | }; 24 | }; 25 | -------------------------------------------------------------------------------- /src/features/NGramsOfLetters.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Extracts substrings of letters of a given size. 3 | */ 4 | 5 | var PAD_CHAR = '#'; 6 | 7 | /** 8 | * Add letter n-gram features to the given feature-vector. 9 | * 10 | * @param numOfLetters - a positive integer. 11 | * @param caseSensitive - boolean. if false, convert all to lower case. 12 | * @param sample - a string. 13 | * @param features an initial hash of features (optional). 14 | * @return a hash with all the different letter n-grams contained in the given sentence. 
15 | */ 16 | export default function (numOfLetters, caseSensitive) { 17 | return function (sample, features) { 18 | if (!caseSensitive) sample = sample.toLowerCase(); 19 | for (var i = 0; i < numOfLetters - 1; ++i) 20 | sample = PAD_CHAR + sample + PAD_CHAR; 21 | for (var firstLetter = 0; firstLetter < sample.length - numOfLetters + 1; ++firstLetter) { 22 | var feature = sample.substr(firstLetter, numOfLetters); 23 | features[feature] = 1; 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 
39 | -------------------------------------------------------------------------------- /test/test_utils.js: -------------------------------------------------------------------------------- 1 | /* 2 | Module test_utils contains helpful routines for running test of existing classifiers, 3 | currently both of them are the copy from different modules 4 | 5 | */ 6 | import PrecisionRecall from '../dist/utils/PrecisionRecall'; 7 | 8 | export function test(dataset, classifier) { 9 | var currentStats = new PrecisionRecall(); 10 | for (var i = 0; i < dataset.length; ++i) { 11 | var expectedClasses = dataset[i].output; 12 | var actualClasses = classifier.classify(dataset[i].input); 13 | currentStats.addCasesHash(expectedClasses, actualClasses, true); 14 | } 15 | return currentStats; 16 | } 17 | export function F1_evaluation(stats, type_of_averaging) { 18 | if (type_of_averaging == 0) { 19 | if ((stats['TP'] == 0) || (stats['TP'] + stats['FP'] == 0) || (stats['TP'] + stats['FN'] == 0)) 20 | return 0; 21 | var precision = stats['TP'] / (stats['TP'] + stats['FP']); 22 | var recall = stats['TP'] / (stats['TP'] + stats['FN']); 23 | var f1 = (precision * recall) / (precision + recall); 24 | return f1; 25 | } 26 | } -------------------------------------------------------------------------------- /test/generaterandom.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Generating random string with given number of words and generating random list 3 | * with given length with element from the given list 4 | */ 5 | 6 | export function random_string(length) { 7 | var chars = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXTZabcdefghiklmnopqrstuvwxyz'; 8 | length = length ? 
length : 10; 9 | var string = ''; 10 | for (var i = 0; i < length; i++) { 11 | var word_length = Math.floor(Math.random() * 10 + 1); 12 | for (var j = 0; j <= word_length; j++) { 13 | var randomNumber = Math.floor(Math.random() * chars.length); 14 | var ch = chars.substring(randomNumber, randomNumber + 1); 15 | string += ch; 16 | } 17 | string += " "; 18 | } 19 | return string; 20 | } 21 | export function random_list_length(list) { 22 | return this.random_list(Math.floor(Math.random() * 5), list); 23 | } 24 | export function random_list(length, list) { 25 | var result = []; 26 | for (var i = 0; i < length; i++) { 27 | result.push(list[Math.floor(Math.random() * list.length)]); 28 | } 29 | return result; 30 | } -------------------------------------------------------------------------------- /src/core/svm/SvmPerfDemo.js: -------------------------------------------------------------------------------- 1 | // simple demonstration of binary SVM, based on SVM-Perf 2 | 3 | var SvmPerf = require('./SvmPerf'); 4 | 5 | var trainSet = [ 6 | {input: [0,0], output: 0}, 7 | {input: [1,1], output: 0}, 8 | {input: [0,1], output: 1}, 9 | {input: [1,2], output: 1} ]; 10 | 11 | // the separating line goes through [0,0.5] and [1,1.5]. 
It is: 12 | // 0.5+x-y = 0 13 | // or: 2y-2x-1 = 0 14 | 15 | 16 | var classifier = new SvmPerf( 17 | { 18 | learn_args: "-c 20.0", 19 | model_file_prefix: "tempfiles/SvmPerfDemo", 20 | debug:false 21 | } 22 | ); 23 | classifier.trainBatch(trainSet); 24 | 25 | // binary output: 26 | console.dir(classifier.classify([0,2])); // 1 27 | console.dir(classifier.classify([1,0])); // 0 28 | 29 | console.dir(classifier.modelMap); // { '0': -1, '1': -2, '2': 2 } 30 | 31 | // explain: 32 | console.dir(classifier.classify([0,2], 3)); // 1 33 | console.dir(classifier.classify([1,0], 3)); // 0 34 | 35 | // continuous output: 36 | console.dir(classifier.classify([0,2], 0, true)); // 3 37 | console.dir(classifier.classify([1,0], 0, true)); // -3 38 | -------------------------------------------------------------------------------- /dist/features/RegexpNormalizer.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports["default"] = _default; 7 | 8 | /** 9 | * normalizes a sentence based on a list of regular expressions. 10 | * @param normalizations - an array of objects {source: /regexp/g, target: "target"} 11 | * @param sample - a string. 12 | * @return a new string, with all normalizations carried out. 
13 | */ 14 | function _default(normalizations) { 15 | return function (sample) { 16 | normalizations.forEach(function (normalization) { 17 | var matches = null; 18 | 19 | if (normalization.source instanceof RegExp) { 20 | if (!normalization.source.global) { 21 | console.warn("normalization source, " + normalization.source + ", is not global - skipping"); 22 | return; 23 | } 24 | } else { 25 | normalization.source = new RegExp(normalization.source, "gi"); 26 | } 27 | 28 | sample = sample.replace(normalization.source, normalization.target); //console.log(sample); 29 | }); 30 | return sample; 31 | }; 32 | } 33 | 34 | ; -------------------------------------------------------------------------------- /src/features/RegexpSplitter.js: -------------------------------------------------------------------------------- 1 | /** 2 | * splits sentences using a custom regular expression. 3 | * @param regexpString - a string 4 | * @param delimitersToInclude - a hash (set) of delimiters that will be added to the end of the previous sentence. 5 | * @param text - a string. 6 | * @return an array of parts (sentences). 
7 | */ 8 | export default function (regexpString, delimitersToInclude) { 9 | regexpString = "(" + regexpString + ")"; // to capture the delimiters 10 | var regexp = new RegExp(regexpString, "i"); 11 | if (!delimitersToInclude) delimitersToInclude = {}; 12 | return function (text) { 13 | var parts = text.split(regexp); 14 | var normalizedParts = []; 15 | for (var i = 0; i < parts.length; i += 2) { 16 | parts[i] = parts[i].trim(); 17 | var part = parts[i]; 18 | if (i + 1 < parts.length) { 19 | var delimiter = parts[i + 1]; 20 | if (delimitersToInclude[delimiter]) 21 | part += " " + delimiter; 22 | } 23 | if (part.length > 0) 24 | normalizedParts.push(part); 25 | } 26 | //console.log(text); 27 | //console.dir(normalizedParts); 28 | return normalizedParts; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /dist/core/svm/SvmPerfDemo.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | // simple demonstration of binary SVM, based on SVM-Perf 4 | var SvmPerf = require('./SvmPerf'); 5 | 6 | var trainSet = [{ 7 | input: [0, 0], 8 | output: 0 9 | }, { 10 | input: [1, 1], 11 | output: 0 12 | }, { 13 | input: [0, 1], 14 | output: 1 15 | }, { 16 | input: [1, 2], 17 | output: 1 18 | }]; // the separating line goes through [0,0.5] and [1,1.5]. 
It is: 19 | // 0.5+x-y = 0 20 | // or: 2y-2x-1 = 0 21 | 22 | var classifier = new SvmPerf({ 23 | learn_args: "-c 20.0", 24 | model_file_prefix: "tempfiles/SvmPerfDemo", 25 | debug: false 26 | }); 27 | classifier.trainBatch(trainSet); // binary output: 28 | 29 | console.dir(classifier.classify([0, 2])); // 1 30 | 31 | console.dir(classifier.classify([1, 0])); // 0 32 | 33 | console.dir(classifier.modelMap); // { '0': -1, '1': -2, '2': 2 } 34 | // explain: 35 | 36 | console.dir(classifier.classify([0, 2], 3)); // 1 37 | 38 | console.dir(classifier.classify([1, 0], 3)); // 0 39 | // continuous output: 40 | 41 | console.dir(classifier.classify([0, 2], 0, true)); // 3 42 | 43 | console.dir(classifier.classify([1, 0], 0, true)); // -3 -------------------------------------------------------------------------------- /dist/features/NGramsOfLetters.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports["default"] = _default; 7 | 8 | /** 9 | * Extracts substrings of letters of a given size. 10 | */ 11 | var PAD_CHAR = '#'; 12 | /** 13 | * Add letter n-gram features to the given feature-vector. 14 | * 15 | * @param numOfLetters - a positive integer. 16 | * @param caseSensitive - boolean. if false, convert all to lower case. 17 | * @param sample - a string. 18 | * @param features an initial hash of features (optional). 19 | * @return a hash with all the different letter n-grams contained in the given sentence. 
20 | */ 21 | 22 | function _default(numOfLetters, caseSensitive) { 23 | return function (sample, features) { 24 | if (!caseSensitive) sample = sample.toLowerCase(); 25 | 26 | for (var i = 0; i < numOfLetters - 1; ++i) { 27 | sample = PAD_CHAR + sample + PAD_CHAR; 28 | } 29 | 30 | for (var firstLetter = 0; firstLetter < sample.length - numOfLetters + 1; ++firstLetter) { 31 | var feature = sample.substr(firstLetter, numOfLetters); 32 | features[feature] = 1; 33 | } 34 | }; 35 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | 8 | # Runtime data 9 | pids 10 | *.pid 11 | *.seed 12 | *.pid.lock 13 | 14 | # Directory for instrumented libs generated by jscoverage/JSCover 15 | lib-cov 16 | 17 | # Coverage directory used by tools like istanbul 18 | coverage 19 | 20 | # nyc test coverage 21 | .nyc_output 22 | 23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 24 | .grunt 25 | 26 | # Bower dependency directory (https://bower.io/) 27 | bower_components 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons (https://nodejs.org/api/addons.html) 33 | build/Release 34 | 35 | # Dependency directories 36 | node_modules/ 37 | jspm_packages/ 38 | 39 | # TypeScript v1 declaration files 40 | typings/ 41 | 42 | # Optional npm cache directory 43 | .npm 44 | 45 | # Optional eslint cache 46 | .eslintcache 47 | 48 | # Optional REPL history 49 | .node_repl_history 50 | 51 | # Output of 'npm pack' 52 | *.tgz 53 | 54 | # Yarn Integrity file 55 | .yarn-integrity 56 | 57 | # dotenv environment variables file 58 | .env 59 | 60 | # next.js build output 61 | .next 62 | -------------------------------------------------------------------------------- /docs/.gitignore: 
-------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | 8 | # Runtime data 9 | pids 10 | *.pid 11 | *.seed 12 | *.pid.lock 13 | 14 | # Directory for instrumented libs generated by jscoverage/JSCover 15 | lib-cov 16 | 17 | # Coverage directory used by tools like istanbul 18 | coverage 19 | 20 | # nyc test coverage 21 | .nyc_output 22 | 23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 24 | .grunt 25 | 26 | # Bower dependency directory (https://bower.io/) 27 | bower_components 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons (https://nodejs.org/api/addons.html) 33 | build/Release 34 | 35 | # Dependency directories 36 | node_modules/ 37 | jspm_packages/ 38 | 39 | # TypeScript v1 declaration files 40 | typings/ 41 | 42 | # Optional npm cache directory 43 | .npm 44 | 45 | # Optional eslint cache 46 | .eslintcache 47 | 48 | # Optional REPL history 49 | .node_repl_history 50 | 51 | # Output of 'npm pack' 52 | *.tgz 53 | 54 | # Yarn Integrity file 55 | .yarn-integrity 56 | 57 | # dotenv environment variables file 58 | .env 59 | 60 | # next.js build output 61 | .next 62 | -------------------------------------------------------------------------------- /test/utilsTest/ListTest.js: -------------------------------------------------------------------------------- 1 | import { isEqual } from "underscore"; 2 | import { average, listembed, median, variance } from "../../dist/utils/list"; 3 | 4 | describe("List test function", function() { 5 | it("It should correctly calculate Variance", function() { 6 | let list = [170, 300, 430, 470, 600]; 7 | variance(list).should.be.equal(21704); 8 | }); 9 | 10 | it("it should calculate average correctly", function() { 11 | let list1 = [1, 2, 3, 4, 5, 6, 7]; 12 | average(list1).should.be.equal(4); 13 | }); 14 | 15 | it("it should calculate median 
correctly", function() { 16 | var list1 = [3, 8, 9, 1, 5, 7, 9, 21]; 17 | median(list1).should.be.equal(7.5); 18 | }); 19 | 20 | it("it should know how to do embedding", function() { 21 | isEqual(listembed(["label"]), [["label"]]).should.equal(true); 22 | isEqual(listembed([]), [[]]).should.equal(true); 23 | isEqual(listembed(undefined), [[]]).should.equal(true); 24 | isEqual(listembed(null), [[]]).should.equal(true); 25 | isEqual( 26 | listembed({ 27 | classes: "label" 28 | }), 29 | ["label"] 30 | ).should.equal(true); 31 | isEqual( 32 | listembed({ 33 | classes: ["label"] 34 | }), 35 | [["label"]] 36 | ).should.equal(true); 37 | }); 38 | }); 39 | -------------------------------------------------------------------------------- /dist/features/RegexpSplitter.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports["default"] = _default; 7 | 8 | /** 9 | * splits sentences using a custom regular expression. 10 | * @param regexpString - a string 11 | * @param delimitersToInclude - a hash (set) of delimiters that will be added to the end of the previous sentence. 12 | * @param text - a string. 13 | * @return an array of parts (sentences). 
14 | */ 15 | function _default(regexpString, delimitersToInclude) { 16 | regexpString = "(" + regexpString + ")"; // to capture the delimiters 17 | 18 | var regexp = new RegExp(regexpString, "i"); 19 | if (!delimitersToInclude) delimitersToInclude = {}; 20 | return function (text) { 21 | var parts = text.split(regexp); 22 | var normalizedParts = []; 23 | 24 | for (var i = 0; i < parts.length; i += 2) { 25 | parts[i] = parts[i].trim(); 26 | var part = parts[i]; 27 | 28 | if (i + 1 < parts.length) { 29 | var delimiter = parts[i + 1]; 30 | if (delimitersToInclude[delimiter]) part += " " + delimiter; 31 | } 32 | 33 | if (part.length > 0) normalizedParts.push(part); 34 | } //console.log(text); 35 | //console.dir(normalizedParts); 36 | 37 | 38 | return normalizedParts; 39 | }; 40 | } -------------------------------------------------------------------------------- /docs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "neuro.js.org", 3 | "description": "Documentation site for Neuro", 4 | "version": "1.0.0", 5 | "author": "Turtuvshin Byambaa ", 6 | "repository": { 7 | "type": "git", 8 | "url": "https://github.com/intelligo-systems/neuro.git" 9 | }, 10 | "engines": { 11 | "node": ">=0.12" 12 | }, 13 | "devDependencies": { 14 | "@vuepress/plugin-google-analytics": "1.3.0", 15 | "vuepress": "1.3.0", 16 | "vuepress-plugin-janitor": "1.0.0", 17 | "vuepress-plugin-reading-time": "0.1.1", 18 | "vuepress-plugin-rss": "2.0.0", 19 | "yaml-front-matter": "4.1.0" 20 | }, 21 | "scripts": { 22 | "dev": "vuepress dev", 23 | "build": "vuepress build", 24 | "test": "mocha" 25 | }, 26 | "contributors": [ 27 | { 28 | "name": "Turtuvshin Byambaa", 29 | "email": "toroo.byamba@gmail.com" 30 | } 31 | ], 32 | "keywords": [ 33 | "ai", 34 | "ai-bot", 35 | "artificial-intelligence", 36 | "bot", 37 | "chat", 38 | "chatbot", 39 | "classifier", 40 | "conversation", 41 | "framework", 42 | "intelligo", 43 | "intelligence", 44 | "neural 
network", 45 | "neural", 46 | "neuro", 47 | "network", 48 | "neural-networks", 49 | "machine-learning" 50 | ], 51 | "license": "MIT" 52 | } 53 | -------------------------------------------------------------------------------- /test/featuresTest/FeatureLookupTableTest.js: -------------------------------------------------------------------------------- 1 | /** 2 | * a unit-test for feature lookup tables 3 | */ 4 | 5 | import FeatureLookupTable from "../../dist/features/FeatureLookupTable"; 6 | 7 | var sample1 = { 8 | a: 111, 9 | b: 222, 10 | c: 333 11 | }; 12 | var sample2 = { 13 | a: 1111, 14 | d: 4444, 15 | e: 5555 16 | }; 17 | var sample3 = { 18 | c: 33333, 19 | e: 55555, 20 | g: 77777 21 | }; 22 | 23 | describe("feature lookup table", function() { 24 | it("adds samples incrementally", function() { 25 | var table = new FeatureLookupTable(); 26 | var array1 = table.hashToArray(sample1); 27 | var array2 = table.hashToArray(sample2); 28 | var array3 = table.hashToArray(sample3); 29 | array1.should.be.an.instanceOf(Array); 30 | array2.should.be.an.instanceOf(Array); 31 | array3.should.be.an.instanceOf(Array); 32 | table.arrayToHash(array1).should.eql(sample1); 33 | table.arrayToHash(array2).should.eql(sample2); 34 | table.arrayToHash(array3).should.eql(sample3); 35 | }); 36 | 37 | it("adds all samples together", function() { 38 | var table = new FeatureLookupTable(); 39 | var arrays = table.hashesToArrays([sample1, sample2, sample3]); 40 | arrays.should.be.an.instanceOf(Array).and.have.lengthOf(3); 41 | table.arraysToHashes(arrays).should.eql([sample1, sample2, sample3]); 42 | }); 43 | }); 44 | -------------------------------------------------------------------------------- /dist/features/HypernymExtractor.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports["default"] = _default; 7 | 8 | /** 9 | * HypernymExtractor - extracts 
hypernyms - words and phrases that are entailed by the given text. 10 | * 11 | * A hypernym is described by a regular expression, a feature string, and a confidence score. 12 | * For example: if regexp=/no (.*)/ and feature="without $1", then, 13 | * if the sample contains "no car", the extractor will add the feature "without car", with the given confidence score (usually a number from 0 to 1). 14 | * 15 | * Adds hypernym features to the given feature-vector. 16 | * 17 | * @param hypernyms - an array of objects {regexp: /regexp/g, feature: "feature", confidence: confidence} 18 | * @param sample - a string. 19 | * @param features an initial hash of features (optional). The hypernym features will be added to that array. 20 | */ 21 | function _default(hypernyms) { 22 | return function (sample, features) { 23 | hypernyms.forEach(function (hypernym) { 24 | if (!(hypernym.regexp instanceof RegExp)) { 25 | hypernym.regexp = new RegExp(hypernym.regexp, "gi"); 26 | } 27 | 28 | if (hypernym.regexp.test(sample)) { 29 | features[hypernym.feature] = hypernym.confidence; 30 | } 31 | }); 32 | }; 33 | } 34 | 35 | ; -------------------------------------------------------------------------------- /src/features/HypernymExtractor.js: -------------------------------------------------------------------------------- 1 | /** 2 | * HypernymExtractor - extracts hypernyms - words and phrases that are entailed by the given text. 3 | * 4 | * A hypernym is described by a regular expression, a feature string, and a confidence score. 5 | * For example: if regexp=/no (.*)/ and feature="without $1", then, 6 | * if the sample contains "no car", the extractor will add the feature "without car", with the given confidence score (usually a number from 0 to 1). 7 | * 8 | * Adds hypernym features to the given feature-vector. 9 | * 10 | * @param hypernyms - an array of objects {regexp: /regexp/g, feature: "feature", confidence: confidence} 11 | * @param sample - a string. 
12 | * @param features an initial hash of features (optional). The hypernym features will be added to that array. 13 | */ 14 | export default function (hypernyms) { 15 | return function (sample, features) { 16 | hypernyms.forEach(function (hypernym) { 17 | if (!(hypernym.regexp instanceof RegExp)) { 18 | hypernym.regexp = new RegExp(hypernym.regexp, "gi"); 19 | } 20 | if (hypernym.regexp.test(sample)) { 21 | features[hypernym.feature] = hypernym.confidence; 22 | } 23 | }); 24 | }; 25 | }; 26 | -------------------------------------------------------------------------------- /src/core/svm/SvmLinearDemo.js: -------------------------------------------------------------------------------- 1 | // simple demonstration of binary SVM, based on LibLinear 2 | 3 | var SvmLinear = require('./SvmLinear'); 4 | var partitions = require(__dirname+'/../../utils/partitions'); 5 | 6 | var dataset = [ 7 | {input: [0,0], output: 0}, 8 | {input: [1,1], output: 0}, 9 | {input: [0,1], output: 1}, 10 | {input: [1,2], output: 1} ]; 11 | 12 | // the separating line goes through [0,0.5] and [1,1.5]. 
It is: 13 | // 0.5+x-y = 0 14 | // or: -1-2x+2y = 0 15 | 16 | var classifier = new SvmLinear( 17 | { 18 | learn_args: "-c 20", 19 | model_file_prefix: "tempfiles/SvmLinearDemo", 20 | train_command: "liblinear_train", 21 | test_command: "liblinear_test", 22 | multiclass: false 23 | } 24 | ); 25 | classifier.trainBatch(dataset); 26 | 27 | console.log("simple classification: "); 28 | console.dir(classifier.classify([0,2])); // 1 29 | console.dir(classifier.classify([1,0])); // 0 30 | 31 | console.log("model: "); 32 | console.dir(classifier.mapLabelToMapFeatureToWeight); // { '0': -1, '1': -2, '2': 2 } 33 | 34 | partitions.partitions(dataset.concat(dataset), 2, function(train, test, index) { 35 | 36 | console.log("fold: "+index) 37 | classifier.trainBatch(train) 38 | 39 | test.forEach(function(instance) { 40 | console.dir("Classify instance:") 41 | console.dir(instance) 42 | console.dir(classifier.classify(instance.input)); 43 | }); 44 | }); -------------------------------------------------------------------------------- /test/featuresTest/RegexpNormalizerTest.js: -------------------------------------------------------------------------------- 1 | /** 2 | * a unit-test for Regular Expression Normalizer. 
3 | */ 4 | 5 | import { RegexpNormalizer } from "../../dist/features"; 6 | 7 | describe("RegexpNormalizer", function() { 8 | it("normalizes simple strings", function() { 9 | var ren = RegexpNormalizer([ 10 | { 11 | source: "can't", 12 | target: "cannot" 13 | }, 14 | { 15 | source: "cannot", 16 | target: "can not" 17 | }, 18 | { 19 | source: "won't", 20 | target: "will not" 21 | } 22 | ]); 23 | ren("I can't do it and I won't do it").should.eql( 24 | "I can not do it and I will not do it" 25 | ); 26 | }); 27 | it("normalizes regular expressions", function() { 28 | var ren = RegexpNormalizer([ 29 | { 30 | source: "\\b(...+)est\\b", 31 | target: "$1" 32 | }, 33 | { 34 | source: "\\b(...+)er\\b", 35 | target: "$1" 36 | } 37 | ]); 38 | ren("faster and highest").should.eql("fast and high"); 39 | }); 40 | it("normalizes numbers", function() { 41 | var ren = RegexpNormalizer([ 42 | { 43 | source: "\\b(\\d+)k\\b", 44 | target: "$1000" 45 | } 46 | ]); 47 | ren("I want 7k dollars").should.eql("I want 7000 dollars"); 48 | ren("I want 70k dollars").should.eql("I want 70000 dollars"); 49 | }); 50 | }); 51 | -------------------------------------------------------------------------------- /dist/core/multilabel/index.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports.ThresholdClassifier = exports.CrossLanguageModel = exports.MetaLabeler = exports.MulticlassSegmentation = exports.BinarySegmentation = exports.BinaryRelevance = void 0; 7 | 8 | var BinaryRelevance = require('./BinaryRelevance'); 9 | 10 | exports.BinaryRelevance = BinaryRelevance; 11 | 12 | var BinarySegmentation = require('./BinarySegmentation'); 13 | 14 | exports.BinarySegmentation = BinarySegmentation; 15 | 16 | var MulticlassSegmentation = require('./MulticlassSegmentation'); 17 | 18 | exports.MulticlassSegmentation = MulticlassSegmentation; 19 | 20 | var MetaLabeler = 
/**
 * Unit tests for the Regular Expression Splitter.
 */

import { RegexpSplitter } from "../../dist/features";

describe("RegexpSplitter", () => {
  it("splits sentences without delimiter", () => {
    const split = RegexpSplitter("[.,;?!]|and");
    const expected = ["Hi", "Who are you", "I am Intelligo Bot"];
    split("Hi. Who are you? I am Intelligo Bot.").should.eql(expected);
    split("Hi.Who are you?I am Intelligo Bot.").should.eql(expected);
    split("Hi. Who are you? I am Intelligo Bot. ").should.eql(expected);
  });

  it("splits sentences with delimiter", () => {
    // "?" is kept (appended to its sentence); "." is dropped.
    const split = RegexpSplitter("[.,;?!]|and", {
      "?": true,
      ".": false
    });
    const expected = ["Hi", "Who are you ?", "I am Intelligo Bot"];
    split("Hi. Who are you? I am Intelligo Bot.").should.eql(expected);
    split("Hi.Who are you?I am Intelligo Bot.").should.eql(expected);
    split("Hi. Who are you? I am Intelligo Bot.").should.eql(expected);
  });
});
It is: 21 | // 0.5+x-y = 0 22 | // or: -1-2x+2y = 0 23 | 24 | var classifier = new SvmLinear({ 25 | learn_args: "-c 20", 26 | model_file_prefix: "tempfiles/SvmLinearDemo", 27 | train_command: "liblinear_train", 28 | test_command: "liblinear_test", 29 | multiclass: false 30 | }); 31 | classifier.trainBatch(dataset); 32 | console.log("simple classification: "); 33 | console.dir(classifier.classify([0, 2])); // 1 34 | 35 | console.dir(classifier.classify([1, 0])); // 0 36 | 37 | console.log("model: "); 38 | console.dir(classifier.mapLabelToMapFeatureToWeight); // { '0': -1, '1': -2, '2': 2 } 39 | 40 | partitions.partitions(dataset.concat(dataset), 2, function (train, test, index) { 41 | console.log("fold: " + index); 42 | classifier.trainBatch(train); 43 | test.forEach(function (instance) { 44 | console.dir("Classify instance:"); 45 | console.dir(instance); 46 | console.dir(classifier.classify(instance.input)); 47 | }); 48 | }); -------------------------------------------------------------------------------- /dist/index.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports["default"] = void 0; 7 | 8 | var _EnhancedClassifier = _interopRequireDefault(require("./core/EnhancedClassifier")); 9 | 10 | var _multilabel = _interopRequireDefault(require("./core/multilabel")); 11 | 12 | var _NeuralNetwork = _interopRequireDefault(require("./core/neural/NeuralNetwork")); 13 | 14 | var _SvmJs = _interopRequireDefault(require("./core/svm/SvmJs")); 15 | 16 | var _SvmLinear = _interopRequireDefault(require("./core/svm/SvmLinear")); 17 | 18 | var _SvmPerf = _interopRequireDefault(require("./core/svm/SvmPerf")); 19 | 20 | var _WinnowHash = _interopRequireDefault(require("./core/winnow/WinnowHash")); 21 | 22 | var _features = _interopRequireDefault(require("./features")); 23 | 24 | var _formats = _interopRequireDefault(require("./formats")); 25 | 
26 | var _utils = _interopRequireDefault(require("./utils")); 27 | 28 | function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; } 29 | 30 | var _default = { 31 | classifiers: { 32 | NeuralNetwork: _NeuralNetwork["default"], 33 | SvmJs: _SvmJs["default"], 34 | SvmLinear: _SvmLinear["default"], 35 | SvmPerf: _SvmPerf["default"], 36 | Winnow: _WinnowHash["default"], 37 | multilabel: _multilabel["default"], 38 | EnhancedClassifier: _EnhancedClassifier["default"] 39 | }, 40 | features: _features["default"], 41 | formats: _formats["default"], 42 | utils: _utils["default"] 43 | }; 44 | exports["default"] = _default; -------------------------------------------------------------------------------- /src/formats/svmlight.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Small utility for writing a dataset in SVM-light format. 3 | * 4 | * @author Erel Segal-Halevi 5 | * @since 2013-09 6 | */ 7 | 8 | 9 | /** 10 | * convert a single dataset to compact JSON format. 11 | * @param dataset an array of samples in the format {input: [value1, value2, ...], output: (0|1)} 12 | * @param bias if nonzero, add it at the beginning of the vector. 13 | * @param binarize if true, change output to -1/1. If false, leave output as it is 14 | */ 15 | exports.toSvmLight = function(dataset, bias, binarize, firstFeatureNumber) { 16 | var lines = ""; 17 | for (var i=0; i0? "\n": "") + 19 | (binarize? (dataset[i].output>0? "1": "-1"): dataset[i].output) + // in svm-light, the output comes first: 20 | featureArrayToFeatureString(dataset[i].input, bias, firstFeatureNumber) 21 | ; 22 | lines += line; 23 | }; 24 | lines += "\n"; 25 | return lines; 26 | } 27 | 28 | 29 | 30 | /** 31 | * convert an array of features to a single line in SVM-light format. The line starts with a space. 
32 | */ 33 | function featureArrayToFeatureString(features, bias, firstFeatureNumber) { 34 | if (!Array.isArray(features)) 35 | throw new Error("Expected an array, but got "+JSON.stringify(features)) 36 | var line = (bias? " "+firstFeatureNumber+":"+bias: ""); 37 | for (var feature=0; feature 0 ? "\n" : "") + (binarize ? dataset[i].output > 0 ? "1" : "-1" : dataset[i].output) + // in svm-light, the output comes first: 21 | featureArrayToFeatureString(dataset[i].input, bias, firstFeatureNumber); 22 | lines += line; 23 | } 24 | 25 | ; 26 | lines += "\n"; 27 | return lines; 28 | }; 29 | /** 30 | * convert an array of features to a single line in SVM-light format. The line starts with a space. 31 | */ 32 | 33 | 34 | function featureArrayToFeatureString(features, bias, firstFeatureNumber) { 35 | if (!Array.isArray(features)) throw new Error("Expected an array, but got " + JSON.stringify(features)); 36 | var line = bias ? " " + firstFeatureNumber + ":" + bias : ""; 37 | 38 | for (var feature = 0; feature < features.length; ++feature) { 39 | var value = features[feature]; 40 | if (value) line += " " + (feature + firstFeatureNumber + (bias ? 
/**
 * Utilities for lists
 *
 * @author Vasily Konovalov
 */
import {
  reduce,
  isObject,
  isArray,
  each,
  clone
} from 'underscore';

/**
 * Return the median of an array of numbers.
 * NOTE: sorts the given array in place (preserved side effect - callers may rely on it).
 * For an even number of elements, the two middle values are averaged.
 */
export function median(values) {
  values.sort(function (a, b) {
    return a - b;
  });
  var half = Math.floor(values.length / 2);
  if (values.length % 2)
    return values[half];
  else
    return (values[half - 1] + values[half]) / 2.0;
}

/**
 * Return the population variance of a list of numbers: E[x^2] - (E[x])^2.
 */
export function variance(list) {
  // BUGFIX: these locals used to be implicit globals, which throws a
  // ReferenceError at runtime because this file is an ES module (strict mode).
  let sum = reduce(list, function (memo, num) {
    return memo + num;
  }, 0);
  let exp = sum / list.length;
  let sum2 = reduce(list, function (memo, num) {
    return memo + num * num;
  }, 0);
  let exp2 = sum2 / list.length;
  return exp2 - exp * exp;
}

/** Return the arithmetic mean of a list of numbers. */
export function average(list) {
  let sum = reduce(list, function (memo, num) {
    return memo + num;
  }, 0);
  return sum / list.length;
}

/**
 * Wrap a label (or label container) into embedded-list ([[...]]) form.
 * @param label - null/undefined, a single label array, an already-embedded
 *   array of arrays, or an object with a "classes" property.
 * @return an embedded list; [[]] for null/undefined input.
 */
export function listembed(label) {
  if (label === null || label === undefined)
    return [
      []
    ];
  if (isObject(label) && !isArray(label))
    if ('classes' in label)
      label = label.classes;

  if (!(label[0] instanceof Array))
    return [label];
  else
    return label;
}

/** Return a copy of a dataset in which each sample is shallow-cloned. */
export function clonedataset(set) {
  // BUGFIX: "set1" used to be an implicit global (ReferenceError in strict mode).
  const set1 = [];
  each(set, function (value, key, list) {
    set1.push(clone(value));
  });
  return set1;
}
-------------------------------------------------------------------------------- 1 | # neuro.js 2 | 3 | [![npm](https://img.shields.io/npm/v/neuro.js.svg?style=plastic)](https://www.npmjs.com/package/neuro.js) 4 | [![npm](https://img.shields.io/npm/dt/neuro.js.svg?style=plastic)](https://www.npmjs.com/package/neuro.js) 5 | [![GitHub license](https://img.shields.io/github/license/intelligo-systems/neuro.js.svg)](https://github.com/intelligo-systems/neuro.js/blob/master/LICENSE) 6 | [![Twitter](https://img.shields.io/twitter/url/https/github.com/intelligo-systems/neuro.js.svg?style=social)](https://twitter.com/intent/tweet?text=Wow:&url=https%3A%2F%2Fgithub.com%2Fintelligo-systems%2Fintelligo) 7 | 8 | Neuro.js is machine learning framework for building AI assistants and chat-bots. 9 | 10 | [![NPM](https://nodei.co/npm/neuro.js.png?downloads=true&downloadRank=true&stars=true)](https://nodei.co/npm/neuro.js/) 11 | 12 | | [Installation][] | [Documentation][] | [Contributors][] | [License][] | 13 | |---|---|---|---| 14 | 15 | # Installation 16 | 17 | ``` 18 | npm install neuro.js --save 19 | ``` 20 | 21 | ## Documentation 22 | 23 | To check out docs, visit [neuro.js.org](https://neuro.js.org). 24 | 25 | 26 | ## Contributors 27 | 28 | You may contribute in several ways like creating new features, fixing bugs, improving documentation and examples 29 | or translating any document here to your language. [Find more information in CODE_OF_CONDUCT.md](.github/CODE_OF_CONDUCT.md). 30 | Contributors 31 | 32 | ## License 33 | 34 | > Copyright (C) 2019 Intelligo LLC. 35 | > neuro.js is open-sourced software licensed under the [MIT](https://opensource.org/licenses/MIT) license. 36 | > (See the [LICENSE](https://github.com/intelligo-systems/neuro.js/blob/master/LICENSE) file for the whole license text.) 
37 | 38 | **[⬆ back to top](#neuro.js)** 39 | 40 | [Installation]:#installation 41 | [Documentation]:#documentation 42 | [Contributors]:#contributors 43 | [License]:#license 44 | 45 | -------------------------------------------------------------------------------- /src/core/svm/SvmLinearMulticlassDemo.js: -------------------------------------------------------------------------------- 1 | // simple demonstration of multiclass SVM, based on LibLinear 2 | 3 | var SvmLinear = require('./SvmLinear'); 4 | 5 | var trainSet = [ 6 | {input: [0,0], output: 3}, 7 | {input: [1,1], output: 3}, 8 | 9 | {input: [0,1], output: 4}, 10 | {input: [1,2], output: 4}, 11 | 12 | {input: [0,2], output: 5}, 13 | {input: [1,3], output: 5}, 14 | ]; 15 | 16 | // One separating line goes through [0,0.5] and [1,1.5]. It is: 17 | // 0.5+x-y = 0 18 | // or: -1-2x+2y = 0 19 | 20 | //Another separating line goes through [0,1.5] and [1,2.5]. It is: 21 | // 1.5+x-y = 0 22 | //or: -3-2x+2y = 0 23 | 24 | 25 | var classifier = new SvmLinear( 26 | { 27 | learn_args: "-c 20", 28 | model_file_prefix: "tempfiles/SvmLinearMulticlassDemo", 29 | multiclass: true, 30 | debug: false 31 | } 32 | ); 33 | classifier.trainBatch(trainSet); 34 | 35 | console.log("simple classification: "); 36 | console.dir(classifier.classify([1,0])); // 3 37 | console.dir(classifier.classify([0,1.3])); // 4 38 | console.dir(classifier.classify([0,1.7])); // 5 39 | console.dir(classifier.classify([0,3])); // 5 40 | 41 | console.log("model: "); 42 | console.dir(classifier.mapLabelToMapFeatureToWeight); // { '0': -1, '1': -2, '2': 2 } 43 | 44 | console.log("explained classification: "); 45 | console.dir(classifier.classify([1,0],3)); // 3 46 | console.dir(classifier.classify([0,1.3],3)); // 4 47 | console.dir(classifier.classify([0,1.7],3)); // 5 48 | console.dir(classifier.classify([0,3],3)); // 5 49 | 50 | console.log("classification with scores: "); 51 | console.dir(classifier.classify([1,0],0,true)); // 3 52 | 
console.dir(classifier.classify([0,1.3],0,true)); // 4 53 | console.dir(classifier.classify([0,1.7],0,true)); // 5 54 | console.dir(classifier.classify([0,3],0,true)); // 5 55 | 56 | console.log("explained classification with scores: "); 57 | console.dir(classifier.classify([1,0],3,true)); // 3 58 | console.dir(classifier.classify([0,1.3],3,true)); // 4 59 | console.dir(classifier.classify([0,1.7],3,true)); // 5 60 | console.dir(classifier.classify([0,3],3,true)); // 5 61 | -------------------------------------------------------------------------------- /src/core/multilabel/multilabelutils.js: -------------------------------------------------------------------------------- 1 | var _ = require("underscore")._; 2 | /** 3 | * A utility function, used by several multi-label classifiers. 4 | * 5 | * @param scoresVector [[label1,score1],[label2,score2],...] 6 | * @param explain (int) if >0, return explanation. 7 | * @param withScores (boolean) if true, return the original scores vector. 8 | * @param threshold if withScores is false, all labels with scores above this threshold will be returned. 9 | */ 10 | module.exports = { 11 | // iterate the list and collect the second item from the every element of the list 12 | getvalue: function (list) { 13 | val = [] 14 | for (elem in list) 15 | { val.push(list[elem][1]) } 16 | return val 17 | }, 18 | 19 | normalizeClasses: function (expectedClasses) { 20 | if (!_(expectedClasses).isArray()) 21 | expectedClasses = [expectedClasses]; 22 | 23 | expectedClasses = expectedClasses.map(this.stringifyClass); 24 | expectedClasses.sort(); 25 | return expectedClasses; 26 | }, 27 | 28 | stringifyClass: function (aClass) { 29 | return (_(aClass).isString()? aClass: JSON.stringify(aClass)); 30 | }, 31 | 32 | stringifyIfNeeded: function (label) { 33 | return (typeof(label)==='string'? 
label: JSON.stringify(label)); 34 | }, 35 | 36 | normalizeOutputLabels: function(labels) { 37 | if (!Array.isArray(labels)) 38 | labels = [labels]; 39 | return labels.map(module.exports.stringifyIfNeeded); 40 | }, 41 | 42 | mapScoresVectorToMultilabelResult: function(scoresVector, explain, withScores, threshold) { 43 | var results; 44 | if (withScores) { 45 | results = scoresVector; 46 | } else { 47 | results = []; 48 | scoresVector.forEach(function(pair) { 49 | if (pair[1]>=threshold) 50 | results.push(pair[0]); 51 | }); 52 | } 53 | return explain>0? { 54 | classes: results, 55 | explanation: scoresVector.map(function(pair) {return pair[0]+": "+pair[1];}) 56 | }: 57 | results; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /dist/core/multilabel/multilabelutils.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | var _ = require("underscore")._; 4 | /** 5 | * A utility function, used by several multi-label classifiers. 6 | * 7 | * @param scoresVector [[label1,score1],[label2,score2],...] 8 | * @param explain (int) if >0, return explanation. 9 | * @param withScores (boolean) if true, return the original scores vector. 10 | * @param threshold if withScores is false, all labels with scores above this threshold will be returned. 
11 | */ 12 | 13 | 14 | module.exports = { 15 | // iterate the list and collect the second item from the every element of the list 16 | getvalue: function getvalue(list) { 17 | val = []; 18 | 19 | for (elem in list) { 20 | val.push(list[elem][1]); 21 | } 22 | 23 | return val; 24 | }, 25 | normalizeClasses: function normalizeClasses(expectedClasses) { 26 | if (!_(expectedClasses).isArray()) expectedClasses = [expectedClasses]; 27 | expectedClasses = expectedClasses.map(this.stringifyClass); 28 | expectedClasses.sort(); 29 | return expectedClasses; 30 | }, 31 | stringifyClass: function stringifyClass(aClass) { 32 | return _(aClass).isString() ? aClass : JSON.stringify(aClass); 33 | }, 34 | stringifyIfNeeded: function stringifyIfNeeded(label) { 35 | return typeof label === 'string' ? label : JSON.stringify(label); 36 | }, 37 | normalizeOutputLabels: function normalizeOutputLabels(labels) { 38 | if (!Array.isArray(labels)) labels = [labels]; 39 | return labels.map(module.exports.stringifyIfNeeded); 40 | }, 41 | mapScoresVectorToMultilabelResult: function mapScoresVectorToMultilabelResult(scoresVector, explain, withScores, threshold) { 42 | var results; 43 | 44 | if (withScores) { 45 | results = scoresVector; 46 | } else { 47 | results = []; 48 | scoresVector.forEach(function (pair) { 49 | if (pair[1] >= threshold) results.push(pair[0]); 50 | }); 51 | } 52 | 53 | return explain > 0 ? 
"use strict";

// NOTE(review): this is Babel build output of src/utils/list.js - regenerate
// after editing the source; this in-place fix mirrors the source fix.
Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.median = median;
exports.variance = variance;
exports.average = average;
exports.listembed = listembed;
exports.clonedataset = clonedataset;

var _underscore = require("underscore");

/**
 * Utilities for lists
 *
 * @author Vasily Konovalov
 */
// Median: sort the array (in place) and pick the middle number;
// for an even count, average the two middle numbers.
function median(values) {
  values.sort(function (a, b) {
    return a - b;
  });
  var half = Math.floor(values.length / 2);
  if (values.length % 2) return values[half];else return (values[half - 1] + values[half]) / 2.0;
}

// Population variance: E[x^2] - (E[x])^2.
function variance(list) {
  // BUGFIX: these locals used to be implicit globals, which throws a
  // ReferenceError under this file's "use strict" directive.
  var sum = (0, _underscore.reduce)(list, function (memo, num) {
    return memo + num;
  }, 0);
  var exp = sum / list.length;
  var sum2 = (0, _underscore.reduce)(list, function (memo, num) {
    return memo + num * num;
  }, 0);
  var exp2 = sum2 / list.length;
  return exp2 - exp * exp;
}

// Arithmetic mean.
function average(list) {
  var sum = (0, _underscore.reduce)(list, function (memo, num) {
    return memo + num;
  }, 0);
  return sum / list.length;
} // @input - list
// @output - embedded list


function listembed(label) {
  if (label === null || label == undefined || typeof label == 'undefined') return [[]];
  if ((0, _underscore.isObject)(label) && !(0, _underscore.isArray)(label))
    if ('classes' in label) label = label.classes;
  if (!(label[0] instanceof Array)) return [label];else return label;
}

// Shallow-clone every sample of a dataset into a new array.
function clonedataset(set) {
  // BUGFIX: "set1" used to be an implicit global (ReferenceError in strict mode).
  var set1 = [];
  (0, _underscore.each)(set, function (value, key, list) {
    set1.push((0, _underscore.clone)(value));
  });
  return set1;
}
It is: 28 | // 1.5+x-y = 0 29 | //or: -3-2x+2y = 0 30 | 31 | var classifier = new SvmLinear({ 32 | learn_args: "-c 20", 33 | model_file_prefix: "tempfiles/SvmLinearMulticlassDemo", 34 | multiclass: true, 35 | debug: false 36 | }); 37 | classifier.trainBatch(trainSet); 38 | console.log("simple classification: "); 39 | console.dir(classifier.classify([1, 0])); // 3 40 | 41 | console.dir(classifier.classify([0, 1.3])); // 4 42 | 43 | console.dir(classifier.classify([0, 1.7])); // 5 44 | 45 | console.dir(classifier.classify([0, 3])); // 5 46 | 47 | console.log("model: "); 48 | console.dir(classifier.mapLabelToMapFeatureToWeight); // { '0': -1, '1': -2, '2': 2 } 49 | 50 | console.log("explained classification: "); 51 | console.dir(classifier.classify([1, 0], 3)); // 3 52 | 53 | console.dir(classifier.classify([0, 1.3], 3)); // 4 54 | 55 | console.dir(classifier.classify([0, 1.7], 3)); // 5 56 | 57 | console.dir(classifier.classify([0, 3], 3)); // 5 58 | 59 | console.log("classification with scores: "); 60 | console.dir(classifier.classify([1, 0], 0, true)); // 3 61 | 62 | console.dir(classifier.classify([0, 1.3], 0, true)); // 4 63 | 64 | console.dir(classifier.classify([0, 1.7], 0, true)); // 5 65 | 66 | console.dir(classifier.classify([0, 3], 0, true)); // 5 67 | 68 | console.log("explained classification with scores: "); 69 | console.dir(classifier.classify([1, 0], 3, true)); // 3 70 | 71 | console.dir(classifier.classify([0, 1.3], 3, true)); // 4 72 | 73 | console.dir(classifier.classify([0, 1.7], 3, true)); // 5 74 | 75 | console.dir(classifier.classify([0, 3], 3, true)); // 5 -------------------------------------------------------------------------------- /test/classifiersTest/multilabel/ClassifierWithSplitterTest.js: -------------------------------------------------------------------------------- 1 | /** 2 | * a unit-test for multi-label classifier with input-splitter (sentence splitter) 3 | */ 4 | 5 | import { EnhancedClassifier, multilabel, Winnow } from 
"../../../dist/core"; 6 | import { NGramsOfWords, RegexpSplitter } from "../../../dist/features"; 7 | 8 | describe.skip("baseline - classifier without a splitter", function() { 9 | it("should not classify long sentencs", function() { 10 | var classifier = new EnhancedClassifier({ 11 | classifierType: multilabel.BinaryRelevance.bind(this, { 12 | binaryClassifierType: Winnow.bind(this, { 13 | retrain_count: 10 14 | }) 15 | }), 16 | featureExtractor: NGramsOfWords(1), 17 | inputSplitter: null 18 | }); 19 | 20 | classifier.trainBatch([ 21 | { input: "I want aa", output: "A" }, 22 | { input: "I want bb", output: "B" }, 23 | { input: "I want cc", output: "C" } 24 | ]); 25 | 26 | classifier.classify("I want aa").should.eql(["A"]); 27 | classifier.classify("I want bb").should.eql(["B"]); 28 | classifier.classify("I want cc").should.eql(["C"]); 29 | classifier 30 | .classify("I want aa, I want bb, and I want cc") 31 | .should.not.eql(["A", "B", "C"]); 32 | }); 33 | }); 34 | 35 | describe.skip("classifier with a splitter", function() { 36 | it("should classify long sentencs", function() { 37 | var classifier = new EnhancedClassifier({ 38 | classifierType: multilabel.BinaryRelevance.bind(this, { 39 | binaryClassifierType: Winnow.bind(this, { 40 | retrain_count: 3 41 | }) 42 | }), 43 | featureExtractor: NGramsOfWords(1), 44 | inputSplitter: RegexpSplitter("[.,;?!]|and") 45 | }); 46 | 47 | classifier.trainBatch([ 48 | { input: "I want aa", output: "A" }, // train on single class 49 | { input: "I want bb", output: "B" }, // train on array with single class (same effect) 50 | { input: "I want cc", output: "C" } // train on structured class, that will be stringified to "{C:c}". 
/*
 Correlation between unseen words and False Negative

 The assumption is that a previously unseen word mostly causes a false-negative
 type of mistake. The module does cross-validation on the given dataset; for the
 test utterances that contain unseen words and a false-negative mistake, a dict
 is built where the key is a word and the value is the list of false-negative
 mistakes.

 @author Vasily Konovalov
 */

var _ = require('underscore')._;
var fs = require('fs');
var partitions = require('./partitions');
var trainAndTest = require('./trainAndTest').trainAndTest;
var trainAndTest_hash = require('./trainAndTest').trainAndTest_hash;

/** Lower-case and trim a sentence; return "" for undefined input. */
function normalizer(sentence) {
	if (typeof sentence == 'undefined') {
		return "";
	} else {
		return sentence.toLowerCase().trim();
	}
}

/** Build the unique vocabulary of all the inputs of a dataset. */
function tokenizedataset(dataset, tokenize) {
	// BUGFIX: "vocabulary" used to be an implicit global.
	var vocabulary = [];
	for (var sample in dataset) {
		if (dataset[sample].length != 0) {
			var words = tokenize(normalizer(dataset[sample]['input']));
			vocabulary = vocabulary.concat(words);
		}
	}
	return _.uniq(vocabulary);
}

/** Split a string into word/punctuation tokens, dropping empty strings and spaces. */
module.exports.tokenize = function(str) {
	// BUGFIX: "pattern" used to be an implicit global.
	// NOTE(review): the '\""' alternative matches two consecutive double-quote
	// characters; it looks like a typo for a single '\"' - confirm the intended
	// punctuation set before changing the pattern.
	var pattern = new RegExp(/(\w+|\!|\'|\"")/i);
	str = str.split(pattern);
	return _.without(str, '', ' ');
}
/*
@params dataset - dataset to estimate the correlation
@params classifier - classifier to estimate false negative mistakes.

*/
module.exports.unseen_correlation = function(dataset, classifier, tokenize) {
	// BUGFIX: all of the variables below used to be implicit globals, leaking
	// state between calls and across modules.
	var unseen_correlation = {};

	partitions.partitions(dataset, 5, function(trainSet, testSet, index) {
		var unseen_vocabulary = tokenizedataset(testSet, tokenize);
		var seen_vocabulary = tokenizedataset(trainSet, tokenize);
		var stats = trainAndTest_hash(classifier, trainSet, testSet, 5);

		_.each(stats['data'], function(report, key, list) {
			if (report['explanations']['FN'].length > 0) {
				// Words in this test input that were never seen during training:
				var unseen_words = _.difference(tokenize(normalizer(report['input'])), seen_vocabulary);
				_.each(unseen_words, function(word, key, list) {
					if (!(word in unseen_correlation)) {
						unseen_correlation[word] = [];
					}
					unseen_correlation[word].push(report['explanations']['FN']);
				});
			}
		});
	});
	return unseen_correlation;
}
10 | 11 | @author Vasily Konovalov 12 | */ 13 | var _ = require('underscore')._; 14 | 15 | var fs = require('fs'); 16 | 17 | var partitions = require('./partitions'); 18 | 19 | var trainAndTest = require('./trainAndTest').trainAndTest; 20 | 21 | var trainAndTest_hash = require('./trainAndTest').trainAndTest_hash; 22 | 23 | function normalizer(sentence) { 24 | if (typeof sentence == 'undefined') { 25 | return ""; 26 | } else { 27 | return sentence.toLowerCase().trim(); 28 | } 29 | } 30 | 31 | function tokenizedataset(dataset, tokenize) { 32 | vocabulary = []; 33 | 34 | for (var sample in dataset) { 35 | if (dataset[sample].length != 0) { 36 | var words = tokenize(normalizer(dataset[sample]['input'])); 37 | vocabulary = vocabulary.concat(words); 38 | } 39 | } 40 | 41 | return _.uniq(vocabulary); 42 | } 43 | 44 | module.exports.tokenize = function (str) { 45 | pattern = new RegExp(/(\w+|\!|\'|\"")/i); 46 | str = str.split(pattern); 47 | return _.without(str, '', ' '); 48 | }; 49 | /* 50 | @params dataset - dataset to estimate the correlation 51 | @params classifier - classifier to estimate false negative mistakes. 
/**
 * Unit tests for the Partitions unit (creating partitions for train and test).
 */

import _, { isEqual } from "underscore";
import { partitions } from "../../dist/utils";

describe("partitions", () => {
  it("partitions_consistent_by_fold", () => {
    const dataset = [1, 3, 5, 7, 9, 11, 13];
    const fold = partitions.partitions_consistent_by_fold(dataset, 2, 1);
    isEqual(fold, {
      train: [1, 3, 5, 13],
      test: [7, 9, 11]
    }).should.be.true;
  });

  it.skip("partitions_hash_fold", () => {
    const dataset = {
      label1: [1, 3, 5, 7, 9, 11, 13],
      label2: [0, 2, 4, 6, 8, 10, 12]
    };
    let fold = partitions.partitions_hash_fold(dataset, 2, 1);
    isEqual(fold["test"], [7, 9, 11, 6, 8, 10]).should.be.true;
    fold = partitions.partitions_hash_fold(dataset, 3, 2);
    isEqual(fold["test"], [9, 11, 8, 10]).should.be.true;
  });

  it("partition hash", () => {
    const dataset = {
      label1: [1, 3, 5, 7, 9, 11, 13],
      label2: [0, 2, 4, 6, 8, 10, 12]
    };
    partitions.partitions_hash(dataset, 2, (train, test, index) => {
      test.should.have.lengthOf(6);
      train.should.have.lengthOf(4);
    });
  });

  // A dummy dataset with 10 documents:
  const dataset = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];

  it("creates 5 partitions, with a test-set of 2 in each", () => {
    let foldCount = 0;
    partitions.partitions(dataset, 5, (train, test, index) => {
      train.should.have.lengthOf(8);
      test.should.have.lengthOf(2);
      // Most important check - make sure there is no leak from train to test!
      _(test)
        .intersection(train)
        .should.have.lengthOf(0);
      foldCount++;
    });
    foldCount.should.equal(5);
  });

  it("creates 3 partitions, with a test-set of 3 in each", () => {
    let foldCount = 0;
    partitions.partitions(dataset, 3, (train, test, index) => {
      train.should.have.lengthOf(7);
      test.should.have.lengthOf(3);
      // Most important check - make sure there is no leak from train to test!
      _(test)
        .intersection(train)
        .should.have.lengthOf(0);
      foldCount++;
    });
    foldCount.should.equal(3);
  });
});
TextClassifier, 56 | normalizer: _features.LowerCaseNormalizer, 57 | featureExtractor: WordExtractor 58 | }); 59 | 60 | //Train and test: 61 | intentClassifier.trainBatch([ 62 | { 63 | input: "I want an apple", 64 | output: "apl" 65 | }, 66 | { 67 | input: "I want a banana", 68 | output: "bnn" 69 | }, 70 | { 71 | input: "I want chips", 72 | output: "cps" 73 | } 74 | ]); 75 | 76 | intentClassifier 77 | .classify("I want an apple and a banana") 78 | .sort() 79 | .should.eql(["apl", "bnn"]); 80 | intentClassifier 81 | .classify("I WANT AN APPLE AND A BANANA") 82 | .sort() 83 | .should.eql(["apl", "bnn"]); // case insensitive 84 | }); 85 | }); 86 | -------------------------------------------------------------------------------- /src/features/index.js: -------------------------------------------------------------------------------- 1 | export const NGramsOfLetters = require("./NGramsOfLetters"); 2 | export const Hypernyms = require("./HypernymExtractor"); 3 | export const FeatureLookupTable = require("./FeatureLookupTable"); 4 | export const LowerCaseNormalizer = require("./LowerCaseNormalizer"); 5 | export const RegexpNormalizer = require("./RegexpNormalizer"); 6 | export const RegexpSplitter = require("./RegexpSplitter"); 7 | 8 | /** 9 | * CollectionOfExtractors - combines the features from several feature extractors. 10 | * @param extractors - an array of other feature extractors. 11 | * @param sample - a string. 12 | * @param features an initial hash of features (optional). 13 | * @return a hash with all features generated from the sample by the different extractors 14 | */ 15 | export function CollectionOfExtractors(extractors) { 16 | return function(sample, features) { 17 | for (var i=0; i 0 ? 1 : -1); 32 | }); 33 | return this.base.train(data, labels, this.opts); 34 | }, 35 | 36 | /** 37 | * @param features - a feature-value hash. 38 | * @param explain - int - if positive, an "explanation" field, with the given length, will be added to the result. 
39 | * @param continuous_output if true, return the net classification score. If false [default], return 0 or 1. 40 | * @return the binary classification - 0 or 1. 41 | */ 42 | classify: function(features, explain, continuous_output) { 43 | var score = this.base.marginOne(features); 44 | var classification = continuous_output ? score : score > 0 ? 1 : 0; 45 | 46 | if (explain > 0) { 47 | var f = this.base.b; 48 | 49 | // if the linear kernel was used and w was computed and stored, 50 | // (i.e. the svm has fully finished training) 51 | // the internal class variable usew_ will be set to true. 52 | var explanations = []; 53 | if (this.base.usew_) { 54 | var w = this.base.w; 55 | for (var j = 0; j < this.base.D; j++) { 56 | explanations[j] = { 57 | feature: j, 58 | value: features[j], 59 | weight: w[j], 60 | relevance: features[j] * w[j] 61 | }; 62 | } 63 | } else { 64 | // explanations not supported. 65 | //for(var i=0;i 0 ? 1 : -1); 29 | }); 30 | return this.base.train(data, labels, this.opts); 31 | }, 32 | 33 | /** 34 | * @param features - a feature-value hash. 35 | * @param explain - int - if positive, an "explanation" field, with the given length, will be added to the result. 36 | * @param continuous_output if true, return the net classification score. If false [default], return 0 or 1. 37 | * @return the binary classification - 0 or 1. 38 | */ 39 | classify: function classify(features, explain, continuous_output) { 40 | var score = this.base.marginOne(features); 41 | var classification = continuous_output ? score : score > 0 ? 1 : 0; 42 | 43 | if (explain > 0) { 44 | var f = this.base.b; // if the linear kernel was used and w was computed and stored, 45 | // (i.e. the svm has fully finished training) 46 | // the internal class variable usew_ will be set to true. 
47 | 48 | var explanations = []; 49 | 50 | if (this.base.usew_) { 51 | var w = this.base.w; 52 | 53 | for (var j = 0; j < this.base.D; j++) { 54 | explanations[j] = { 55 | feature: j, 56 | value: features[j], 57 | weight: w[j], 58 | relevance: features[j] * w[j] 59 | }; 60 | } 61 | } else {// explanations not supported. 62 | //for(var i=0;i frontmatter.date <= new Date(), 30 | count: 20 31 | } 32 | ] 33 | ], 34 | head: [ 35 | [ 36 | 'link', 37 | { rel: 'apple-touch-icon', sizes: '57x57', href: '/apple-icon-57x57.png' } 38 | ], 39 | [ 40 | 'link', 41 | { rel: 'apple-touch-icon', sizes: '60x60', href: '/apple-icon-60x60.png' } 42 | ], 43 | [ 44 | 'link', 45 | { rel: 'apple-touch-icon', sizes: '72x72', href: '/apple-icon-72x72.png' } 46 | ], 47 | [ 48 | 'link', 49 | { rel: 'apple-touch-icon', sizes: '76x76', href: '/apple-icon-76x76.png' } 50 | ], 51 | [ 52 | 'link', 53 | { 54 | rel: 'apple-touch-icon', 55 | sizes: '114x114', 56 | href: '/apple-icon-114x114.png' 57 | } 58 | ], 59 | [ 60 | 'link', 61 | { 62 | rel: 'apple-touch-icon', 63 | sizes: '120x120', 64 | href: '/apple-icon-120x120.png' 65 | } 66 | ], 67 | [ 68 | 'link', 69 | { 70 | rel: 'apple-touch-icon', 71 | sizes: '144x144', 72 | href: '/apple-icon-144x144.png' 73 | } 74 | ], 75 | [ 76 | 'link', 77 | { 78 | rel: 'apple-touch-icon', 79 | sizes: '152x152', 80 | href: '/apple-icon-152x152.png' 81 | } 82 | ], 83 | [ 84 | 'link', 85 | { 86 | rel: 'apple-touch-icon', 87 | sizes: '180x180', 88 | href: '/apple-icon-180x180.png' 89 | } 90 | ], 91 | [ 92 | 'link', 93 | { 94 | rel: 'icon', 95 | type: 'image/png', 96 | sizes: '192x192', 97 | href: '/android-icon-192x192.png' 98 | } 99 | ], 100 | [ 101 | 'link', 102 | { 103 | rel: 'icon', 104 | type: 'image/png', 105 | sizes: '32x32', 106 | href: '/favicon-32x32.png' 107 | } 108 | ], 109 | [ 110 | 'link', 111 | { 112 | rel: 'icon', 113 | type: 'image/png', 114 | sizes: '96x96', 115 | href: '/favicon-96x96.png' 116 | } 117 | ], 118 | [ 119 | 'link', 120 | { 121 | rel: 
'icon', 122 | type: 'image/png', 123 | sizes: '16x16', 124 | href: '/favicon-16x16.png' 125 | } 126 | ], 127 | ['link', { rel: 'manifest', href: '/manifest.json' }], 128 | ['meta', { name: 'msapplication-TileColor', content: '#ffffff' }], 129 | [ 130 | 'meta', 131 | { name: 'msapplication-TileImage', content: '/ms-icon-144x144.png' } 132 | ], 133 | ['meta', { name: 'theme-color', content: '#ffffff' }] 134 | ] 135 | } 136 | -------------------------------------------------------------------------------- /dist/features/index.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | Object.defineProperty(exports, "__esModule", { 4 | value: true 5 | }); 6 | exports.CollectionOfExtractors = CollectionOfExtractors; 7 | exports.NGramsFromArray = NGramsFromArray; 8 | exports.NGramsOfWords = NGramsOfWords; 9 | exports.call = call; 10 | exports.normalize = normalize; 11 | exports.RegexpSplitter = exports.RegexpNormalizer = exports.LowerCaseNormalizer = exports.FeatureLookupTable = exports.Hypernyms = exports.NGramsOfLetters = void 0; 12 | 13 | var NGramsOfLetters = require("./NGramsOfLetters"); 14 | 15 | exports.NGramsOfLetters = NGramsOfLetters; 16 | 17 | var Hypernyms = require("./HypernymExtractor"); 18 | 19 | exports.Hypernyms = Hypernyms; 20 | 21 | var FeatureLookupTable = require("./FeatureLookupTable"); 22 | 23 | exports.FeatureLookupTable = FeatureLookupTable; 24 | 25 | var LowerCaseNormalizer = require("./LowerCaseNormalizer"); 26 | 27 | exports.LowerCaseNormalizer = LowerCaseNormalizer; 28 | 29 | var RegexpNormalizer = require("./RegexpNormalizer"); 30 | 31 | exports.RegexpNormalizer = RegexpNormalizer; 32 | 33 | var RegexpSplitter = require("./RegexpSplitter"); 34 | /** 35 | * CollectionOfExtractors - combines the features from several feature extractors. 36 | * @param extractors - an array of other feature extractors. 37 | * @param sample - a string. 
/**
 * CollectionOfExtractors - combines the features produced by several
 * feature extractors into a single feature hash.
 * @param extractors an array of feature-extractor functions.
 * @return a function (sample, features) that runs every extractor in order.
 */
function CollectionOfExtractors(extractors) {
  return function (sample, features) {
    for (var k = 0; k < extractors.length; ++k) {
      extractors[k](sample, features);
    }
  };
}

/**
 * Converts an array of words/tokens to n-grams of a given size, optionally
 * with a gap (the middle token replaced by "-"). The grams array is padded
 * with "[start]"/"[end]" markers for the duration of the call and restored
 * before returning.
 * @param numOfWords n - the size of each n-gram.
 * @param gap if truthy, the middle token of each gram is replaced by "-".
 * @param grams the array of tokens (temporarily mutated, then restored).
 * @param features hash to which each n-gram is added as a key with value 1.
 */
function NGramsFromArray(numOfWords, gap, grams, features) {
  var padding = numOfWords - 1 - (gap ? 1 : 0);

  for (var p = 0; p < padding; ++p) {
    grams.unshift("[start]");
    grams.push("[end]");
  }

  var lastStart = grams.length - numOfWords;
  for (var start = 0; start <= lastStart; ++start) {
    var piece = grams.slice(start, start + numOfWords);
    if (gap) piece[1] = "-";
    features[piece.join(" ").trim()] = 1;
  }

  // Undo the temporary padding so the caller's array is unchanged.
  for (var p = 0; p < padding; ++p) {
    grams.pop();
    grams.shift();
  }
}

/**
 * Returns a feature extractor that splits a sample string into words
 * (on whitespace and punctuation) and emits their n-grams.
 * @param numOfWords n - the size of each n-gram.
 * @param gap if truthy, generate gapped n-grams (middle token replaced).
 */
function NGramsOfWords(numOfWords, gap) {
  return function (sample, features) {
    var tokens = [];
    sample.split(/[ \t,;:.!?]/).forEach(function (w) {
      if (w) tokens.push(w); // keep only non-empty words
    });
    NGramsFromArray(numOfWords, gap, tokens, features);
  };
}

/**
 * Calls the given featureExtractor on the given sample and returns the
 * resulting feature hash. Used for testing.
 */
function call(featureExtractor, sample) {
  var collected = {};
  featureExtractor(sample, collected);
  return collected;
}

/**
 * Normalizes a feature-extractor argument: a falsy value is returned
 * unchanged, an array is wrapped in a CollectionOfExtractors, and a single
 * extractor function is returned as-is.
 */
function normalize(featureExtractorOrArray) {
  if (!featureExtractorOrArray) {
    return featureExtractorOrArray;
  }
  if (Array.isArray(featureExtractorOrArray)) {
    return new CollectionOfExtractors(featureExtractorOrArray);
  }
  return featureExtractorOrArray;
}
featureExtractorOrArray : Array.isArray(featureExtractorOrArray) ? new CollectionOfExtractors(featureExtractorOrArray) : featureExtractorOrArray; 107 | } -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 
| response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at toroo.byamba@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /test/classifiersTest/NeuralWithNormalizerTest.js: -------------------------------------------------------------------------------- 1 | /** 2 | * a unit-test for Enhanced Classifier 3 | */ 4 | 5 | import { EnhancedClassifier, NeuralNetwork } from "../../dist/core"; 6 | import { 7 | LowerCaseNormalizer, 8 | NGramsOfWords, 9 | RegexpNormalizer 10 | } from "../../dist/features"; 11 | 12 | describe("baseline - classifier without a normalizer", function() { 13 | it("errs on non-normalized sentencs", function() { 14 | var spamClassifier = new EnhancedClassifier({ 15 | classifierType: NeuralNetwork, 16 | featureExtractor: NGramsOfWords(1), 17 | normalizer: null 18 | }); 19 | 20 | spamClassifier.trainBatch([ 21 | { 22 | input: "cheaper watches", 23 | output: [1] 24 | }, 25 | { 26 | input: "", 27 | output: [0] 28 | } 29 | ]); 30 | 31 | spamClassifier.classify("cheaper watches").should.be.above(0.8); // high number (spam) 32 | spamClassifier.classify("cheapest watch es").should.be.below(0.2); // very high number (spam) 33 | spamClassifier.classify("cheapless clocks").should.be.below(0.2); // low number (not spam) 34 | }); 35 | }); 36 | 37 | describe("classifier with a single normalizer", function() { 38 | it("classifies sentences correctly", function() { 39 | var spamClassifier = new EnhancedClassifier({ 40 | classifierType: NeuralNetwork, 41 | featureExtractor: NGramsOfWords(1), 42 | normalizer: RegexpNormalizer([ 43 | { 44 | source: "er\\b", 45 | target: "" 46 | }, 47 | { 48 | source: "est\\b", 49 | 
target: "" 50 | }, 51 | { 52 | source: " es\\b", 53 | target: "es" 54 | } 55 | ]) 56 | }); 57 | 58 | spamClassifier.trainBatch([ 59 | { 60 | input: "cheaper watches", 61 | output: [1] 62 | }, 63 | { 64 | input: "", 65 | output: [0] 66 | } 67 | ]); 68 | 69 | spamClassifier.classify("cheaper watches").should.be.above(0.8); // high number (spam) 70 | spamClassifier.classify("cheapest watch es").should.be.above(0.8); // low number (not spam) 71 | spamClassifier.classify("cheapless clocks").should.be.below(0.2); // low number (not spam) 72 | }); 73 | }); 74 | 75 | describe("classifier with an array of normalizers", function() { 76 | it("classifies sentences correctly", function() { 77 | var spamClassifier = new EnhancedClassifier({ 78 | classifierType: NeuralNetwork, 79 | featureExtractor: NGramsOfWords(1), 80 | normalizer: [ 81 | LowerCaseNormalizer, 82 | RegexpNormalizer([ 83 | { 84 | source: "er\\b", 85 | target: "" 86 | } 87 | ]), 88 | RegexpNormalizer([ 89 | { 90 | source: "est\\b", 91 | target: "" 92 | } 93 | ]), 94 | RegexpNormalizer([ 95 | { 96 | source: " es\\b", 97 | target: "es" 98 | } 99 | ]) 100 | ] 101 | }); 102 | 103 | spamClassifier.trainBatch([ 104 | { 105 | input: "ChEaPeR WaTcHeS", 106 | output: [1] 107 | }, 108 | { 109 | input: "", 110 | output: [0] 111 | } 112 | ]); 113 | 114 | spamClassifier.classify("cheaper watches").should.be.above(0.8); // high number (spam) 115 | spamClassifier.classify("cheapest watch es").should.be.above(0.8); // high number (spam) 116 | spamClassifier.classify("cheapless clocks").should.be.below(0.2); // low number (not spam) 117 | }); 118 | }); 119 | -------------------------------------------------------------------------------- /test/classifiersTest/multilabel/MulticlassSegmentationBayesTest.js: -------------------------------------------------------------------------------- 1 | /** 2 | * a unit-test for Multi-Label classification in the multiclass segmentation method 3 | */ 4 | 5 | import { Bayesian, multilabel } from 
"../../../src/core"; 6 | import { NGramsOfWords } from "../../../src/features"; 7 | import "../../sorted"; 8 | 9 | var MulticlassSegmentationBayes = multilabel.MulticlassSegmentation.bind(this, { 10 | multiclassClassifierType: Bayesian.bind(this, { 11 | calculateRelativeProbabilities: true 12 | }), 13 | featureExtractor: NGramsOfWords(1) 14 | }); 15 | 16 | // MulticlassSegmentationBayes is now in repair 17 | describe.skip("Multi-Label MCS Classifier Trained on Single-class inputs", function() { 18 | var classifier = new MulticlassSegmentationBayes(); 19 | classifier.trainBatch([ 20 | { input: "I want aa", output: "A" }, 21 | { input: "I want bb", output: "B" }, 22 | { input: "I want cc", output: "C" } 23 | ]); 24 | 25 | it("classifies 1-class samples", function() { 26 | classifier.classify("I want aa").should.eql(["A"]); 27 | classifier.classify("I want bb").should.eql(["B"]); 28 | classifier.classify("I want cc").should.eql(["C"]); 29 | }); 30 | 31 | it("classifies 2-class samples", function() { 32 | classifier 33 | .classify("I want aa bb") 34 | .sorted() 35 | .should.eql(["A", "B"]); 36 | classifier 37 | .classify("I want bb cc") 38 | .sorted() 39 | .should.eql(["B", "C"]); 40 | classifier 41 | .classify("I want cc aa") 42 | .sorted() 43 | .should.eql(["A", "C"]); 44 | }); 45 | 46 | it("classifies 2-class samples with a redundant word", function() { 47 | classifier 48 | .classify("I want aa and bb") 49 | .sorted() 50 | .should.eql(["A", "B"]); 51 | classifier 52 | .classify("I want bb and cc") 53 | .sorted() 54 | .should.eql(["B", "C"]); 55 | classifier 56 | .classify("I want cc and aa") 57 | .sorted() 58 | .should.eql(["A", "C"]); 59 | }); 60 | 61 | it("classifies 3-class samples", function() { 62 | classifier 63 | .classify("I want cc and aa and bb") 64 | .sorted() 65 | .should.eql(["A", "B", "C"]); 66 | }); 67 | 68 | // TODO: fix this case 69 | // it('classifies 0-class samples', function() { 70 | // classifier.classify("I want nothing").should.eql([]); 71 | 
// }); 72 | }); 73 | 74 | /*describe('Multi-Label MCS Classifier Trained on two-class inputs', function() { 75 | var classifier = new MulticlassSegmentationBayes(); 76 | classifier.trainBatch([ 77 | {input: {I:1 , want:1 , aa:1 , bb:1 }, output: ['A','B']}, // train on array with classes 78 | {input: {I:1 , want:1 , bb:1 , cc:1 }, output: ['B','C']}, // train on array with classes 79 | {input: {I:1 , want:1 , cc:1 , dd:1 }, output: [{C:1, D:1}]}, // train on set of classes 80 | {input: {I:1 , want:1 , dd:1 , aa:1 }, output: [{D:1, A:1}]}, // train on set of classes 81 | ]); 82 | 83 | it('classifies 1-class samples', function() { 84 | classifier.classify({I:1 , want:1 , aa:1 }).should.eql(['A']); 85 | //classifier.classify({I:1 , want:1 , bb:1 }).should.eql(['B']); 86 | //classifier.classify({I:1 , want:1 , cc:1 }).should.eql(['C']); 87 | //classifier.classify({I:1 , want:1 , dd:1 }).should.eql(['D']); 88 | }); 89 | 90 | it('classifies 2-class samples', function() { 91 | classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }).should.eql(['A','B']); 92 | classifier.classify({I:1 , want:1 , bb:1 , and:1 , cc:1 }).should.eql(['B','C']); 93 | //classifier.classify({I:1 , want:1 , cc:1 , and:1 , dd:1 }).should.eql(['C','D']); 94 | //classifier.classify({I:1 , want:1 , dd:1 , and:1 , aa:1 }).should.eql(['D','A']); 95 | }); 96 | }); 97 | 98 | 99 | */ 100 | -------------------------------------------------------------------------------- /test/classifiersTest/WinnowTest.js: -------------------------------------------------------------------------------- 1 | /** 2 | * a unit-test for winnow classifier 3 | */ 4 | 5 | import { Winnow } from "../../dist/core"; 6 | 7 | var WinnowClassifier = Winnow.bind(this, { 8 | retrain_count: 10, 9 | do_averaging: false, 10 | margin: 1 11 | }); 12 | 13 | describe("winnow classifier", function() { 14 | it("supports online training", function() { 15 | var classifier = new WinnowClassifier(); 16 | classifier.trainOnline( 17 | { 18 | a: 1, 19 
| b: 0 20 | }, 21 | 0 22 | ); 23 | classifier 24 | .classify({ 25 | a: 1, 26 | b: 0 27 | }) 28 | .should.equal(0); 29 | classifier 30 | .classify({ 31 | a: 0, 32 | b: 0 33 | }) 34 | .should.equal(0); 35 | classifier 36 | .classify({ 37 | a: 0, 38 | b: 1 39 | }) 40 | .should.equal(0); 41 | classifier 42 | .classify({ 43 | a: 1, 44 | b: 1 45 | }) 46 | .should.equal(0); 47 | 48 | classifier.trainOnline( 49 | { 50 | a: 0, 51 | b: 1 52 | }, 53 | 1 54 | ); 55 | classifier 56 | .classify({ 57 | a: 1, 58 | b: 0 59 | }) 60 | .should.equal(0); 61 | classifier 62 | .classify({ 63 | a: 0, 64 | b: 1 65 | }) 66 | .should.equal(1); 67 | }); 68 | 69 | it("supports batch and online training", function() { 70 | var dataset = [ 71 | { 72 | input: { 73 | a: 1, 74 | b: 0 75 | }, 76 | output: 0 77 | }, 78 | { 79 | input: { 80 | a: 0, 81 | b: 1 82 | }, 83 | output: 1 84 | } 85 | ]; 86 | //console.log("batch: "); 87 | var classifierBatch = new WinnowClassifier(); 88 | classifierBatch.trainBatch(dataset); 89 | //console.dir(classifierBatch); 90 | 91 | //console.log("online: "); 92 | var classifierOnline = new WinnowClassifier(); 93 | for (var i = 0; i <= classifierBatch.retrain_count; ++i) 94 | for (var d = 0; d < dataset.length; ++d) 95 | classifierOnline.trainOnline(dataset[d].input, dataset[d].output); 96 | //console.dir(classifierOnline); 97 | 98 | classifierOnline.should.eql(classifierBatch); 99 | }); 100 | 101 | it("supports continuous output", function() { 102 | var classifier = new WinnowClassifier(); 103 | classifier.trainOnline( 104 | { 105 | a: 1, 106 | b: 0 107 | }, 108 | 0 109 | ); 110 | classifier.trainOnline( 111 | { 112 | a: 0, 113 | b: 1 114 | }, 115 | 1 116 | ); 117 | classifier 118 | .classify( 119 | { 120 | a: 1, 121 | b: 0 122 | }, 123 | 0, 124 | true 125 | ) 126 | .should.be.below(0); 127 | classifier 128 | .classify( 129 | { 130 | a: 0, 131 | b: 1 132 | }, 133 | 0, 134 | true 135 | ) 136 | .should.be.above(0); 137 | }); 138 | 139 | it("explains its decisions", 
function() { 140 | var classifier = new WinnowClassifier(); 141 | classifier.trainOnline( 142 | { 143 | a: 1, 144 | b: 0 145 | }, 146 | 0 147 | ); 148 | classifier 149 | .classify( 150 | { 151 | a: 0, 152 | b: 0 153 | }, 154 | /*explain=*/ 1 155 | ) 156 | .should.have.property("explanation") 157 | .with.lengthOf(1); 158 | classifier 159 | .classify( 160 | { 161 | a: 0, 162 | b: 0 163 | }, 164 | /*explain=*/ 3 165 | ) 166 | .should.have.property("explanation") 167 | .with.lengthOf(3); 168 | }); 169 | }); 170 | -------------------------------------------------------------------------------- /src/core/svm/svmcommon.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Utilities common to SVM wrappers 3 | */ 4 | 5 | var temp = require('temp') 6 | , fs = require('fs') 7 | , svmlight = require('../../formats/svmlight') 8 | , _ = require('underscore')._ 9 | 10 | /** 11 | * Writes the given dataset to a file in svm-light format. 12 | * @return the file name. 13 | */ 14 | module.exports.writeDatasetToFile = function(dataset, bias, binarize, model_file_prefix, default_file_prefix, firstFeatureNumber) { 15 | if (model_file_prefix) { 16 | var learnFile = model_file_prefix+".learn"; 17 | var fd = fs.openSync(learnFile, 'w'); 18 | } else { 19 | var tempFile = temp.openSync({prefix:default_file_prefix+"-", suffix:".learn"}); 20 | var learnFile = tempFile.path; 21 | var fd = tempFile.fd; 22 | } 23 | var datasetSvmlight = svmlight.toSvmLight(dataset, bias, binarize, firstFeatureNumber); 24 | fs.writeSync(fd, datasetSvmlight); 25 | fs.closeSync(fd); 26 | 27 | return learnFile; 28 | } 29 | 30 | /** 31 | * A utility that classifies a given sample (given as a feature-value map) using a model (given as a feature-weight map). 32 | * @param modelMap a map {feature_i: weight_i, ....} (i >= 0; 0 is the weight of the bias, if exists). 33 | * @param bias if nonzero, added at the beginning of features. 
/**
 * A utility that classifies a given sample (given as a feature-value map) using a model (given as a feature-weight map).
 * @param modelMap a map {feature_i: weight_i, ....} (i >= 0; 0 is the weight of the bias, if exists).
 * @param bias if nonzero, added at the beginning of features.
 * @param features a map {feature_i: value_i, ....} (i >= 1)
 * @param explain (int) if positive, generate explanation about the classification.
 * @param continuous_output (boolean) if true, return a score; if false, return 0 or 1.
 * @param featureLookupTable if not null, used for creating meaningful explanations.
 * @returns a classification value, or {classification, explanation} when explain > 0.
 */
function classifyWithModelMap(modelMap, bias, features, explain, continuous_output, featureLookupTable) {
	var explanations = [];
	var result = 0;
	if (bias && modelMap[0]) {
		var weight = modelMap[0];
		var relevance = bias*modelMap[0];
		result = relevance;
		if (explain>0) explanations.push(
			{
				feature: 'bias',
				value: bias,
				weight: weight,
				relevance: relevance,
			}
		);
	}

	for (var feature in features) {
		// BUGFIX: always pass a radix to parseInt.
		var featureInModelMap = parseInt(feature, 10)+(bias?1:0);
		if (featureInModelMap in modelMap) {
			var weight = modelMap[featureInModelMap];
			var value = features[feature];
			var relevance = weight*value;
			result += relevance;

			if (explain>0) explanations.push(
				{
					feature: featureLookupTable? (featureLookupTable.numberToFeature(feature)||"?"): feature,
					value: value,
					weight: weight,
					relevance: relevance,
				}
			);
		}
	}

	if (!continuous_output)
		result = (result>0? 1: 0);
	// IMPROVED: Number.isNaN replaces _.isNaN; 'result' is always a number
	// here, so the semantics are identical and the underscore dependency
	// in this function is dropped.
	if (Number.isNaN(result)) {
		console.dir(explanations);
		throw new Error("result is NaN when classifying "+features+" with "+JSON.stringify(modelMap))
	}
	if (explain>0) {
		// Most relevant features first:
		explanations.sort(function(a,b){return Math.abs(b.relevance)-Math.abs(a.relevance)});
		explanations = explanations.filter(function(num){ return num.relevance!=0 });

		// explanations.splice(explain, explanations.length-explain); // "explain" is the max length of explanation.

		// ROBUSTNESS: the original read this.detailed_explanations directly,
		// which throws when the function is called detached from the exports
		// object (e.g. under strict mode with an undefined 'this').
		var detailed = this && this.detailed_explanations;
		if (!detailed) {
			// Compact form: [feature, relevance] pairs, sorted by relevance descending.
			explanations = explanations.map(function(e) {
				return [e.feature, e.relevance];
			});
			explanations.sort(function(a,b){ return a[1]-b[1] }).reverse();
		}
		return {
			classification: result,
			explanation: explanations
		};
	} else {
		return result;
	}
}

// Export guard: a plain property assignment keeps CommonJS behavior identical,
// while the typeof check lets the file load in non-CommonJS contexts.
if (typeof module !== "undefined" && module.exports) {
	module.exports.classifyWithModelMap = classifyWithModelMap;
}
/**
 * A utility that classifies a given sample (a feature-value map) using a model (a feature-weight map).
 *
 * @param modelMap a map {feature_i: weight_i, ...} (i >= 0; 0 is the weight of the bias, if it exists).
 * @param bias if nonzero, added at the beginning of features (model indices are then shifted by 1).
 * @param features a map {feature_i: value_i, ...}.
 * @param explain (int) if positive, generate an explanation of the classification.
 * @param continuous_output (boolean) if true, return the raw score; if false, return 0 or 1.
 * @param featureLookupTable if not null, used for creating meaningful explanations.
 * @returns the classification value, or {classification, explanation} when explain > 0.
 */
var classifyWithModelMap = function classifyWithModelMap(modelMap, bias, features, explain, continuous_output, featureLookupTable) {
  var explanations = [];
  var result = 0;

  if (bias && modelMap[0]) {
    // the bias term is stored at index 0 of the model map:
    var biasWeight = modelMap[0];
    var biasRelevance = bias * biasWeight;
    result = biasRelevance;
    if (explain > 0) explanations.push({
      feature: 'bias',
      value: bias,
      weight: biasWeight,
      relevance: biasRelevance
    });
  }

  for (var feature in features) {
    // with a bias, model indices are shifted by 1 relative to feature indices:
    var featureInModelMap = parseInt(feature, 10) + (bias ? 1 : 0); // BUGFIX: explicit radix

    if (featureInModelMap in modelMap) {
      var weight = modelMap[featureInModelMap];
      var value = features[feature];
      var relevance = weight * value;
      result += relevance;
      if (explain > 0) explanations.push({
        feature: featureLookupTable ? featureLookupTable.numberToFeature(feature) || "?" : feature,
        value: value,
        weight: weight,
        relevance: relevance
      });
    }
  }

  if (!continuous_output) result = result > 0 ? 1 : 0;

  if (Number.isNaN(result)) {
    console.dir(explanations);
    // BUGFIX: stringify the features map too (it used to print "[object Object]"):
    throw new Error("result is NaN when classifying " + JSON.stringify(features) + " with " + JSON.stringify(modelMap));
  }

  if (explain > 0) {
    // keep only contributing features, strongest (by absolute relevance) first:
    explanations = explanations.filter(function (e) {
      return e.relevance != 0;
    }).sort(function (a, b) {
      return Math.abs(b.relevance) - Math.abs(a.relevance);
    });

    if (!this.detailed_explanations) {
      // compact form: [featureName, relevance] pairs, by signed relevance, descending:
      explanations = explanations.map(function (e) {
        return [e.feature, e.relevance];
      }).sort(function (a, b) {
        return a[1] - b[1];
      }).reverse();
    }

    return {
      classification: result,
      explanation: explanations
    };
  } else {
    return result;
  }
};

// guarded so the compiled file also loads outside CommonJS:
if (typeof module !== "undefined" && module.exports) module.exports.classifyWithModelMap = classifyWithModelMap;
inputs", function() { 39 | var classifierBatch = new MetaLabelerLanguageModel(); 40 | classifierBatch.trainBatch(dataset); 41 | 42 | var classifier = classifierBatch; 43 | 44 | it("classifies 1-class samples", function() { 45 | classifier.classify({ I: 1, want: 1, aa: 1 }).should.eql(["A"]); 46 | classifier.classify({ I: 1, want: 1, bb: 1 }).should.eql(["B"]); 47 | classifier.classify({ I: 1, want: 1, cc: 1 }).should.eql(["C"]); 48 | }); 49 | 50 | it("knows its classes", function() { 51 | classifier.getAllClasses().should.eql(["A", "B", "C"]); 52 | }); 53 | 54 | it("explains its decisions", function() { 55 | var ab = classifier.classify( 56 | { I: 1, want: 1, aa: 1, and: 1, bb: 1 }, 57 | /*explain=*/ 3 58 | ); 59 | ab.should.have.property("explanation").with.property("ranking"); 60 | ab.should.have.property("explanation").with.property("counting"); 61 | }); 62 | }); 63 | 64 | describe("CLIR Meta-Labeler batch-trained on two-class inputs", function() { 65 | var classifier = new MetaLabelerLanguageModel(); 66 | classifier.trainBatch([ 67 | { input: { I: 1, want: 1, aa: 1, bb: 1 }, output: ["A", "B"] }, // train on array with classes 68 | { input: { I: 1, want: 1, bb: 1, cc: 1 }, output: ["B", "C"] }, // train on array with classes 69 | { input: { I: 1, want: 1, cc: 1, dd: 1 }, output: ["C", "D"] }, 70 | { input: { I: 1, want: 1, dd: 1, aa: 1 }, output: ["D", "A"] } 71 | ]); 72 | 73 | it("classifies 2-class samples", function() { 74 | classifier 75 | .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }) 76 | .sorted() 77 | .should.eql(["A", "B"]); 78 | classifier 79 | .classify({ I: 1, want: 1, bb: 1, and: 1, cc: 1 }) 80 | .sorted() 81 | .should.eql(["B", "C"]); 82 | classifier 83 | .classify({ I: 1, want: 1, cc: 1, and: 1, dd: 1 }) 84 | .sorted() 85 | .should.eql(["C", "D"]); 86 | classifier 87 | .classify({ I: 1, want: 1, dd: 1, and: 1, aa: 1 }) 88 | .sorted() 89 | .should.eql(["A", "D"]); 90 | }); 91 | 92 | it("explains its decisions", function() { 93 | // 
classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }, /*explain=*/1).should.have.property('explanation').with.property('ranking').with.lengthOf(4); 94 | classifier 95 | .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }, /*explain=*/ 1) 96 | .should.have.property("explanation") 97 | .with.property("ranking"); 98 | // classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }, /*explain=*/3).should.have.property('explanation').with.property('counting').with.lengthOf(1); 99 | classifier 100 | .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }, /*explain=*/ 3) 101 | .should.have.property("explanation") 102 | .with.property("counting"); 103 | }); 104 | }); 105 | -------------------------------------------------------------------------------- /test/classifiersTest/SvmMulticlassTest.js: -------------------------------------------------------------------------------- 1 | /** 2 | * a unit-test for SvmLinear classifier (a wrapper for LibLinear), as a multi-class classifier. 3 | */ 4 | 5 | import { EnhancedClassifier, SvmLinear } from "../../dist/core"; 6 | import { FeatureLookupTable } from "../../dist/features"; 7 | 8 | if (!SvmLinear.isInstalled()) { 9 | console.warn("liblinear_train not found - SvmMulticlass tests skipped."); 10 | } else { 11 | var SvmClassifier = SvmLinear.bind(0, { 12 | multiclass: true, 13 | learn_args: "-c 20.0" 14 | }); 15 | 16 | describe( 17 | "SVM-LibLinear multiclass" + " with numeric features and numeric labels", 18 | function() { 19 | var trainSet = [ 20 | { 21 | input: [0, 0], 22 | output: 3 23 | }, 24 | { 25 | input: [1, 1], 26 | output: 3 27 | }, 28 | 29 | { 30 | input: [0, 1], 31 | output: 4 32 | }, 33 | { 34 | input: [1, 2], 35 | output: 4 36 | }, 37 | 38 | { 39 | input: [0, 2], 40 | output: 5 41 | }, 42 | { 43 | input: [1, 3], 44 | output: 5 45 | } 46 | ]; 47 | 48 | var classifier = new SvmClassifier(); 49 | classifier.trainBatch(trainSet); 50 | 51 | it("supports multi-class output", function() { 52 | classifier.classify([1, 
0]).should.equal(3); 53 | classifier.classify([0, 1.3]).should.equal(4); 54 | classifier.classify([0, 1.7]).should.equal(5); 55 | classifier.classify([0, 3]).should.equal(5); 56 | }); 57 | 58 | it("explains its decisions", function() { 59 | classifier 60 | .classify([1, 0], 3) 61 | .should.have.property("explanation") 62 | .with.lengthOf(3); 63 | classifier 64 | .classify([0, 2], 5) 65 | .should.have.property("explanation") 66 | .with.lengthOf(3); 67 | }); 68 | 69 | it("supports classification with scores", function() { 70 | classifier.classify([1, 0], 0, true).should.have.lengthOf(3); 71 | classifier.classify([0, 1.3], 0, true)[0].should.have.lengthOf(2); 72 | classifier.classify([0, 1.7], 0, true)[0][0].should.equal(5); 73 | classifier.classify([0, 3], 0, true)[0][1].should.be.within(2.5, 3.5); 74 | }); 75 | } 76 | ); 77 | 78 | var SvmClassifierStringFeatures = EnhancedClassifier.bind(this, { 79 | classifierType: SvmClassifier, 80 | featureLookupTable: new FeatureLookupTable() 81 | }); 82 | 83 | var SvmClassifierStringLabels = EnhancedClassifier.bind(this, { 84 | classifierType: SvmClassifier, 85 | labelLookupTable: new FeatureLookupTable() 86 | }); 87 | 88 | describe( 89 | "SVM-LibLinear multiclass" + " with numeric features and string labels", 90 | function() { 91 | var trainSet = [ 92 | { 93 | input: [0, 0], 94 | output: "a" 95 | }, 96 | { 97 | input: [1, 1], 98 | output: "a" 99 | }, 100 | 101 | { 102 | input: [0, 1], 103 | output: "b" 104 | }, 105 | { 106 | input: [1, 2], 107 | output: "b" 108 | }, 109 | 110 | { 111 | input: [0, 2], 112 | output: "c" 113 | }, 114 | { 115 | input: [1, 3], 116 | output: "c" 117 | } 118 | ]; 119 | 120 | var classifier = new SvmClassifierStringLabels(); 121 | classifier.trainBatch(trainSet); 122 | 123 | it("supports multi-class output", function() { 124 | classifier.classify([1, 0]).should.equal("a"); 125 | classifier.classify([0, 1.3]).should.equal("b"); 126 | classifier.classify([0, 1.7]).should.equal("c"); 127 | 
classifier.classify([0, 3]).should.equal("c"); 128 | }); 129 | 130 | it("explains its decisions", function() { 131 | classifier 132 | .classify([1, 0], 3) 133 | .should.have.property("explanation") 134 | .with.lengthOf(3); 135 | classifier 136 | .classify([0, 2], 5) 137 | .should.have.property("explanation") 138 | .with.lengthOf(3); 139 | }); 140 | 141 | it("supports classification with scores", function() { 142 | classifier.classify([1, 0], 0, true).should.have.lengthOf(3); 143 | classifier.classify([0, 1.3], 0, true)[0].should.have.lengthOf(2); 144 | classifier.classify([0, 1.7], 0, true)[0][0].should.equal("c"); // must be the first! 145 | classifier.classify([0, 3], 0, true)[0][1].should.be.within(2.5, 3.5); 146 | }); 147 | } 148 | ); 149 | } 150 | -------------------------------------------------------------------------------- /test/classifiersTest/SvmTest.js: -------------------------------------------------------------------------------- 1 | /** 2 | * a unit-test for SvmLinear classifier (a wrapper for LibLinear) and SvmPerf classifier. 3 | */ 4 | 5 | import { EnhancedClassifier, SvmLinear, SvmPerf } from "../../dist/core"; 6 | import { FeatureLookupTable } from "../../dist/features"; 7 | 8 | function test(name, SvmClassifier) { 9 | describe(name + " with numeric features", function() { 10 | var trainSet = [ 11 | { 12 | input: [0, 0], 13 | output: 0 14 | }, 15 | { 16 | input: [1, 1], 17 | output: 0 18 | }, 19 | { 20 | input: [0, 1], 21 | output: 1 22 | }, 23 | { 24 | input: [1, 2], 25 | output: 1 26 | } 27 | ]; 28 | 29 | var classifier = new SvmClassifier(); 30 | classifier.trainBatch(trainSet); 31 | 32 | it("finds the maximal margin separator", function() { 33 | // the max-margin separating line goes through [0,0.5] and [1,1.5]. 
It is: 34 | // 0.5+x-y = 0 35 | // or: 2y-2x-1 = 0 36 | //classifier.modelMap.should.eql({ '0': -1, '1': -2, '2': 2 }); // the LibLinear algorithm is not accurate: 37 | var modelWeights = classifier.getModelWeights(); 38 | 39 | modelWeights[0].should.be.within(-1.5, -0.5); 40 | modelWeights[1].should.be.within(-2.5, -1.5); 41 | modelWeights[2].should.be.within(1.5, 2.5); 42 | }); 43 | 44 | it("supports binary output", function() { 45 | classifier.classify([0, 2]).should.eql(1); 46 | classifier.classify([1, 0]).should.eql(0); 47 | }); 48 | 49 | // it('explains its decisions', function() { 50 | // classifier.classify([0,2], 2).should.have.property("explanation").with.lengthOf(2); 51 | // classifier.classify([1,0], 3).should.have.property("explanation").with.lengthOf(3); 52 | // }) 53 | 54 | it("supports continuous output", function() { 55 | classifier.classify([0, 2], 0, true).should.be.within(2.5, 3.5); // should equal 3, but it is not accurate enough 56 | classifier.classify([1, 0], 0, true).should.be.within(-3.5, -2.5); // should equal -3, but it is not accurate enough 57 | }); 58 | }); 59 | 60 | var SvmClassifierStringFeatures = EnhancedClassifier.bind(this, { 61 | classifierType: SvmClassifier, 62 | featureLookupTable: new FeatureLookupTable() 63 | }); 64 | 65 | describe(name + " with string features", function() { 66 | var trainSet = [ 67 | { 68 | input: { 69 | a: 0, 70 | b: 0 71 | }, 72 | output: 0 73 | }, 74 | { 75 | input: { 76 | a: 1, 77 | b: 1 78 | }, 79 | output: 0 80 | }, 81 | { 82 | input: { 83 | a: 0, 84 | b: 1 85 | }, 86 | output: 1 87 | }, 88 | { 89 | input: { 90 | a: 1, 91 | b: 2 92 | }, 93 | output: 1 94 | } 95 | ]; 96 | 97 | var classifier = new SvmClassifierStringFeatures(); 98 | classifier.trainBatch(trainSet); 99 | 100 | it("supports binary output", function() { 101 | classifier 102 | .classify({ 103 | a: 0, 104 | b: 2 105 | }) 106 | .should.eql(1); 107 | classifier 108 | .classify({ 109 | a: 1, 110 | b: 0 111 | }) 112 | .should.eql(0); 113 | 
}); 114 | 115 | // it('explains its classifications', function() { 116 | // classifier.classify({a:0, b:2}, 2).should.have.property("explanation").with.lengthOf(2); 117 | // classifier.classify({a:1, b:0}, 3).should.have.property("explanation").with.lengthOf(3); 118 | // }) 119 | 120 | it("supports continuous output", function() { 121 | classifier 122 | .classify( 123 | { 124 | a: 0, 125 | b: 2 126 | }, 127 | 0, 128 | true 129 | ) 130 | .should.be.above(0); 131 | classifier 132 | .classify( 133 | { 134 | a: 1, 135 | b: 0 136 | }, 137 | 0, 138 | true 139 | ) 140 | .should.be.below(0); 141 | }); 142 | }); 143 | } // end of function 144 | 145 | if (SvmPerf.isInstalled()) 146 | test( 147 | "SVM-Perf", 148 | SvmPerf.bind(this, { 149 | learn_args: "-c 20.0" 150 | }) 151 | ); 152 | else console.warn("svm_perf_learn not found - SvmPerf tests skipped."); 153 | 154 | if (SvmLinear.isInstalled()) 155 | test( 156 | "SVM-LibLinear", 157 | SvmLinear.bind(this, { 158 | learn_args: "-c 20.0", 159 | multiclass: false 160 | }) 161 | ); 162 | else console.warn("liblinear_train not found - SvmLinear tests skipped."); 163 | -------------------------------------------------------------------------------- /src/features/FeatureLookupTable.js: -------------------------------------------------------------------------------- 1 | /** 2 | * FeatureLookupTable - a table for converting features to numbers and vice versa 3 | */ 4 | class FeatureLookupTable { 5 | constructor() { 6 | this.featureIndexToFeatureName = [undefined]; 7 | this.featureNameToFeatureIndex = { undefined: 0 }; 8 | } 9 | } 10 | 11 | FeatureLookupTable.prototype = { 12 | 13 | // add a single feature, if it does not exist 14 | addFeature: function(feature) { 15 | if (!(feature in this.featureNameToFeatureIndex)) { 16 | var newIndex = this.featureIndexToFeatureName.length; 17 | this.featureIndexToFeatureName.push(feature); 18 | this.featureNameToFeatureIndex[feature] = newIndex; 19 | } 20 | }, 21 | 22 | // add all features in the 
given hash or array 23 | addFeatures: function(hash) { 24 | if (hash instanceof Array) { 25 | for (var index in hash) 26 | this.addFeature(hash[index]); 27 | } else if (hash instanceof Object) { 28 | for (var feature in hash) 29 | this.addFeature(feature); 30 | } 31 | else throw new Error("FeatureLookupTable.addFeatures expects a hash or an array, but got: "+JSON.stringify(hash)); 32 | }, 33 | 34 | // add all features in all hashes in the given array 35 | addFeaturess: function(hashes) { 36 | for (var i=0; i= 0 which represents a number of labels. 16 | * The MetaLabeler returns the C most relevant labels from the list returned by the ranker. 17 | * 18 | * @param opts 19 | * rankerType (mandatory) - the type of the multi-class classifier used for ranking the labels. 20 | * counterType (mandatory) - the type of the multi-class classifier used for selecting the number of labels. 21 | */ 22 | class MetaLabeler { 23 | constructor(opts) { 24 | if (!opts.rankerType) { 25 | console.dir(opts); 26 | throw new Error("opts.rankerType not found"); 27 | } 28 | if (!opts.counterType) { 29 | console.dir(opts); 30 | throw new Error("opts.counterType not found"); 31 | } 32 | this.ranker = new opts.rankerType(); 33 | this.counter = new opts.counterType(); 34 | } 35 | } 36 | 37 | MetaLabeler.prototype = { 38 | 39 | /** 40 | * Tell the classifier that the given sample belongs to the given classes. 41 | * 42 | * @param sample a document. 43 | * @param labels an array whose VALUES are classes. 44 | */ 45 | trainOnline: function(sample, labels) { 46 | // The ranker is just trained by the given set of relevant labels: 47 | this.ranker.trainOnline(sample, labels); 48 | 49 | // The counter is trained by the *number* of relevant labels: 50 | var labelCount = (Array.isArray(labels)? labels: Object.keys(labels)).length; 51 | this.counter.trainOnline(sample, labelCount); 52 | }, 53 | 54 | /** 55 | * Train the classifier with all the given documents. 
56 | * 57 | * @param dataset 58 | * an array with objects of the format: 59 | * {input: sample1, output: [class11, class12...]} 60 | */ 61 | trainBatch : function(dataset) { 62 | // The ranker is just trained by the given set of labels relevant to each sample: 63 | this.ranker.trainBatch(dataset); 64 | 65 | // The counter is trained by the *number* of labels relevant to each sample: 66 | var labelCountDataset = dataset.map(function(datum) { 67 | var labelCount = (Array.isArray(datum.output)? datum.output.length: 1); 68 | return { 69 | input: datum.input, 70 | output: labelCount 71 | }; 72 | }); 73 | this.counter.trainBatch(labelCountDataset); 74 | }, 75 | 76 | /** 77 | * Use the model trained so far to classify a new sample. 78 | * 79 | * @param sample a document. 80 | * @param explain - int - if positive, an "explanation" field, with the given length, will be added to the result. 81 | * 82 | * @return an array whose VALUES are classes. 83 | */ 84 | classify: function(sample, explain) { 85 | var rankedLabelsWithExplain = this.ranker.classify(sample, explain, /*withScores=*/true); 86 | var rankedLabels = (explain>0? rankedLabelsWithExplain.classes: rankedLabelsWithExplain); 87 | var labelCountWithExplain = this.counter.classify(sample, explain, /*withScores=*/true); 88 | var labelCount = (explain>0? labelCountWithExplain.classes[0][0]: labelCountWithExplain[0][0]); 89 | if (_.isString(labelCount)) labelCount = parseInt(labelCount); 90 | 91 | // Pick the labelCount most relevant labels from the list returned by the ranker: 92 | var positiveLabelsWithScores = rankedLabels.slice(0, labelCount); 93 | 94 | var positiveLabels = positiveLabelsWithScores 95 | 96 | if (positiveLabelsWithScores.length != 0) 97 | if (_.isArray(positiveLabelsWithScores[0])) 98 | var positiveLabels = positiveLabelsWithScores.map(function(labelWithScore) {return labelWithScore[0]}); 99 | 100 | return (explain>0? 
{ 101 | classes: positiveLabels, 102 | explanation: { 103 | ranking: rankedLabelsWithExplain.explanation, 104 | counting: labelCountWithExplain.explanation 105 | } 106 | }: 107 | positiveLabels) 108 | }, 109 | 110 | getAllClasses: function() { 111 | return this.ranker.getAllClasses(); 112 | }, 113 | 114 | toJSON : function() { 115 | }, 116 | 117 | fromJSON : function(json) { 118 | }, 119 | 120 | /** 121 | * Link to a FeatureLookupTable from a higher level in the hierarchy (typically from an EnhancedClassifier), used ONLY for generating meaningful explanations. 122 | */ 123 | setFeatureLookupTable: function(featureLookupTable) { 124 | if (this.ranker.setFeatureLookupTable) 125 | this.ranker.setFeatureLookupTable(featureLookupTable); 126 | if (this.counter.setFeatureLookupTable) 127 | this.counter.setFeatureLookupTable(featureLookupTable); 128 | }, 129 | } 130 | 131 | 132 | module.exports = MetaLabeler; 133 | -------------------------------------------------------------------------------- /dist/features/FeatureLookupTable.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } 4 | 5 | /** 6 | * FeatureLookupTable - a table for converting features to numbers and vice versa 7 | */ 8 | var FeatureLookupTable = function FeatureLookupTable() { 9 | _classCallCheck(this, FeatureLookupTable); 10 | 11 | this.featureIndexToFeatureName = [undefined]; 12 | this.featureNameToFeatureIndex = { 13 | undefined: 0 14 | }; 15 | }; 16 | 17 | FeatureLookupTable.prototype = { 18 | // add a single feature, if it does not exist 19 | addFeature: function addFeature(feature) { 20 | if (!(feature in this.featureNameToFeatureIndex)) { 21 | var newIndex = this.featureIndexToFeatureName.length; 22 | this.featureIndexToFeatureName.push(feature); 23 | this.featureNameToFeatureIndex[feature] = 
/**
 * FeatureLookupTable - a table for converting features to numbers and vice versa.
 * Index 0 is reserved (it maps to `undefined`), so real features get indices >= 1.
 */
var FeatureLookupTable = function FeatureLookupTable() {
  if (!(this instanceof FeatureLookupTable)) {
    throw new TypeError("Cannot call a class as a function");
  }
  this.featureIndexToFeatureName = [undefined];
  this.featureNameToFeatureIndex = {
    undefined: 0
  };
};

FeatureLookupTable.prototype = {
  // add a single feature, if it does not exist.
  // BUGFIX: use an own-property check; the previous `feature in map` test also matched
  // inherited Object.prototype members, so features named "toString", "constructor",
  // etc. were never added and lookups returned built-in functions instead of indices.
  addFeature: function addFeature(feature) {
    if (!Object.prototype.hasOwnProperty.call(this.featureNameToFeatureIndex, feature)) {
      var newIndex = this.featureIndexToFeatureName.length;
      this.featureIndexToFeatureName.push(feature);
      this.featureNameToFeatureIndex[feature] = newIndex;
    }
  },
  // add all features in the given hash (its keys) or array (its values):
  addFeatures: function addFeatures(hash) {
    if (hash instanceof Array) {
      // plain index loop - for...in over arrays is error-prone:
      for (var i = 0; i < hash.length; ++i) {
        this.addFeature(hash[i]);
      }
    } else if (hash instanceof Object) {
      for (var feature in hash) {
        this.addFeature(feature);
      }
    } else throw new Error("FeatureLookupTable.addFeatures expects a hash or an array, but got: " + JSON.stringify(hash));
  },
  // add all features in all hashes in the given array:
  addFeaturess: function addFeaturess(hashes) {
    for (var i = 0; i < hashes.length; ++i) {
      this.addFeatures(hashes[i]);
    }
  },

  /**
   * Convert the given feature to a numeric index (adding it first if needed).
   */
  featureToNumber: function featureToNumber(feature) {
    this.addFeature(feature);
    return this.featureNameToFeatureIndex[feature];
  },
  numberToFeature: function numberToFeature(number) {
    return this.featureIndexToFeatureName[number];
  },

  /**
   * Convert the given hash of features to a numeric array, using 0 for padding.
   * If some features in the hash do not exist - they will be added.
   * @param hash any hash, for example, {a: 111, b: 222, c: 333}; or an array of
   *   feature names, in which case present features are marked with `true`.
   * @return a matching array, based on the current feature table. For example: [0, 111, 222, 0, 333]
   * @note some code borrowed from Heather Arthur: https://github.com/harthur/brain/blob/master/lib/lookup.js
   */
  hashToArray: function hashToArray(hash) {
    this.addFeatures(hash);
    var array = [];

    for (var featureIndex = 0; featureIndex < this.featureIndexToFeatureName.length; ++featureIndex) {
      array[featureIndex] = 0;
    }

    if (hash instanceof Array) {
      for (var i = 0; i < hash.length; ++i) {
        array[this.featureNameToFeatureIndex[hash[i]]] = true;
      }
    } else if (hash instanceof Object) {
      for (var feature in hash) {
        array[this.featureNameToFeatureIndex[feature]] = hash[feature];
      }
    } else throw new Error("Unsupported type: " + JSON.stringify(hash));

    return array;
  },

  /**
   * Convert all the given hashes of features to numeric arrays, using 0 for padding.
   * If some features in some of the hashes do not exist - they will be added.
   * @param hashes an array of hashes, for example, [{a: 111, b: 222}, {a: 11, c: 33}, ...]
   * @return an array of matching arrays, based on the current feature table.
   */
  hashesToArrays: function hashesToArrays(hashes) {
    this.addFeaturess(hashes);
    var arrays = [];

    for (var i = 0; i < hashes.length; ++i) {
      arrays[i] = [];

      for (var feature in this.featureNameToFeatureIndex) {
        arrays[i][this.featureNameToFeatureIndex[feature]] = hashes[i][feature] || 0;
      }
    }

    return arrays;
  },

  /**
   * Convert the given numeric array to a hash of features, ignoring zero values.
   */
  arrayToHash: function arrayToHash(array) {
    var hash = {};

    for (var feature in this.featureNameToFeatureIndex) {
      if (array[this.featureNameToFeatureIndex[feature]]) hash[feature] = array[this.featureNameToFeatureIndex[feature]];
    }

    return hash;
  },

  /**
   * Convert the given numeric arrays to an array of hashes of features, ignoring zero values.
   */
  arraysToHashes: function arraysToHashes(arrays) {
    var hashes = [];

    for (var i = 0; i < arrays.length; ++i) {
      hashes[i] = this.arrayToHash(arrays[i]);
    }

    return hashes;
  },
  toJSON: function toJSON() {
    return {
      featureIndexToFeatureName: this.featureIndexToFeatureName,
      featureNameToFeatureIndex: this.featureNameToFeatureIndex
    };
  },
  fromJSON: function fromJSON(json) {
    this.featureIndexToFeatureName = json.featureIndexToFeatureName;
    this.featureNameToFeatureIndex = json.featureNameToFeatureIndex;
  }
};

// guarded so the compiled file also loads outside CommonJS:
if (typeof module !== "undefined" && module.exports) module.exports = FeatureLookupTable;
4 | */ 5 | 6 | import { multilabel, Winnow } from "../../../dist/core"; 7 | import "../../sorted"; 8 | 9 | var retrain_count = 10; 10 | var BinaryRelevanceWinnow = multilabel.BinaryRelevance.bind(this, { 11 | binaryClassifierType: Winnow.bind(this, { 12 | promotion: 1.5, 13 | demotion: 0.5, 14 | margin: 1, 15 | retrain_count: retrain_count 16 | }) 17 | }); 18 | 19 | var MetaLabelerWinnow = multilabel.MetaLabeler.bind(this, { 20 | rankerType: BinaryRelevanceWinnow, 21 | counterType: BinaryRelevanceWinnow 22 | }); 23 | 24 | var dataset = [ 25 | { input: { I: 1, want: 1, aa: 1 }, output: "A" }, // train on single class 26 | { input: { I: 1, want: 1, bb: 1 }, output: ["B"] }, // train on array with single class (same effect) 27 | { input: { I: 1, want: 1, cc: 1 }, output: [{ C: "c" }] } // train on structured class, that will be stringified to "{C:c}". 28 | ]; 29 | 30 | describe("Meta-Labeler batch-trained on Single-class inputs", function() { 31 | var classifierBatch = new MetaLabelerWinnow(); 32 | classifierBatch.trainBatch(dataset); 33 | 34 | var classifier = classifierBatch; 35 | it("classifies 1-class samples", function() { 36 | classifier.classify({ I: 1, want: 1, aa: 1 }).should.eql(["A"]); 37 | classifier.classify({ I: 1, want: 1, bb: 1 }).should.eql(["B"]); 38 | classifier.classify({ I: 1, want: 1, cc: 1 }).should.eql(['{"C":"c"}']); 39 | }); 40 | 41 | it("knows its classes", function() { 42 | classifier.getAllClasses().should.eql(["A", "B", '{"C":"c"}']); 43 | }); 44 | 45 | it("explains its decisions", function() { 46 | var ab = classifier.classify( 47 | { I: 1, want: 1, aa: 1, and: 1, bb: 1 }, 48 | /*explain=*/ 3 49 | ); 50 | //console.dir(ab); 51 | ab.should.have.property("explanation").with.property("ranking"); 52 | ab.should.have.property("explanation").with.property("counting"); 53 | }); 54 | }); 55 | 56 | describe("Meta-Labeler online-trained on Single-class inputs", function() { 57 | var classifierOnline = new MetaLabelerWinnow(); 58 | for (var i = 0; 
i <= retrain_count; ++i) 59 | for (var d = 0; d < dataset.length; ++d) 60 | classifierOnline.trainOnline(dataset[d].input, dataset[d].output); 61 | 62 | var classifier = classifierOnline; 63 | it("classifies 1-class samples", function() { 64 | classifier.classify({ I: 1, want: 1, aa: 1 }).should.eql(["A"]); 65 | classifier.classify({ I: 1, want: 1, bb: 1 }).should.eql(["B"]); 66 | classifier.classify({ I: 1, want: 1, cc: 1 }).should.eql(['{"C":"c"}']); 67 | }); 68 | 69 | it("knows its classes", function() { 70 | classifier.getAllClasses().should.eql(["A", "B", '{"C":"c"}']); 71 | }); 72 | 73 | it("explains its decisions", function() { 74 | // classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }, /*explain=*/1).should.have.property('explanation').with.property('ranking').with.lengthOf(3); 75 | classifier 76 | .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }, /*explain=*/ 1) 77 | .should.have.property("explanation") 78 | .with.property("ranking"); 79 | // classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }, /*explain=*/3).should.have.property('explanation').with.property('counting').with.lengthOf(1); 80 | classifier 81 | .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }, /*explain=*/ 3) 82 | .should.have.property("explanation") 83 | .with.property("counting"); 84 | }); 85 | }); 86 | 87 | describe("Meta-Labeler batch-trained on two-class inputs", function() { 88 | var classifier = new MetaLabelerWinnow(); 89 | classifier.trainBatch([ 90 | { input: { I: 1, want: 1, aa: 1, bb: 1 }, output: ["A", "B"] }, 91 | { input: { I: 1, want: 1, bb: 1, cc: 1 }, output: ["B", "C"] }, 92 | { input: { I: 1, want: 1, cc: 1, dd: 1 }, output: ["C", "D"] }, 93 | { input: { I: 1, want: 1, dd: 1, aa: 1 }, output: ["D", "A"] } 94 | ]); 95 | 96 | it("classifies 2-class samples", function() { 97 | classifier 98 | .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }) 99 | .sorted() 100 | .should.eql(["A", "B"]); 101 | classifier 102 | .classify({ I: 1, want: 1, bb: 1, and: 1, cc: 1 }) 
103 | .sorted() 104 | .should.eql(["B", "C"]); 105 | classifier 106 | .classify({ I: 1, want: 1, cc: 1, and: 1, dd: 1 }) 107 | .sorted() 108 | .should.eql(["C", "D"]); 109 | classifier 110 | .classify({ I: 1, want: 1, dd: 1, and: 1, aa: 1 }) 111 | .sorted() 112 | .should.eql(["A", "D"]); 113 | }); 114 | 115 | it("explains its decisions", function() { 116 | // classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }, /*explain=*/1).should.have.property('explanation').with.property('ranking').with.lengthOf(4); 117 | classifier 118 | .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }, /*explain=*/ 1) 119 | .should.have.property("explanation") 120 | .with.property("ranking"); 121 | // classifier.classify({I:1 , want:1 , aa:1 , and:1 , bb:1 }, /*explain=*/3).should.have.property('explanation').with.property('counting').with.lengthOf(1); 122 | classifier 123 | .classify({ I: 1, want: 1, aa: 1, and: 1, bb: 1 }, /*explain=*/ 3) 124 | .should.have.property("explanation") 125 | .with.property("counting"); 126 | }); 127 | }); 128 | -------------------------------------------------------------------------------- /dist/utils/partitions.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | /** 4 | * Utilities for partitioning datasets of documents for training and testing. 5 | * 6 | * @author Erel Segal-haLevi 7 | * @since 2013-06 8 | */ 9 | var _ = require("underscore")._; 10 | /** 11 | * Create a single partition of the given dataset. 12 | * 13 | * @param dataset an array. 14 | * @param testSetStart an index into the array. 15 | * @param testSetCount int - the num of samples in the test set, starting from testSetStart. 
/**
 * Create a single partition of the given dataset.
 *
 * @param dataset an array.
 * @param testSetStart an index into the array.
 * @param testSetCount int - the number of samples in the test set, starting from testSetStart.
 * @return an object {train: trainSet, test: testSet}.
 */
function partition(dataset, testSetStart, testSetCount) {
  // deep-clone, so the returned sets do not alias (or mutate) the caller's array:
  var datasetclone = JSON.parse(JSON.stringify(dataset));
  var testSet = datasetclone.splice(testSetStart, testSetCount);
  return {
    train: datasetclone, // the clone without the test-set
    test: testSet
  };
}

/**
 * Return a randomly-shuffled shallow copy of the given array (Fisher-Yates).
 * Replaces the previous use of _.shuffle - same contract: copy, not in-place.
 */
function shuffled(array) {
  var copy = array.slice();
  for (var i = copy.length - 1; i > 0; --i) {
    var j = Math.floor(Math.random() * (i + 1));
    var swap = copy[i];
    copy[i] = copy[j];
    copy[j] = swap;
  }
  return copy;
}

/**
 * Create several different partitions of the given dataset to train and test.
 * Useful for cross-validation.
 *
 * @param dataset any array.
 * @param numOfPartitions number of different partitions to generate.
 * @param callback a function (trainSet, testSet, partitionIndex) called for each partition.
 * @note fold size is dataset.length/numOfPartitions (splice truncates a fractional count).
 * @note code adapted from Heather Arthur: https://github.com/harthur/classifier/blob/master/test/cross-validation/cross-validate.js
 */
function partitions(dataset, numOfPartitions, callback) {
  // BUGFIX: the shuffled copy was computed but the ORIGINAL array was partitioned,
  // so the folds were never actually randomized; partition the shuffled copy instead.
  var shuffledDataset = shuffled(dataset);
  var testSetCount = dataset.length / numOfPartitions;

  for (var iPartition = 0; iPartition < numOfPartitions; ++iPartition) {
    var testSetStart = iPartition * testSetCount;
    var currentPartition = partition(shuffledDataset, testSetStart, testSetCount);
    callback(currentPartition.train, currentPartition.test, iPartition);
  }
}

/**
 * Deterministic (unshuffled) single fold of the dataset.
 * Useful for cross-validation in the Threshold classifier.
 *
 * @return {train, test} for the given partitionIndex; empty arrays if the index is out of range.
 */
function partitions_consistent_by_fold(dataset, numOfPartitions, partitionIndex) {
  if (!Array.isArray(dataset)) throw new Error("dataset is not an array");
  if (numOfPartitions === undefined) throw new Error("numOfPartitions " + numOfPartitions);
  if (partitionIndex === undefined) throw new Error("partitionIndex " + partitionIndex);
  var testSetCount = dataset.length / numOfPartitions;
  var result = {
    'train': [],
    'test': []
  };

  for (var iPartition = 0; iPartition < numOfPartitions; ++iPartition) {
    if (iPartition == partitionIndex) {
      // compute the partition only for the requested fold
      // (the previous code needlessly partitioned every fold):
      var currentPartition = partition(dataset, iPartition * testSetCount, testSetCount);
      result['train'] = currentPartition.train;
      result['test'] = currentPartition.test;
    }
  }

  return result;
}

/**
 * Deterministic (unshuffled) cross-validation folds.
 */
function partitions_consistent(dataset, numOfPartitions, callback) {
  var testSetCount = dataset.length / numOfPartitions;

  for (var iPartition = 0; iPartition < numOfPartitions; ++iPartition) {
    var currentPartition = partition(dataset, iPartition * testSetCount, testSetCount);
    callback(currentPartition.train, currentPartition.test, iPartition);
  }
}

/**
 * Like partitions_consistent, but the callback receives (test, train) - i.e. it
 * trains on the small fold and tests on the rest.
 * (The misspelled name is kept for backward compatibility with existing callers.)
 */
function partitions_reverese(dataset, numOfPartitions, callback) {
  var testSetCount = dataset.length / numOfPartitions;

  for (var iPartition = 0; iPartition < numOfPartitions; ++iPartition) {
    var currentPartition = partition(dataset, iPartition * testSetCount, testSetCount);
    callback(currentPartition.test, currentPartition.train, iPartition);
  }
}

// guarded so the compiled file also loads outside CommonJS:
if (typeof exports !== "undefined") {
  exports.partition = partition;
  exports.partitions = partitions;
  exports.partitions_consistent_by_fold = partitions_consistent_by_fold;
  exports.partitions_consistent = partitions_consistent;
  exports.partitions_reverese = partitions_reverese;
}
0; iPartition < numOfPartitions; ++iPartition) { 109 | var testSetStart = iPartition * testSetCount; 110 | var dataset = JSON.parse(JSON.stringify(datasetor)); 111 | var test = []; 112 | var train = []; 113 | 114 | _(count - testSetCount).times(function (n) { 115 | train.push([]); 116 | }); 117 | 118 | _.each(dataset, function (value, key, list) { 119 | test = test.concat(value.splice(testSetStart, testSetCount)); 120 | 121 | _.each(value, function (elem, key1, list1) { 122 | train[key1].push(elem); 123 | }, this); 124 | }, this); 125 | 126 | callback(train, test, iPartition); 127 | } 128 | }; 129 | 130 | exports.partitions_hash_fold = function (datasetor, numOfPartitions, fold) { 131 | var count = datasetor[Object.keys(datasetor)[0]].length; 132 | var testSetCount = Math.floor(count / numOfPartitions); 133 | var testSetStart = fold * testSetCount; // var dataset = JSON.parse(JSON.stringify(datasetor)) 134 | 135 | var test = []; 136 | var train = []; 137 | 138 | _(count - testSetCount).times(function (n) { 139 | train.push([]); 140 | }); 141 | 142 | _.each(datasetor, function (value, key, list) { 143 | test = test.concat(value.splice(testSetStart, testSetCount)); 144 | 145 | _.each(value, function (elem, key1, list1) { 146 | train[key1].push(elem); 147 | }, this); 148 | }, this); 149 | 150 | return { 151 | "train": train, 152 | "test": test 153 | }; 154 | }; -------------------------------------------------------------------------------- /dist/core/multilabel/MetaLabeler.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } 4 | 5 | var hash = require("../../utils/hash"); 6 | 7 | var sprintf = require("sprintf").sprintf; 8 | 9 | var _ = require("underscore")._; 10 | /** 11 | * MetaLabeler - Multi-label classifier, based on: 12 | * 13 | * Tang Lei, Rajan Suju, 
Narayanan Vijay K.. Large scale multi-label classification via metalabeler in Proceedings of the 18th international conference on World wide webWWW '09(New York, NY, USA):211-220ACM 2009. 14 | * http://www.citeulike.org/user/erelsegal-halevi/article/4860265 15 | * 16 | * A MetaLabeler uses two multi-class classifiers to create a single multi-label classifier. One is called "ranker" and the other is called "counter". 17 | * 18 | * The MetaLabeler assigns labels to a sample in the following two stages: 19 | * - Stage 1: Ranking. The sample is sent to the "ranker", which returns all available labels ordered from the most relevant to the least relevant. 20 | * - Stage 2: Counting. The sample is sent to the "counter", which returns integer C >= 0 which represents a number of labels. 21 | * The MetaLabeler returns the C most relevant labels from the list returned by the ranker. 22 | * 23 | * @param opts 24 | * rankerType (mandatory) - the type of the multi-class classifier used for ranking the labels. 25 | * counterType (mandatory) - the type of the multi-class classifier used for selecting the number of labels. 26 | */ 27 | 28 | 29 | var MetaLabeler = function MetaLabeler(opts) { 30 | _classCallCheck(this, MetaLabeler); 31 | 32 | if (!opts.rankerType) { 33 | console.dir(opts); 34 | throw new Error("opts.rankerType not found"); 35 | } 36 | 37 | if (!opts.counterType) { 38 | console.dir(opts); 39 | throw new Error("opts.counterType not found"); 40 | } 41 | 42 | this.ranker = new opts.rankerType(); 43 | this.counter = new opts.counterType(); 44 | }; 45 | 46 | MetaLabeler.prototype = { 47 | /** 48 | * Tell the classifier that the given sample belongs to the given classes. 49 | * 50 | * @param sample a document. 51 | * @param labels an array whose VALUES are classes. 
52 | */ 53 | trainOnline: function trainOnline(sample, labels) { 54 | // The ranker is just trained by the given set of relevant labels: 55 | this.ranker.trainOnline(sample, labels); // The counter is trained by the *number* of relevant labels: 56 | 57 | var labelCount = (Array.isArray(labels) ? labels : Object.keys(labels)).length; 58 | this.counter.trainOnline(sample, labelCount); 59 | }, 60 | 61 | /** 62 | * Train the classifier with all the given documents. 63 | * 64 | * @param dataset 65 | * an array with objects of the format: 66 | * {input: sample1, output: [class11, class12...]} 67 | */ 68 | trainBatch: function trainBatch(dataset) { 69 | // The ranker is just trained by the given set of labels relevant to each sample: 70 | this.ranker.trainBatch(dataset); // The counter is trained by the *number* of labels relevant to each sample: 71 | 72 | var labelCountDataset = dataset.map(function (datum) { 73 | var labelCount = Array.isArray(datum.output) ? datum.output.length : 1; 74 | return { 75 | input: datum.input, 76 | output: labelCount 77 | }; 78 | }); 79 | this.counter.trainBatch(labelCountDataset); 80 | }, 81 | 82 | /** 83 | * Use the model trained so far to classify a new sample. 84 | * 85 | * @param sample a document. 86 | * @param explain - int - if positive, an "explanation" field, with the given length, will be added to the result. 87 | * 88 | * @return an array whose VALUES are classes. 89 | */ 90 | classify: function classify(sample, explain) { 91 | var rankedLabelsWithExplain = this.ranker.classify(sample, explain, 92 | /*withScores=*/ 93 | true); 94 | var rankedLabels = explain > 0 ? rankedLabelsWithExplain.classes : rankedLabelsWithExplain; 95 | var labelCountWithExplain = this.counter.classify(sample, explain, 96 | /*withScores=*/ 97 | true); 98 | var labelCount = explain > 0 ? 
labelCountWithExplain.classes[0][0] : labelCountWithExplain[0][0]; 99 | if (_.isString(labelCount)) labelCount = parseInt(labelCount); // Pick the labelCount most relevant labels from the list returned by the ranker: 100 | 101 | var positiveLabelsWithScores = rankedLabels.slice(0, labelCount); 102 | var positiveLabels = positiveLabelsWithScores; 103 | if (positiveLabelsWithScores.length != 0) if (_.isArray(positiveLabelsWithScores[0])) var positiveLabels = positiveLabelsWithScores.map(function (labelWithScore) { 104 | return labelWithScore[0]; 105 | }); 106 | return explain > 0 ? { 107 | classes: positiveLabels, 108 | explanation: { 109 | ranking: rankedLabelsWithExplain.explanation, 110 | counting: labelCountWithExplain.explanation 111 | } 112 | } : positiveLabels; 113 | }, 114 | getAllClasses: function getAllClasses() { 115 | return this.ranker.getAllClasses(); 116 | }, 117 | toJSON: function toJSON() {}, 118 | fromJSON: function fromJSON(json) {}, 119 | 120 | /** 121 | * Link to a FeatureLookupTable from a higher level in the hierarchy (typically from an EnhancedClassifier), used ONLY for generating meaningful explanations. 
122 | */ 123 | setFeatureLookupTable: function setFeatureLookupTable(featureLookupTable) { 124 | if (this.ranker.setFeatureLookupTable) this.ranker.setFeatureLookupTable(featureLookupTable); 125 | if (this.counter.setFeatureLookupTable) this.counter.setFeatureLookupTable(featureLookupTable); 126 | } 127 | }; 128 | module.exports = MetaLabeler; -------------------------------------------------------------------------------- /dist/core/decisiontree/DecisionTree.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | /* Implementation of Decision Tree classifier, ID3 implementation 4 | the code based on https://github.com/bugless/nodejs-decision-tree-id3/blob/master/lib/decision-tree.js 5 | */ 6 | var _ = require('underscore'); 7 | 8 | function DecisionTree(opts) { 9 | if (!opts) opts = {}; // this.debug = opts.debug || false; 10 | } 11 | 12 | DecisionTree.prototype = { 13 | toJSON: function toJSON() { 14 | return this.root; 15 | }, 16 | fromJSON: function fromJSON(json) { 17 | this.root = json; 18 | }, 19 | createTree: function createTree(dataset, features) { 20 | var targets = _.unique(_.pluck(dataset, 'output')); 21 | 22 | if (targets.length == 1) { 23 | // console.log("end node! 
"+targets[0]); 24 | return { 25 | type: "result", 26 | val: targets[0], 27 | name: targets[0], 28 | alias: targets[0] + this.randomTag() 29 | }; 30 | } 31 | 32 | if (features.length == 0) { 33 | // console.log("returning the most dominate feature!!!"); 34 | var topTarget = this.mostCommon(targets); 35 | return { 36 | type: "result", 37 | val: topTarget, 38 | name: topTarget, 39 | alias: topTarget + this.randomTag() 40 | }; 41 | } 42 | 43 | var bestFeature = this.maxGain(dataset, features); 44 | 45 | var remainingFeatures = _.without(features, bestFeature); 46 | 47 | var possibleValues = _.unique(_.pluck(_.pluck(dataset, 'input'), bestFeature)); 48 | 49 | var node = { 50 | name: bestFeature, 51 | alias: bestFeature + this.randomTag() 52 | }; 53 | node.type = "feature"; 54 | node.vals = _.map(possibleValues, function (v) { 55 | var _newS = dataset.filter(function (x) { 56 | return x['input'][bestFeature] == v; 57 | }); 58 | 59 | var child_node = { 60 | name: v, 61 | alias: v + this.randomTag(), 62 | type: "feature_value" 63 | }; 64 | child_node.child = this.createTree(_newS, remainingFeatures); 65 | return child_node; 66 | }, this); 67 | return node; 68 | }, 69 | mostCommon: function mostCommon(l) { 70 | return _.sortBy(l, function (a) { 71 | return this.count(a, l); 72 | }, this).reverse()[0]; 73 | }, 74 | count: function count(a, l) { 75 | return _.filter(l, function (b) { 76 | return b === a; 77 | }).length; 78 | }, 79 | randomTag: function randomTag() { 80 | return "_r" + Math.round(Math.random() * 1000000).toString(); 81 | }, 82 | extractFeatures: function extractFeatures(dataset) { 83 | var features = []; 84 | 85 | for (var record in dataset) { 86 | for (var key in dataset[record]['input']) { 87 | features.push(key); 88 | } 89 | } 90 | 91 | return features; 92 | }, 93 | gain: function gain(dataset, feature) { 94 | var attrVals = _.unique(_.pluck(_.pluck(dataset, 'input'), feature)); 95 | 96 | var setEntropy = this.entropy(_.pluck(dataset, 'output')); 97 | 98 | 
var setSize = _.size(dataset); 99 | 100 | var entropies = attrVals.map(function (n) { 101 | var subset = dataset.filter(function (x) { 102 | return x['input'][feature] === n; 103 | }); 104 | return subset.length / setSize * this.entropy(_.pluck(subset, 'output')); 105 | }, this); 106 | var sumOfEntropies = entropies.reduce(function (a, b) { 107 | return a + b; 108 | }, 0); 109 | return setEntropy - sumOfEntropies; 110 | }, 111 | entropy: function entropy(vals) { 112 | var uniqueVals = _.unique(vals); 113 | 114 | var probs = uniqueVals.map(function (x) { 115 | return this.prob(x, vals); 116 | }, this); 117 | var logVals = probs.map(function (p) { 118 | return -p * this.log2(p); 119 | }, this); 120 | return logVals.reduce(function (a, b) { 121 | return a + b; 122 | }, 0); 123 | }, 124 | prob: function prob(val, vals) { 125 | var instances = _.filter(vals, function (x) { 126 | return x === val; 127 | }).length; 128 | 129 | var total = vals.length; 130 | return instances / total; 131 | }, 132 | log2: function log2(n) { 133 | return Math.log(n) / Math.log(2); 134 | }, 135 | maxGain: function maxGain(dataset, features) { 136 | return _.max(features, function (e) { 137 | return this.gain(dataset, e); 138 | }, this); 139 | }, 140 | setFeatureLookupTable: function setFeatureLookupTable(featureLookupTable) { 141 | this.featureLookupTable = featureLookupTable; 142 | }, 143 | 144 | /** 145 | * Batch training (a set of samples). Uses the option this.retrain_count. 146 | * 147 | * @param dataset an array of samples of the form {input: {feature1: value1...} , output: 0/1} 148 | */ 149 | trainBatch: function trainBatch(dataset) { 150 | var features = this.extractFeatures(dataset); 151 | this.root = this.createTree(dataset, features); 152 | }, 153 | 154 | /** 155 | * @param inputs a SINGLE sample (a hash of feature-value pairs). 156 | * @param continuous_output if true, return the net classification value. If false [default], return 0 or 1. 
157 | * @param explain - int - if positive, an "explanation" field, with the given length, will be added to the result. 158 | * @return the classification of the sample. 159 | */ 160 | classify: function classify(features) { 161 | root = this.root; 162 | 163 | while (root.type !== "result") { 164 | var attr = root.name; 165 | var sampleVal = features[attr]; 166 | 167 | var childNode = _.detect(root.vals, function (x) { 168 | return x.name == sampleVal; 169 | }); 170 | 171 | if (childNode) { 172 | root = childNode.child; 173 | } else { 174 | root = root.vals[0].child; 175 | } 176 | } 177 | 178 | return root.val; 179 | } 180 | }; 181 | module.exports = DecisionTree; --------------------------------------------------------------------------------