├── src ├── index.js ├── encoder.js └── neural.js ├── package.json ├── benchmark ├── bench.js └── index.js ├── LICENSE ├── README.md └── .gitignore /src/index.js: -------------------------------------------------------------------------------- 1 | const { Neural } = require('./neural'); 2 | const { normalize, tokenize, Encoder } = require('./encoder'); 3 | 4 | module.exports = { 5 | normalize, 6 | tokenize, 7 | Encoder, 8 | Neural, 9 | } -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "fastest_nlu", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "./benchmark/index.js", 6 | "scripts": { 7 | "start": "node ." 8 | }, 9 | "author": "", 10 | "license": "MIT" 11 | } 12 | -------------------------------------------------------------------------------- /benchmark/bench.js: -------------------------------------------------------------------------------- 1 | class Bench { 2 | constructor(settings = {}) { 3 | this.duration = settings.duration || 10000; 4 | this.transactionsPerRun = settings.transactionsPerRun || 1; 5 | } 6 | 7 | measure(fn, initfn) { 8 | const initValue = initfn(); 9 | const hrstart = process.hrtime(); 10 | let runs = 0; 11 | let elapsed = 0; 12 | let resultIteration; 13 | while (elapsed < this.duration) { 14 | resultIteration = fn(initValue); 15 | runs += 1; 16 | const hrend = process.hrtime(hrstart); 17 | elapsed = hrend[0] * 1000 + hrend[1] / 1000000; 18 | } 19 | const timePerTransaction = elapsed / (runs * this.transactionsPerRun); 20 | return 1000 / timePerTransaction; 21 | } 22 | } 23 | 24 | module.exports = { 25 | Bench, 26 | }; 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Jesús Seijas 4 | 5 | Permission is hereby granted, 
free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fastest_nlu 2 | 3 | ## Introduction 4 | The idea is to have an NLU (Natural Language Understanding) engine for Conversational AI that is fast to train, fast to run, but with good accuracy. 5 | 6 | To do the comparison we use the English and Spanish corpus from the Amazon MASSIVE dataset. 7 | https://www.amazon.science/blog/amazon-releases-51-language-dataset-for-language-understanding 8 | 9 | We will compare the speed and accuracy with RASA. 10 | 11 | ## Installation 12 | 13 | Download this repository. 14 | No need of ```npm install``` as there are no dependencies. 
15 | 16 | ## Run 17 | 18 | ```sh 19 | npm start 20 | ``` 21 | 22 | ## Results 23 | 24 | For the English corpus, these are the results: 25 | - Time for training: 3s 213.6769ms 26 | - Accuracy: 83.66% 27 | - Transactions per second: 181836.99495205944 28 | 29 | RASA Accuracy is 81.4%, time to train in RASA is 4517 seconds, Transactions per second in RASA are 84 30 | 31 | For the Spanish corpus, these are the results: 32 | - Time for training: 2s 488.0854ms 33 | - Accuracy: 81.91% 34 | - Transactions per second: 141800.23397571384 35 | 36 | RASA Accuracy is 80.4%, time to train in RASA is 4712 seconds, Transactions per second in RASA are 82 37 | -------------------------------------------------------------------------------- /benchmark/index.js: -------------------------------------------------------------------------------- 1 | const { Neural } = require('../src'); 2 | const { Bench } = require('./bench'); 3 | const corpusEn = require('./corpus-massive-en.json'); 4 | const corpusEs = require('./corpus-massive-es.json'); 5 | 6 | function execFn({ net, data }) { 7 | let good = 0; 8 | data.forEach((item) => { 9 | const classifications = net.run(item.utterance); 10 | if (classifications[0].intent === item.intent) { 11 | good += 1; 12 | } 13 | }); 14 | return { good, total: data.length} 15 | } 16 | 17 | function measureCorpus(corpus) { 18 | const testData = []; 19 | corpus.data.forEach((item) => { 20 | item.tests.forEach((test) => { 21 | testData.push({ utterance: test, intent: item.intent }); 22 | }); 23 | }); 24 | const net = new Neural(); 25 | const hrstart = process.hrtime(); 26 | net.train(corpus); 27 | const hrend = process.hrtime(hrstart); 28 | console.info('Time for training: %ds %dms', hrend[0], hrend[1] / 1000000); 29 | const result = execFn({ net, data: testData }); 30 | console.log(`Accuracy: ${(result.good * 100) / result.total}`); 31 | const bench = new Bench({ transactionsPerRun: testData.length }); 32 | const benchResult = bench.measure(execFn, () => ({ 
net, data: testData })); 33 | console.log(`Transactions per second: ${benchResult}`); 34 | } 35 | 36 | console.log('English corpus'); 37 | measureCorpus(corpusEn); 38 | console.log('\nSpanish corpus'); 39 | measureCorpus(corpusEs); 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | 9 | # Diagnostic reports (https://nodejs.org/api/report.html) 10 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 11 | 12 | # Runtime data 13 | pids 14 | *.pid 15 | *.seed 16 | *.pid.lock 17 | 18 | # Directory for instrumented libs generated by jscoverage/JSCover 19 | lib-cov 20 | 21 | # Coverage directory used by tools like istanbul 22 | coverage 23 | *.lcov 24 | 25 | # nyc test coverage 26 | .nyc_output 27 | 28 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 29 | .grunt 30 | 31 | # Bower dependency directory (https://bower.io/) 32 | bower_components 33 | 34 | # node-waf configuration 35 | .lock-wscript 36 | 37 | # Compiled binary addons (https://nodejs.org/api/addons.html) 38 | build/Release 39 | 40 | # Dependency directories 41 | node_modules/ 42 | jspm_packages/ 43 | 44 | # TypeScript v1 declaration files 45 | typings/ 46 | 47 | # TypeScript cache 48 | *.tsbuildinfo 49 | 50 | # Optional npm cache directory 51 | .npm 52 | 53 | # Optional eslint cache 54 | .eslintcache 55 | 56 | # Microbundle cache 57 | .rpt2_cache/ 58 | .rts2_cache_cjs/ 59 | .rts2_cache_es/ 60 | .rts2_cache_umd/ 61 | 62 | # Optional REPL history 63 | .node_repl_history 64 | 65 | # Output of 'npm pack' 66 | *.tgz 67 | 68 | # Yarn Integrity file 69 | .yarn-integrity 70 | 71 | # dotenv environment variables file 72 | .env 73 | .env.test 74 | 75 | # parcel-bundler cache (https://parceljs.org/) 76 | .cache 77 | 78 | # Next.js build output 79 | .next 
80 | 81 | # Nuxt.js build / generate output 82 | .nuxt 83 | dist 84 | 85 | # Gatsby files 86 | .cache/ 87 | # Comment in the public line in if your project uses Gatsby and *not* Next.js 88 | # https://nextjs.org/blog/next-9-1#public-directory-support 89 | # public 90 | 91 | # vuepress build output 92 | .vuepress/dist 93 | 94 | # Serverless directories 95 | .serverless/ 96 | 97 | # FuseBox cache 98 | .fusebox/ 99 | 100 | # DynamoDB Local files 101 | .dynamodb/ 102 | 103 | # TernJS port file 104 | .tern-port 105 | -------------------------------------------------------------------------------- /src/encoder.js: -------------------------------------------------------------------------------- 1 | const normalize = (str) => 2 | str 3 | .normalize('NFD') 4 | .replace(/[\u0300-\u036f]/g, '') 5 | .toLowerCase(); 6 | 7 | const tokenize = (str) => str.split(/[\s,.!?;:([\]'"¡¿)/]+/).filter((x) => x); 8 | 9 | class Encoder { 10 | constructor(processor) { 11 | this.processor = processor || ((str) => tokenize(normalize(str))); 12 | this.featureMap = new Map(); 13 | this.numFeature = 0; 14 | this.intentMap = new Map(); 15 | this.intents = []; 16 | } 17 | 18 | learnIntent(intent) { 19 | if (!this.intentMap.has(intent)) { 20 | this.intentMap.set(intent, this.intents.length); 21 | this.intents.push(intent); 22 | } 23 | } 24 | 25 | learnFeature(feature) { 26 | if (!this.featureMap.has(feature)) { 27 | this.featureMap.set(feature, this.numFeature); 28 | this.numFeature += 1; 29 | } 30 | } 31 | 32 | encodeText(text, learn = false) { 33 | const dict = {}; 34 | const keys = []; 35 | const features = this.processor(text); 36 | features.forEach((feature) => { 37 | if (learn) { 38 | this.learnFeature(feature); 39 | } 40 | const index = this.featureMap.get(feature); 41 | if (index !== undefined && dict[index] === undefined) { 42 | dict[index] = 1; 43 | keys.push(index); 44 | } 45 | }); 46 | return keys; 47 | } 48 | 49 | encode(text, intent, learn = false) { 50 | if (learn) { 51 | 
this.learnIntent(intent); 52 | } 53 | return { 54 | input: this.encodeText(text, learn), 55 | output: this.intentMap.get(intent), 56 | }; 57 | } 58 | 59 | encodeCorpus(corpus) { 60 | const result = { train: [], validation: [] }; 61 | corpus.forEach(({ utterances, intent }) => { 62 | if (utterances) { 63 | utterances.forEach((utterance) => { 64 | result.train.push(this.encode(utterance, intent, true)); 65 | }); 66 | } 67 | }); 68 | corpus.forEach(({ tests, intent }) => { 69 | if (tests) { 70 | tests.forEach((test) => { 71 | result.validation.push(this.encode(test, intent)); 72 | }); 73 | } 74 | }); 75 | return result; 76 | } 77 | } 78 | 79 | module.exports = { normalize, tokenize, Encoder }; 80 | -------------------------------------------------------------------------------- /src/neural.js: -------------------------------------------------------------------------------- 1 | const { Encoder } = require('./encoder'); 2 | 3 | const defaultLogFn = (status, time) => 4 | console.log(`Epoch ${status.iterations} loss ${status.error} time ${time}ms`); 5 | 6 | const runInputPerceptron = (weights, input) => { 7 | const sum = input.reduce((acc, key) => acc + weights[key], 0); 8 | return sum <= 0 ? 0 : sum; 9 | }; 10 | 11 | class Neural { 12 | constructor(settings = {}) { 13 | this.settings = settings; 14 | this.settings.maxIterations ??= 150; 15 | this.settings.learningRate ??= 0.002; 16 | this.logFn = this.settings.log === true ? 
defaultLogFn : this.settings.log; 17 | } 18 | 19 | prepareCorpus(corpus) { 20 | this.encoder = this.settings.encoder || new Encoder(this.settings.processor); 21 | this.encoded = this.encoder.encodeCorpus(corpus); 22 | } 23 | 24 | initialize(corpus) { 25 | this.prepareCorpus(corpus); 26 | this.status = { error: Infinity, iterations: 0 }; 27 | this.perceptrons = this.encoder.intents.map((intent) => ({ 28 | intent, 29 | id: this.encoder.intentMap.get(intent), 30 | weights: new Float32Array(this.encoder.numFeature), 31 | })); 32 | } 33 | 34 | trainPerceptron(perceptron, data) { 35 | const { learningRate } = this.settings; 36 | const { weights } = perceptron; 37 | let error = 0; 38 | data.forEach(({ input, output }) => { 39 | const actualOutput = runInputPerceptron(weights, input, true); 40 | const expectedOutput = output === perceptron.id ? 1 : 0; 41 | const currentError = expectedOutput - actualOutput; 42 | if (currentError) { 43 | error += currentError ** 2; 44 | const change = currentError * learningRate; 45 | input.forEach((key) => { 46 | weights[key] += change; 47 | }); 48 | } 49 | }); 50 | return error; 51 | } 52 | 53 | train(corpus) { 54 | this.initialize(Array.isArray(corpus) ? 
corpus : corpus.data); 55 | const data = this.encoded.train; 56 | const { maxIterations } = this.settings; 57 | while (this.status.iterations < maxIterations) { 58 | const hrstart = new Date(); 59 | this.status.iterations += 1; 60 | this.status.error = 61 | this.perceptrons.reduce( 62 | (acc, perceptron) => acc + this.trainPerceptron(perceptron, data), 63 | 0 64 | ) / 65 | (data.length * this.perceptrons.length); 66 | if (this.logFn) { 67 | const hrend = new Date(); 68 | this.logFn(this.status, hrend.getTime() - hrstart.getTime()); 69 | } 70 | } 71 | return this.status; 72 | } 73 | 74 | run(text) { 75 | const input = this.encoder.encodeText(text); 76 | const result = []; 77 | this.perceptrons.forEach(({ weights, intent }) => { 78 | const score = runInputPerceptron(weights, input); 79 | if (score) { 80 | result.push({ intent, score }); 81 | } 82 | }); 83 | if (!result.length) { 84 | return [{ intent: 'None', score: 0 }]; 85 | } 86 | return result.sort((a, b) => b.score - a.score); 87 | } 88 | } 89 | 90 | module.exports = { Neural }; 91 | --------------------------------------------------------------------------------