├── .gitattributes
├── logo.png
├── banner.png
├── .eslintignore
├── .gitignore
├── playground
    ├── src
    │   ├── style.css
    │   ├── main.js
    │   └── App.vue
    ├── .postcssrc.cjs
    ├── vite.config.js
    ├── tailwind.config.cjs
    ├── index.html
    └── package.json
├── .prettierrc
├── .prettierignore
├── .editorconfig
├── tests-integration
    ├── node
    │   ├── package.json
    │   └── index.js
    ├── deno
    │   └── index.ts
    ├── node-esm
    │   ├── index.js
    │   └── package.json
    └── run.sh
├── .npmignore
├── index.html
├── src
    ├── benchmark
    │   ├── tinyld.ts
    │   ├── tinyld_heavy.ts
    │   ├── tinyld_light.ts
    │   ├── languagedetect.ts
    │   ├── langdetect.ts
    │   ├── franc.ts
    │   ├── franc-all.ts
    │   ├── franc-min.ts
    │   ├── cld.ts
    │   └── bench.ts
    ├── clean
    │   └── index.ts
    ├── index.ts
    ├── index_heavy.ts
    ├── index_light.ts
    ├── train
    │   └── splitter.ts
    ├── tokenizer.ts
    ├── train.ts
    └── core.ts
├── .github
    └── workflows
    │   ├── main.yml
    │   └── playground.yml
├── docs
    ├── light.md
    ├── install.md
    ├── api.md
    ├── dev.md
    ├── cli.md
    ├── langs.md
    ├── algorithm.md
    ├── faq.md
    ├── benchmark.md
    ├── overall.svg
    └── language.svg
├── tsconfig.json
├── tests
    ├── light.js
    ├── locale.js
    ├── clean.js
    └── detect.js
├── bin
    ├── tinyld.js
    ├── tinyld-heavy.js
    └── tinyld-light.js
├── .eslintrc
├── license
├── utils
    ├── overall.js
    ├── index.js
    ├── exectime.js
    ├── language.js
    ├── length.js
    └── mkdown.js
├── data
    └── bench
    │   ├── langdetect.json
    │   ├── tinyld-light.json
    │   ├── tinyld.json
    │   ├── franc-all.json
    │   ├── tinyld-heavy.json
    │   ├── franc.json
    │   ├── cld.json
    │   ├── languagedetect.json
    │   └── franc-min.json
├── Readme.md
└── package.json


/.gitattributes:
--------------------------------------------------------------------------------
1 | package-lock.json binary
2 | yarn.lock binary
3 | 


--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/komodojp/tinyld/HEAD/logo.png


--------------------------------------------------------------------------------
/banner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/komodojp/tinyld/HEAD/banner.png


--------------------------------------------------------------------------------
/.eslintignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | bin
3 | dist
4 | tests
5 | tests-integration
6 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | dist
2 | data/tmp
3 | data/udhr
4 | data/tatoeba.csv
5 | node_modules
6 | 


--------------------------------------------------------------------------------
/playground/src/style.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 | 


--------------------------------------------------------------------------------
/playground/.postcssrc.cjs:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   plugins: {
3 |     tailwindcss: {}
4 |   }
5 | }
6 | 


--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 |   "printWidth": 120,
3 |   "trailingComma": "none",
4 |   "singleQuote": true,
5 |   "semi": false
6 | }
7 | 


--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
 1 | .quasar
 2 | node_modules
 3 | dist
 4 | public
 5 | coverage
 6 | build
 7 | SteamCI
 8 | config
 9 | *.log
10 | package.json
11 | package-lock.json
12 | tests-integration
13 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | charset = utf-8
 5 | indent_style = space
 6 | indent_size = 2
 7 | end_of_line = lf
 8 | insert_final_newline = true
 9 | trim_trailing_whitespace = true
10 | 


--------------------------------------------------------------------------------
/tests-integration/node/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "language-detect-node",
3 |   "version": "1.0.0",
4 |   "description": "",
5 |   "main": "index.js",
6 |   "author": "",
7 |   "license": "ISC"
8 | }
9 | 


--------------------------------------------------------------------------------
/playground/vite.config.js:
--------------------------------------------------------------------------------
1 | import { defineConfig } from 'vite'
2 | import vue from '@vitejs/plugin-vue'
3 | 
4 | // https://vitejs.dev/config/
5 | export default defineConfig({
6 |   plugins: [vue()]
7 | })
8 | 


--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
 1 | .git
 2 | .github
 3 | data
 4 | docs
 5 | src
 6 | tests
 7 | utils
 8 | 
 9 | .editorconfig
10 | .eslintignore
11 | .eslintrc
12 | .prettierignore
13 | .prettierrc
14 | 
15 | tsconfig.json
16 | .gitattributes
17 | 


--------------------------------------------------------------------------------
/tests-integration/deno/index.ts:
--------------------------------------------------------------------------------
1 | import { detect } from '../../dist/tinyld.normal.node.mjs'
2 | 
// Smoke test for the built ESM bundle under Deno: the process exits with 0
// only when the Japanese sample is reported as 'ja'.
const detected = detect('これは日本語です.')
console.log(`Detect Language ${detected}`)
const exitCode = detected === 'ja' ? 0 : 1
Deno.exit(exitCode)
6 | 


--------------------------------------------------------------------------------
/tests-integration/node-esm/index.js:
--------------------------------------------------------------------------------
1 | import { detect } from '../../dist/tinyld.normal.node.mjs'
2 | 
// Smoke test for the built ESM bundle under Node: the process exits with 0
// only when the Japanese sample is reported as 'ja'.
const detected = detect('これは日本語です.')
console.log(`Detect Language ${detected}`)
const exitCode = detected === 'ja' ? 0 : 1
process.exit(exitCode)
6 | 


--------------------------------------------------------------------------------
/tests-integration/node/index.js:
--------------------------------------------------------------------------------
1 | const { detect } = require('../../dist/tinyld.normal.node.js')
2 | 
// Smoke test for the built CommonJS bundle under Node: the process exits with
// 0 only when the Japanese sample is reported as 'ja'.
const detected = detect('これは日本語です.')
console.log(`Detect Language ${detected}`)
const exitCode = detected === 'ja' ? 0 : 1
process.exit(exitCode)
6 | 


--------------------------------------------------------------------------------
/tests-integration/node-esm/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "language-detect-node-esm",
 3 |   "type": "module",
 4 |   "version": "1.0.0",
 5 |   "description": "",
 6 |   "main": "index.js",
 7 |   "author": "",
 8 |   "license": "ISC"
 9 | }
10 | 


--------------------------------------------------------------------------------
/tests-integration/run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | cd "$(dirname "$0")"
 4 | 
 5 | echo "> Check DENO"
 6 | deno run ./deno/index.ts
 7 | 
 8 | echo "> Check NODE"
 9 | node ./node/index.js
10 | 
11 | echo "> Check NODE ESM"
12 | node ./node-esm/index.js
13 | 


--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 |   <head>
 4 |     <meta charset="UTF-8">
 5 |   </head>
 6 |   <body>
 7 |     <script type="module">
 8 |       import { detect } from './dist/tinyld.esm.js'
 9 | 
10 |       console.log(detect('test my text'))
11 |     </script>
12 |   </body>
13 | </html>
14 | 


--------------------------------------------------------------------------------
/playground/tailwind.config.cjs:
--------------------------------------------------------------------------------
 1 | /** @type {import('tailwindcss').Config} */
 2 | module.exports = {
 3 |   content: [
 4 |     './index.html',
 5 |     './public/**/*.html',
 6 |     './src/**/*.{vue,js,ts,jsx,tsx}'
 7 |   ],
 8 |   theme: {
 9 |     extend: {},
10 |   },
11 |   plugins: [],
12 | }
13 | 


--------------------------------------------------------------------------------
/src/benchmark/tinyld.ts:
--------------------------------------------------------------------------------
1 | import { detect } from '../index'
2 | import { benchmark } from './bench'
3 | import fs from 'fs'
// Entry point: runs `benchmark` against tinyld's `detect` and writes the
// result as pretty-printed JSON to ./data/bench/tinyld.json.
;(async () => {
  const res = await benchmark(detect)
  // `recursive: true` creates missing parent folders and does not throw when
  // the directory already exists, so no separate existence check is needed.
  fs.mkdirSync('./data/bench', { recursive: true })
  fs.writeFileSync('./data/bench/tinyld.json', JSON.stringify(res, null, 2))
})()
9 | 


--------------------------------------------------------------------------------
/src/benchmark/tinyld_heavy.ts:
--------------------------------------------------------------------------------
1 | import { detect } from '../index_heavy'
2 | import { benchmark } from './bench'
3 | import fs from 'fs'
// Entry point: runs `benchmark` against the heavy flavor's `detect` and writes
// the result as pretty-printed JSON to ./data/bench/tinyld-heavy.json.
;(async () => {
  const res = await benchmark(detect)
  // `recursive: true` creates missing parent folders and does not throw when
  // the directory already exists, so no separate existence check is needed.
  fs.mkdirSync('./data/bench', { recursive: true })
  fs.writeFileSync('./data/bench/tinyld-heavy.json', JSON.stringify(res, null, 2))
})()
9 | 


--------------------------------------------------------------------------------
/src/benchmark/tinyld_light.ts:
--------------------------------------------------------------------------------
1 | import { detect } from '../index_light'
2 | import { benchmark } from './bench'
3 | import fs from 'fs'
// Entry point: runs `benchmark` against the light flavor's `detect` and writes
// the result as pretty-printed JSON to ./data/bench/tinyld-light.json.
;(async () => {
  const res = await benchmark(detect)
  // `recursive: true` creates missing parent folders and does not throw when
  // the directory already exists, so no separate existence check is needed.
  fs.mkdirSync('./data/bench', { recursive: true })
  fs.writeFileSync('./data/bench/tinyld-light.json', JSON.stringify(res, null, 2))
})()
9 | 


--------------------------------------------------------------------------------
/playground/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="UTF-8" />
 5 |     <link rel="icon" type="image/svg+xml" href="/vite.svg" />
 6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 7 |     <title>Tinyld Playground</title>
 8 |   </head>
 9 |   <body>
10 |     <div id="app"></div>
11 |     <script type="module" src="/src/main.js"></script>
12 |   </body>
13 | </html>
14 | 


--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ develop ]
 6 |   pull_request:
 7 |     branches: [ develop ]
 8 | 
 9 | jobs:
10 |   test:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - uses: actions/checkout@v2
14 |       - name: Use Node.js
15 |         uses: actions/setup-node@v2
16 |         with:
17 |           node-version: '18.x'
18 |       - run: |
19 |           yarn
20 |           yarn build
21 |           yarn test
22 | 


--------------------------------------------------------------------------------
/docs/light.md:
--------------------------------------------------------------------------------
 1 | # **TinyLD** (Light Flavor, for web usage)
 2 | 
 3 | The normal library is fairly large (mostly because of the language profile database), which can be a problem for web usage.
 4 | 
 5 | For this use case, we also provide a lighter version (a tradeoff between disk size and accuracy).
 6 | 
 7 | - import with: `import { detect } from 'tinyld/dist/tinyld.light.cjs'`
 8 | - normal version ~900KB, light version is only ~100KB (~25KB with gzip)
 9 | - only 24 languages supported
10 | - slightly less accurate, only ~95%
11 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ESNext",
 4 |     "module": "commonjs",
 5 |     "moduleResolution": "node",
 6 |     "esModuleInterop": true,
 7 |     "strict": true,
 8 |     "noImplicitAny": true,
 9 |     "noImplicitThis": true,
10 |     "alwaysStrict": true,
11 |     "strictBindCallApply": true,
12 |     "strictNullChecks": true,
13 |     "strictFunctionTypes": true,
14 |     "strictPropertyInitialization": true,
15 |     "sourceMap": false,
16 |     "resolveJsonModule": true,
17 |     "rootDir": "src",
18 |     "outDir": "dist"
19 |   },
20 |   "include": ["src/**/*"]
21 | }
22 | 


--------------------------------------------------------------------------------
/tests/light.js:
--------------------------------------------------------------------------------
 1 | const { test } = require('uvu')
 2 | const assert = require('uvu/assert')
 3 | const light = require('../dist/tinyld.light.node.js')
 4 | 
 5 | function assertLocale(locale, val) {
 6 |   const res = light.detectAll(val)
 7 |   if (res.length > 0 && res[0].lang != locale) light.detectAll(val, { verbose: true })
 8 |   assert.is(light.detect(val), locale, `is ${locale} : ${val}`)
 9 | }
10 | 
11 | test('Supported Language', () => {
12 |   assert.is(light.supportedLanguages.length, 24)
13 | })
14 | 
15 | test('Detect English', () => {
16 |   assertLocale('en', 'I’m still learning English, so please speak slowly.')
17 | })
18 | 
19 | test.run()
20 | 


--------------------------------------------------------------------------------
/src/benchmark/languagedetect.ts:
--------------------------------------------------------------------------------
 1 | import { benchmark } from './bench'
 2 | import fs from 'fs'
 3 | 
 4 | // eslint-disable-next-line @typescript-eslint/no-var-requires
 5 | const languageDetect = require('languagedetect')
 6 | const lngDetector = new languageDetect()
 7 | lngDetector.setLanguageType('iso2')
 8 | 
 9 | function detect(val: string): string {
10 |   const res = lngDetector.detect(val)
11 |   if (res.length > 0) return res[0][0] || ''
12 |   return ''
13 | }
14 | 
// Entry point: runs `benchmark` against the languagedetect adapter and writes
// the result as pretty-printed JSON to ./data/bench/languagedetect.json.
;(async () => {
  const res = await benchmark(detect)
  // `recursive: true` creates missing parent folders and does not throw when
  // the directory already exists, so no separate existence check is needed.
  fs.mkdirSync('./data/bench', { recursive: true })
  fs.writeFileSync('./data/bench/languagedetect.json', JSON.stringify(res, null, 2))
})()
20 | 


--------------------------------------------------------------------------------
/playground/src/main.js:
--------------------------------------------------------------------------------
 1 | import { createApp } from 'vue'
 2 | import './style.css'
 3 | import App from './App.vue'
 4 | 
 5 | import { library } from '@fortawesome/fontawesome-svg-core'
 6 | import { faGithub } from '@fortawesome/free-brands-svg-icons'
 7 | import { faChartLine, faShareFromSquare } from '@fortawesome/free-solid-svg-icons'
 8 | import { faCircleQuestion } from '@fortawesome/free-regular-svg-icons'
 9 | import { FontAwesomeIcon } from '@fortawesome/vue-fontawesome'
10 | 
// Register the icons used by the playground in the Font Awesome library.
library.add(faGithub, faChartLine, faShareFromSquare, faCircleQuestion)

// Create the Vue app, register the icon component as <v-icon> and mount it.
createApp(App).component('v-icon', FontAwesomeIcon).mount('#app')
19 | 


--------------------------------------------------------------------------------
/src/benchmark/langdetect.ts:
--------------------------------------------------------------------------------
 1 | import { benchmark } from './bench'
 2 | import fs from 'fs'
 3 | 
 4 | // eslint-disable-next-line @typescript-eslint/no-var-requires
 5 | const { detect } = require('langdetect')
 6 | 
 7 | function langdetect(val: string): string {
 8 |   const res = detect(val)
 9 |   if (res && res.length > 0) {
10 |     const lang = res[0].lang || ''
11 |     if (['zh-cn', 'zh-tw'].includes(lang)) return 'zh'
12 |     return lang
13 |   }
14 |   return ''
15 | }
16 | 
// Entry point: runs `benchmark` against the langdetect adapter and writes the
// result as pretty-printed JSON to ./data/bench/langdetect.json.
;(async () => {
  const res = await benchmark(langdetect)
  // `recursive: true` creates missing parent folders and does not throw when
  // the directory already exists, so no separate existence check is needed.
  fs.mkdirSync('./data/bench', { recursive: true })
  fs.writeFileSync('./data/bench/langdetect.json', JSON.stringify(res, null, 2))
})()
22 | 


--------------------------------------------------------------------------------
/bin/tinyld.js:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env node
 2 | const { detectAll } = require('../dist/tinyld.normal.node.js')
 3 | 
 4 | function main() {
 5 |   const [, , ...args] = process.argv
 6 | 
 7 |   let onlyLangs = []
 8 |   let verbose = false
 9 | 
10 |   const texts = []
11 |   for (const arg of [...args]) {
12 |     if (arg.startsWith('--only=')) {
13 |       onlyLangs = arg.replace('--only=', '').split(',')
14 |       continue
15 |     }
16 | 
17 |     if (arg.startsWith('--verbose') || arg.startsWith('-v')) {
18 |       verbose = true
19 |       continue
20 |     }
21 | 
22 |     texts.push(arg)
23 |   }
24 |   const message = texts.join(' ')
25 |   const options = { only: onlyLangs, verbose }
26 |   console.log(detectAll(message, options))
27 | }
28 | 
29 | main()
30 | 


--------------------------------------------------------------------------------
/bin/tinyld-heavy.js:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env node
 2 | const { detectAll } = require('../dist/tinyld.heavy.node.js')
 3 | 
 4 | function main() {
 5 |   const [, , ...args] = process.argv
 6 | 
 7 |   let onlyLangs = []
 8 |   let verbose = false
 9 | 
10 |   const texts = []
11 |   for (const arg of [...args]) {
12 |     if (arg.startsWith('--only=')) {
13 |       onlyLangs = arg.replace('--only=', '').split(',')
14 |       continue
15 |     }
16 | 
17 |     if (arg.startsWith('--verbose') || arg.startsWith('-v')) {
18 |       verbose = true
19 |       continue
20 |     }
21 | 
22 |     texts.push(arg)
23 |   }
24 |   const message = texts.join(' ')
25 |   const options = { only: onlyLangs, verbose }
26 |   console.log(detectAll(message, options))
27 | }
28 | 
29 | main()
30 | 


--------------------------------------------------------------------------------
/bin/tinyld-light.js:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env node
 2 | const { detectAll } = require('../dist/tinyld.light.node.js')
 3 | 
 4 | function main() {
 5 |   const [, , ...args] = process.argv
 6 | 
 7 |   let onlyLangs = []
 8 |   let verbose = false
 9 | 
10 |   const texts = []
11 |   for (const arg of [...args]) {
12 |     if (arg.startsWith('--only=')) {
13 |       onlyLangs = arg.replace('--only=', '').split(',')
14 |       continue
15 |     }
16 | 
17 |     if (arg.startsWith('--verbose') || arg.startsWith('-v')) {
18 |       verbose = true
19 |       continue
20 |     }
21 | 
22 |     texts.push(arg)
23 |   }
24 |   const message = texts.join(' ')
25 |   const options = { only: onlyLangs, verbose }
26 |   console.log(detectAll(message, options))
27 | }
28 | 
29 | main()
30 | 


--------------------------------------------------------------------------------
/src/benchmark/franc.ts:
--------------------------------------------------------------------------------
 1 | import { toISO2 } from '../core'
 2 | import { benchmark } from './bench'
 3 | import fs from 'fs'
 4 | // eslint-disable-next-line @typescript-eslint/no-var-requires
 5 | const franc = require('franc')
 6 | 
 7 | const langMap: { [id: string]: string } = {
 8 |   arb: 'ara',
 9 |   fas: 'pes',
10 |   lav: 'lat',
11 |   nno: 'nob'
12 | }
13 | 
14 | function detect(val: string): string {
15 |   let res = franc(val)
16 |   if (res === 'und') res = ''
17 |   else if (res in langMap) res = langMap[res]
18 |   return res ? toISO2(res) : ''
19 | }
20 | 
// Entry point: runs `benchmark` against the franc adapter and writes the
// result as pretty-printed JSON to ./data/bench/franc.json.
;(async () => {
  const res = await benchmark(detect)
  // `recursive: true` creates missing parent folders and does not throw when
  // the directory already exists, so no separate existence check is needed.
  fs.mkdirSync('./data/bench', { recursive: true })
  fs.writeFileSync('./data/bench/franc.json', JSON.stringify(res, null, 2))
})()
26 | 


--------------------------------------------------------------------------------
/src/benchmark/franc-all.ts:
--------------------------------------------------------------------------------
 1 | import { toISO2 } from '../core'
 2 | import { benchmark } from './bench'
 3 | import fs from 'fs'
 4 | // eslint-disable-next-line @typescript-eslint/no-var-requires
 5 | const franc = require('franc-all')
 6 | 
 7 | const langMap: { [id: string]: string } = {
 8 |   arb: 'ara',
 9 |   fas: 'pes',
10 |   lav: 'lat',
11 |   nno: 'nob'
12 | }
13 | 
14 | function detect(val: string): string {
15 |   let res = franc(val)
16 |   if (res === 'und') res = ''
17 |   else if (res in langMap) res = langMap[res]
18 |   return res ? toISO2(res) : ''
19 | }
20 | 
// Entry point: runs `benchmark` against the franc-all adapter and writes the
// result as pretty-printed JSON to ./data/bench/franc-all.json.
;(async () => {
  const res = await benchmark(detect)
  // `recursive: true` creates missing parent folders and does not throw when
  // the directory already exists, so no separate existence check is needed.
  fs.mkdirSync('./data/bench', { recursive: true })
  fs.writeFileSync('./data/bench/franc-all.json', JSON.stringify(res, null, 2))
})()
26 | 


--------------------------------------------------------------------------------
/src/benchmark/franc-min.ts:
--------------------------------------------------------------------------------
 1 | import { toISO2 } from '../core'
 2 | import { benchmark } from './bench'
 3 | import fs from 'fs'
 4 | // eslint-disable-next-line @typescript-eslint/no-var-requires
 5 | const franc = require('franc-min')
 6 | 
 7 | const langMap: { [id: string]: string } = {
 8 |   arb: 'ara',
 9 |   fas: 'pes',
10 |   lav: 'lat',
11 |   nno: 'nob'
12 | }
13 | 
14 | function detect(val: string): string {
15 |   let res = franc(val)
16 |   if (res === 'und') res = ''
17 |   else if (res in langMap) res = langMap[res]
18 |   return res ? toISO2(res) : ''
19 | }
20 | 
// Entry point: runs `benchmark` against the franc-min adapter and writes the
// result as pretty-printed JSON to ./data/bench/franc-min.json.
;(async () => {
  const res = await benchmark(detect)
  // `recursive: true` creates missing parent folders and does not throw when
  // the directory already exists, so no separate existence check is needed.
  fs.mkdirSync('./data/bench', { recursive: true })
  fs.writeFileSync('./data/bench/franc-min.json', JSON.stringify(res, null, 2))
})()
26 | 


--------------------------------------------------------------------------------
/playground/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "tinyld-playground",
 3 |   "private": true,
 4 |   "version": "0.0.0",
 5 |   "type": "module",
 6 |   "scripts": {
 7 |     "dev": "vite",
 8 |     "build": "vite build",
 9 |     "preview": "vite preview"
10 |   },
11 |   "dependencies": {
12 |     "@fortawesome/fontawesome-svg-core": "^6.2.0",
13 |     "@fortawesome/free-brands-svg-icons": "^6.2.0",
14 |     "@fortawesome/free-regular-svg-icons": "^6.2.0",
15 |     "@fortawesome/free-solid-svg-icons": "^6.2.0",
16 |     "@fortawesome/vue-fontawesome": "^3.0.2",
17 |     "@vueuse/core": "^9.5.0",
18 |     "tailwindcss": "^3.2.3",
19 |     "tinyld": "^1.3.1",
20 |     "vue": "^3.2.41"
21 |   },
22 |   "devDependencies": {
23 |     "@vitejs/plugin-vue": "^3.2.0",
24 |     "postcss": "^8.4.12",
25 |     "vite": "^3.2.3"
26 |   }
27 | }
28 | 


--------------------------------------------------------------------------------
/.eslintrc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "root": true,
 3 |   "extends": [
 4 |     "eslint:recommended"
 5 |   ],
 6 |   "env": {
 7 |     "node": true,
 8 |     "es2021": true
 9 |   },
10 |   "parserOptions": {
11 |     "ecmaVersion": 12,
12 |     "sourceType": "module"
13 |   },
14 |   "overrides": [
15 |     {
16 |       "files": ["**/*.ts", "**/*.tsx"],
17 |       "parser": "@typescript-eslint/parser",
18 |       "plugins": [
19 |         "@typescript-eslint"
20 |       ],
21 |       "extends": [
22 |         "eslint:recommended",
23 |         "plugin:@typescript-eslint/eslint-recommended",
24 |         "plugin:@typescript-eslint/recommended"
25 |       ],
26 |       "parserOptions": {
27 |         "project": ["./tsconfig.json"]
28 |       },
29 |       "rules": {
30 |         "@typescript-eslint/no-extra-semi": "off"
31 |       }
32 |     }
33 |   ],
34 |   "rules": {
35 |     "no-extra-semi": "off"
36 |   }
37 | }
38 | 


--------------------------------------------------------------------------------
/tests/locale.js:
--------------------------------------------------------------------------------
 1 | const { test } = require('uvu')
 2 | const assert = require('uvu/assert')
 3 | const { supportedLanguages, validateISO2, toISO3, toISO2 } = require('../dist/tinyld.normal.node.js')
 4 | 
 5 | test('Supported Language', () => {
 6 |   assert.is(supportedLanguages.length, 62)
 7 | })
 8 | 
 9 | test('Validate Locale', () => {
10 |   assert.is(validateISO2('jp'), 'ja')
11 |   assert.is(validateISO2('ja'), 'ja')
12 |   assert.is(validateISO2('fr'), 'fr')
13 |   assert.is(validateISO2('us'), 'en')
14 |   assert.is(validateISO2('gb'), 'en')
15 |   assert.is(validateISO2('en'), 'en')
16 | })
17 | 
18 | test('Locale toISO3', () => {
19 |   assert.is(toISO3('jp'), 'jpn')
20 |   assert.is(toISO3('ja'), 'jpn')
21 |   assert.is(toISO3('fr'), 'fra')
22 | })
23 | 
24 | test('Locale toISO2', () => {
25 |   assert.is(toISO2('jpn'), 'ja')
26 |   assert.is(toISO2('fra'), 'fr')
27 | })
28 | 
29 | test.run()
30 | 


--------------------------------------------------------------------------------
/src/benchmark/cld.ts:
--------------------------------------------------------------------------------
 1 | import { benchmark } from './bench'
 2 | import fs from 'fs'
 3 | // eslint-disable-next-line @typescript-eslint/no-var-requires
 4 | const cld = require('cld')
 5 | 
 6 | const langMap: { [id: string]: string } = {
 7 |   iw: 'he', // hebrew changed in 1988, no idea why cld is still using this
 8 |   'zh-Hant': 'zh',
 9 |   'xx-Java': 'jv',
10 |   ms: 'id',
11 |   jw: 'jv'
12 | }
13 | 
14 | async function detect(val: string) {
15 |   try {
16 |     const result = await cld.detect(val)
17 |     let res = result.languages[0].code
18 |     if (res in langMap) res = langMap[res]
19 |     return res
20 |   } catch (err) {
21 |     //
22 |   }
23 |   return ''
24 | }
25 | 
// Entry point: runs `benchmark` against the cld adapter and writes the result
// as pretty-printed JSON to ./data/bench/cld.json.
;(async () => {
  const res = await benchmark(detect)
  // `recursive: true` creates missing parent folders and does not throw when
  // the directory already exists, so no separate existence check is needed.
  fs.mkdirSync('./data/bench', { recursive: true })
  fs.writeFileSync('./data/bench/cld.json', JSON.stringify(res, null, 2))
})()
31 | 


--------------------------------------------------------------------------------
/src/clean/index.ts:
--------------------------------------------------------------------------------
 1 | const REGEXP_PUNCTUATIONS = /[,.。，、#%&/\\+*¡!¿?[\]！？;:…„“«»”"“_–—~]/gi
 2 | const REGEXP_NUMBERS = /[0-9]/g
 3 | const REGEXP_FULLWIDTH_NUMBERS = /[\uFF10-\uFF19]/g
 4 | const REGEXP_SPACES = /\s\s+/g
 5 | const REGEXP_APOSTROPHE = /’/gi
 6 | const REGEXP_NORMALIZE = /[\u0300-\u036f]/g
 7 | 
 8 | export function isString(value: unknown): boolean {
 9 |   return typeof value === 'string' || value instanceof String
10 | }
11 | 
12 | export function cleanString(value: string): string {
13 |   return value
14 |     .toLowerCase()
15 |     .replace(REGEXP_APOSTROPHE, "'")
16 |     .replace(REGEXP_PUNCTUATIONS, ' ')
17 |     .replace(REGEXP_FULLWIDTH_NUMBERS, (m) => String.fromCharCode(m.charCodeAt(0) - 0xfee0))
18 |     .replace(REGEXP_NUMBERS, '')
19 |     .replace(REGEXP_SPACES, ' ')
20 |     .trim()
21 | }
22 | 
23 | export function normalize(value: string): string {
24 |   return value.normalize('NFD').replace(REGEXP_NORMALIZE, '')
25 | }
26 | 


--------------------------------------------------------------------------------
/tests/clean.js:
--------------------------------------------------------------------------------
 1 | const { test } = require('uvu')
 2 | const assert = require('uvu/assert')
 3 | const { cleanString } = require('../dist/tinyld.normal.node.js')
 4 | 
 5 | function check(str, expected) {
 6 |   assert.is(cleanString(str), expected, `Clean ${str}`)
 7 | }
 8 | 
 9 | test('Clean String - Punctuation', () => {
10 |   check('Bonjour', 'bonjour')
11 |   check('Bonjour,', 'bonjour')
12 |   check('Bonjour, comment ca va?', 'bonjour comment ca va')
13 |   check('先程、どういうわけかマイクが入りませんでした。', '先程 どういうわけかマイクが入りませんでした')
14 |   check('¿Dónde vives?', 'dónde vives')
15 | 
16 |   check('那是一张近照吗？', '那是一张近照吗')
17 |   check('那就表示有問題...', '那就表示有問題')
18 |   check('要变得完美，她就是少了一个缺点。', '要变得完美 她就是少了一个缺点')
19 |   check(
20 |     '"Daran habe ich nie gedacht", sagte der alte Mann. "Was sollen wir tun?"',
21 |     'daran habe ich nie gedacht sagte der alte mann was sollen wir tun'
22 |   )
23 |   check(
24 |     '„Wann wirst du zurückkommen?“ – „Das hängt ganz vom Wetter ab.“',
25 |     'wann wirst du zurückkommen das hängt ganz vom wetter ab'
26 |   )
27 | })
28 | 
29 | test.run()
30 | 


--------------------------------------------------------------------------------
/license:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Komodo
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/docs/install.md:
--------------------------------------------------------------------------------
 1 | # Getting Started
 2 | 
 3 | ## NodeJS
 4 | 
 5 | ```sh
 6 | # for npm users
 7 | npm install --save tinyld
 8 | 
 9 | # for yarn users
10 | yarn add tinyld
11 | ```
12 | 
13 | Then import it:
14 | 
15 | ```ts
16 | const { detect } = require('tinyld')
17 | // or ESM
18 | import { detect } from 'tinyld'
19 | ```
20 | 
21 | ## Browser Usage (CDN)
22 | 
23 | ```html
24 | <script type="module">
25 |   import { detect } from 'https://cdn.jsdelivr.net/npm/tinyld@1.3.0/dist/tinyld.normal.browser.js'
26 |   // ...
27 | </script>
28 | ```
29 | 
30 | ## Deno (Pika CDN)
31 | 
32 | ```ts
33 | import { detect } from 'https://cdn.skypack.dev/tinyld'
34 | ```
35 | 
36 | ---
37 | 
38 | ## API
39 | 
40 | ```js
41 | import { detect, detectAll } from 'tinyld'
42 | // or node: `const { detect } = require('tinyld')`
43 | 
44 | // Detect
45 | detect('ceci est un text en francais.') // fr
46 | detect('これは日本語です.') // ja
47 | detect('and this is english.') // en
48 | 
49 | // DetectAll
50 | detectAll('ceci est un text en francais.')
51 | // [ { lang: 'fr', accuracy: 0.5238 }, { lang: 'ro', accuracy: 0.3802 }, ... ]
52 | ```
53 | 
54 | ---
55 | 
56 | [More about the API Documentation](./api.md)
57 | 


--------------------------------------------------------------------------------
/tests/detect.js:
--------------------------------------------------------------------------------
 1 | const { test } = require('uvu')
 2 | const assert = require('uvu/assert')
 3 | const { detect, detectAll } = require('../dist/tinyld.normal.node.js')
 4 | 
 5 | function assertLocale(locale, val) {
 6 |   const res = detectAll(val)
 7 |   if (res.length > 0 && res[0].lang != locale) detectAll(val, { verbose: true })
 8 |   assert.is(detect(val), locale, `is ${locale} : ${val}`)
 9 | }
10 | 
11 | test('Check input', () => {
12 |   assert.is(detect(''), '')
13 |   assert.is(detect(1), '')
14 | })
15 | 
16 | test('Detect French', () => {
17 |   assertLocale('fr', 'Bonjour les gens')
18 |   assertLocale('fr', 'Bonne après-midi')
19 |   assertLocale('fr', 'Ceci est un texte en francais.')
20 |   // assertLocale('fr', 'reste cool sac a merde')
21 | })
22 | 
23 | test('Detect Japanese', () => {
24 |   assertLocale('ja', 'モリーンです。')
25 |   assertLocale('ja', '本は面白いです')
26 |   assertLocale('ja', 'これは日本語です.')
27 | })
28 | 
29 | test('Detect Korean', () => {
30 |   assertLocale('ko', '저는 7년 동안 한국에서 살았어요')
31 |   assertLocale('ko', '한국인')
32 | })
33 | 
34 | test('Detect English', () => {
35 |   assertLocale('en', 'I’m still learning English, so please speak slowly.')
36 |   assertLocale('en', 'I just started working here')
37 |   assertLocale('en', 'Good morning')
38 |   assertLocale('en', 'and this is english.')
39 | })
40 | 
41 | test.run()
42 | 


--------------------------------------------------------------------------------
/utils/overall.js:
--------------------------------------------------------------------------------
 1 | const chartistSvg = require('chartist-svg')
 2 | 
 3 | module.exports = (data) => {
 4 |   const libraries = Object.keys(data)
 5 | 
 6 |   var graph = {
 7 |     title: 'NodeJS Language Detection - Overall Accuracy',
 8 |     subtitle: ' (green: Success, orange: Unidentified, red: Error)',
 9 |     labels: libraries,
10 |     series: [
11 |       Object.values(data).map((x) => x.stats.success_rate),
12 |       Object.values(data).map((x) => x.stats.unindentified_rate),
13 |       Object.values(data).map((x) => x.stats.error_rate)
14 |     ]
15 |   }
16 | 
17 |   var options = {
18 |     options: {
19 |       low: 30,
20 |       high: 100,
21 |       onlyInteger: true,
22 |       width: 1200,
23 |       height: 600,
24 |       stackBars: true
25 |     },
26 |     css: `
27 | svg { background: #FFF; }
28 | 
29 | .ct-series-a .ct-bar, .ct-series-a .ct-line, .ct-series-a .ct-point, .ct-series-a .ct-slice-donut {
30 |     stroke: #468966;
31 |     stroke-width: 40px !important;
32 | }
33 | .ct-series-b .ct-bar, .ct-series-b .ct-line, .ct-series-b .ct-point, .ct-series-b .ct-slice-donut {
34 |     stroke: #FEC771;
35 |     stroke-width: 40px !important;
36 | }
37 | .ct-series-c .ct-bar, .ct-series-c .ct-line, .ct-series-c .ct-point, .ct-series-c .ct-slice-donut {
38 |     stroke: #EB7070;
39 |     stroke-width: 40px !important;
40 | }
41 | `
42 |   }
43 | 
44 |   return chartistSvg('bar', graph, options)
45 | }
46 | 


--------------------------------------------------------------------------------
/.github/workflows/playground.yml:
--------------------------------------------------------------------------------
 1 | # Simple workflow for deploying static content to GitHub Pages
 2 | name: Deploy static content to Pages
 3 | 
 4 | on:
 5 |   # Runs on pushes targeting the default branch
 6 |   push:
 7 |     branches: ["develop"]
 8 | 
 9 |   # Allows you to run this workflow manually from the Actions tab
10 |   workflow_dispatch:
11 | 
12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
13 | permissions:
14 |   contents: read
15 |   pages: write
16 |   id-token: write
17 | 
18 | # Allow one concurrent deployment
19 | concurrency:
20 |   group: "pages"
21 |   cancel-in-progress: true
22 | 
23 | jobs:
24 |   # Single deploy job since we're just deploying
25 |   deploy:
26 |     environment:
27 |       name: github-pages
28 |       url: ${{ steps.deployment.outputs.page_url }}
29 |     runs-on: ubuntu-latest
30 |     steps:
31 |       - name: Checkout
32 |         uses: actions/checkout@v3
33 |       - uses: actions/setup-node@v3
34 |         with:
35 |           node-version: 18
36 |           cache: 'npm'
37 |       - run: |
38 |           cd playground
39 |           yarn
40 |           yarn build --base=/tinyld/
41 |       - name: Setup Pages
42 |         uses: actions/configure-pages@v2
43 |       - name: Upload artifact
44 |         uses: actions/upload-pages-artifact@v1
45 |         with:
46 |           # Upload only the built playground output
47 |           path: './playground/dist'
48 |       - name: Deploy to GitHub Pages
49 |         id: deployment
50 |         uses: actions/deploy-pages@v1
51 | 


--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
 1 | import { isString } from './clean'
 2 | import { DetectOption, ILangCompressedProfiles, ILangProfiles, langFromId, parseDetectOption } from './core'
 3 | import data from './profiles/normal.json'
 4 | import { detectAllStats } from './tokenizer'
 5 | 
 6 | // Compact profile exactly as stored in the JSON file
 7 | const compressed = data as ILangCompressedProfiles
 8 | 
 9 | // Lookup tables handed to detectAllStats, decoded once when the module is loaded
10 | const profiles: ILangProfiles = {
11 |   // token -> language: every value is a base-36 index into `langFromId`
12 |   uniques: Object.fromEntries(
13 |     Object.entries(compressed.uniques).map(([token, id]) => [token, langFromId[parseInt(id.toString(), 36)]])
14 |   ),
15 |   // token -> { language: value }: every entry is cut into chunks of up to 4 characters,
16 |   // and each chunk into a base-36 language index followed by a base-36 value
17 |   multiples: Object.fromEntries(
18 |     Object.entries(compressed.multiples).map(([token, packed]) => {
19 |       const chunks: string[] = packed.match(/(.{1,4})/g) ?? []
20 |       const pairs = chunks.map((chunk) => {
21 |         const [country, val] = chunk.match(/(.{1,2})/g) as string[]
22 |         return [langFromId[parseInt(country, 36)], parseInt(val, 36)]
23 |       })
24 |       return [token, Object.fromEntries(pairs)]
25 |     })
26 |   )
27 | }
28 | const uniqueKeys = new Set(Object.keys(data.uniques))
29 | 
30 | /**
31 |  * Return the language of the first `detectAll` candidate for `text`,
32 |  * or an empty string when `detectAll` returns no candidate.
33 |  */
34 | export function detect(text: string, opts?: Partial<DetectOption>): string {
35 |   const [best] = detectAll(text, opts)
36 |   return best ? best.lang : ''
37 | }
38 | 
39 | /**
40 |  * Return the candidates produced by `detectAllStats` for `text`.
41 |  * Options are parsed first; a non-string `text` then yields an empty list.
42 |  */
43 | export function detectAll(text: string, opts?: Partial<DetectOption>): { lang: string; accuracy: number }[] {
44 |   const options = parseDetectOption(opts)
45 |   return isString(text) ? detectAllStats(text, options, profiles, uniqueKeys) : []
46 | }
47 | 
48 | // Helpers re-exported so that consumers only need this entry point
49 | export { cleanString } from './clean'
50 | export { toISO2, toISO3, langName, langRegion, validateISO2, supportedLanguages } from './core'
51 | 


--------------------------------------------------------------------------------
/src/index_heavy.ts:
--------------------------------------------------------------------------------
 1 | import { isString } from './clean'
 2 | import { DetectOption, ILangCompressedProfiles, ILangProfiles, langFromId, parseDetectOption } from './core'
 3 | import data from './profiles/heavy.json'
 4 | import { detectAllStats } from './tokenizer'
 5 | 
 6 | // Compact heavy profile exactly as stored in the JSON file
 7 | const compressed = data as ILangCompressedProfiles
 8 | 
 9 | // Lookup tables handed to detectAllStats, decoded once when the module is loaded
10 | const profiles: ILangProfiles = {
11 |   // token -> language: every value is a base-36 index into `langFromId`
12 |   uniques: Object.fromEntries(
13 |     Object.entries(compressed.uniques).map(([token, id]) => [token, langFromId[parseInt(id.toString(), 36)]])
14 |   ),
15 |   // token -> { language: value }: every entry is cut into chunks of up to 4 characters,
16 |   // and each chunk into a base-36 language index followed by a base-36 value
17 |   multiples: Object.fromEntries(
18 |     Object.entries(compressed.multiples).map(([token, packed]) => {
19 |       const chunks: string[] = packed.match(/(.{1,4})/g) ?? []
20 |       const pairs = chunks.map((chunk) => {
21 |         const [country, val] = chunk.match(/(.{1,2})/g) as string[]
22 |         return [langFromId[parseInt(country, 36)], parseInt(val, 36)]
23 |       })
24 |       return [token, Object.fromEntries(pairs)]
25 |     })
26 |   )
27 | }
28 | const uniqueKeys = new Set(Object.keys(data.uniques))
29 | 
30 | /**
31 |  * Return the language of the first `detectAll` candidate for `text`,
32 |  * or an empty string when `detectAll` returns no candidate.
33 |  */
34 | export function detect(text: string, opts?: Partial<DetectOption>): string {
35 |   const [best] = detectAll(text, opts)
36 |   return best ? best.lang : ''
37 | }
38 | 
39 | /**
40 |  * Return the candidates produced by `detectAllStats` for `text`.
41 |  * Options are parsed first; a non-string `text` then yields an empty list.
42 |  */
43 | export function detectAll(text: string, opts?: Partial<DetectOption>): { lang: string; accuracy: number }[] {
44 |   const options = parseDetectOption(opts)
45 |   return isString(text) ? detectAllStats(text, options, profiles, uniqueKeys) : []
46 | }
47 | 
48 | // Helpers re-exported so that consumers only need this entry point
49 | export { cleanString } from './clean'
50 | export { toISO2, toISO3, langName, langRegion, validateISO2, supportedLanguages } from './core'
51 | 


--------------------------------------------------------------------------------
/src/index_light.ts:
--------------------------------------------------------------------------------
 1 | import { isString } from './clean'
 2 | import { DetectOption, ILangProfiles, parseDetectOption, ILangCompressedProfiles, langFromId } from './core'
 3 | import data from './profiles/light.json'
 4 | import { detectAllStats } from './tokenizer'
 5 | 
 6 | // Compact light profile exactly as stored in the JSON file
 7 | const compressed = data as ILangCompressedProfiles
 8 | 
 9 | // Lookup tables handed to detectAllStats, decoded once when the module is loaded
10 | const profiles: ILangProfiles = {
11 |   // token -> language: every value is a base-36 index into `langFromId`
12 |   uniques: Object.fromEntries(
13 |     Object.entries(compressed.uniques).map(([token, id]) => [token, langFromId[parseInt(id.toString(), 36)]])
14 |   ),
15 |   // token -> { language: value }: every entry is cut into chunks of up to 3 characters;
16 |   // the first character is a base-36 language index, the rest a base-36 value
17 |   multiples: Object.fromEntries(
18 |     Object.entries(compressed.multiples).map(([token, packed]) => {
19 |       const chunks: string[] = packed.match(/(.{1,3})/g) ?? []
20 |       const pairs = chunks.map((chunk) => {
21 |         return [langFromId[parseInt(chunk.slice(0, 1), 36)], parseInt(chunk.slice(1), 36)]
22 |       })
23 |       return [token, Object.fromEntries(pairs)]
24 |     })
25 |   )
26 | }
27 | const uniqueKeys = new Set(Object.keys(data.uniques))
28 | 
29 | /**
30 |  * Return the language of the first `detectAll` candidate for `text`,
31 |  * or an empty string when `detectAll` returns no candidate.
32 |  */
33 | export function detect(text: string, opts?: Partial<DetectOption>): string {
34 |   const [best] = detectAll(text, opts)
35 |   return best ? best.lang : ''
36 | }
37 | 
38 | /**
39 |  * Return the candidates produced by `detectAllStats` for `text`.
40 |  * Options are parsed first; a non-string `text` then yields an empty list.
41 |  */
42 | export function detectAll(text: string, opts?: Partial<DetectOption>): { lang: string; accuracy: number }[] {
43 |   const options = parseDetectOption(opts)
44 |   return isString(text) ? detectAllStats(text, options, profiles, uniqueKeys) : []
45 | }
46 | 
47 | // Helpers re-exported so that consumers only need this entry point
48 | export { cleanString } from './clean'
49 | export { toISO2, toISO3, langName, langRegion, validateISO2, supportedLanguages } from './core'
50 | 


--------------------------------------------------------------------------------
/utils/index.js:
--------------------------------------------------------------------------------
 1 | const fs = require('fs')
 2 | const graphOverall = require('./overall')
 3 | const graphLanguage = require('./language')
 4 | const graphLength = require('./length')
 5 | const graphExecution = require('./exectime')
 6 | 
 7 | function getJSON(filepath) {
 8 |   return JSON.parse(fs.readFileSync(filepath))
 9 | }
10 | 
11 | ;(async () => {
12 |   const data = {
13 |     'tinyld-heavy': getJSON('./data/bench/tinyld-heavy.json'),
14 |     tinyld: getJSON('./data/bench/tinyld.json'),
15 |     'tinyld-light': getJSON('./data/bench/tinyld-light.json'),
16 |     langdetect: getJSON('./data/bench/langdetect.json'),
17 |     cld: getJSON('./data/bench/cld.json'),
18 |     franc: getJSON('./data/bench/franc.json'),
19 |     'franc-min': getJSON('./data/bench/franc-min.json'),
20 |     'franc-all': getJSON('./data/bench/franc-all.json'),
21 |     languagedetect: getJSON('./data/bench/languagedetect.json')
22 |   }
23 | 
24 |   const overall = await graphOverall(data)
25 |   fs.writeFileSync('./docs/overall.svg', overall.replace('<svg ', '<svg xmlns="http://www.w3.org/2000/svg" '))
26 | 
27 |   const lang = await graphLanguage(data, ['jpn', 'kor', 'cmn', 'ara', 'fin', 'rus', 'fra', 'spa', 'por', 'eng'])
28 |   fs.writeFileSync('./docs/language.svg', lang.replace('<svg ', '<svg xmlns="http://www.w3.org/2000/svg" '))
29 | 
30 |   const len = await graphLength(data)
31 |   fs.writeFileSync('./docs/length.svg', len.replace('<svg ', '<svg xmlns="http://www.w3.org/2000/svg" '))
32 | 
33 |   const exe = await graphExecution(data)
34 |   fs.writeFileSync('./docs/exec_time.svg', exe.replace('<svg ', '<svg xmlns="http://www.w3.org/2000/svg" '))
35 | })()
36 | 


--------------------------------------------------------------------------------
/src/train/splitter.ts:
--------------------------------------------------------------------------------
 1 | import fs from 'fs'
 2 | import readline from 'readline'
 3 | import { cleanString } from '../clean'
 4 | import { wordTokenizer } from '../tokenizer'
 5 | 
 6 | /** A word together with the number of times it was counted. */
 7 | export interface FreqWord {
 8 |   word: string
 9 |   count: number
10 | }
11 | 
12 | /** Open `fileIn` as a line-by-line reader; `crlfDelay: Infinity` makes every \r\n count as one line break. */
13 | function readLines(fileIn: string): readline.Interface {
14 |   return readline.createInterface({
15 |     input: fs.createReadStream(fileIn),
16 |     crlfDelay: Infinity
17 |   })
18 | }
19 | 
20 | /** Turn a word -> count map into a FreqWord list, keeping the map's insertion order. */
21 | function toFreqWords(wordRank: Map<string, number>): FreqWord[] {
22 |   return Array.from(wordRank, ([word, count]): FreqWord => ({ word, count }))
23 | }
24 | 
25 | /**
26 |  * Count how often each word occurs in a file of sentences.
27 |  * Every line is passed through `cleanString` and `wordTokenizer`; empty tokens are ignored.
28 |  *
29 |  * @param fileIn path of the text file to read
30 |  * @returns one entry per distinct word with its number of occurrences
31 |  */
32 | export async function processSentencesLineByLine(fileIn: string): Promise<FreqWord[]> {
33 |   const wordRank = new Map<string, number>()
34 | 
35 |   for await (const line of readLines(fileIn)) {
36 |     wordTokenizer(cleanString(line)).forEach((word) => {
37 |       if (!word) return
38 |       wordRank.set(word, (wordRank.get(word) ?? 0) + 1)
39 |     })
40 |   }
41 | 
42 |   return toFreqWords(wordRank)
43 | }
44 | 
45 | /**
46 |  * Read a frequency file whose lines hold a word, a single space, then a count.
47 |  * Words that are empty after `cleanString` or that start with an apostrophe are skipped,
48 |  * and so are lines whose count is not a decimal integer. When a word appears on several
49 |  * lines, the last valid count wins.
50 |  *
51 |  * @param fileIn path of the frequency file to read
52 |  * @returns one entry per distinct cleaned word with its count
53 |  */
54 | export async function processFrequencyLineByLine(fileIn: string): Promise<FreqWord[]> {
55 |   const wordRank = new Map<string, number>()
56 | 
57 |   for await (const line of readLines(fileIn)) {
58 |     const [text, rawCount] = line.split(' ')
59 |     const word = cleanString(text)
60 |     if (!word || word.startsWith("'")) continue
61 | 
62 |     // explicit radix: counts are decimal; a missing or malformed column must not store NaN
63 |     const count = parseInt(rawCount, 10)
64 |     if (Number.isNaN(count)) continue
65 | 
66 |     wordRank.set(word, count)
67 |   }
68 | 
69 |   return toFreqWords(wordRank)
70 | }
71 | 


--------------------------------------------------------------------------------
/docs/api.md:
--------------------------------------------------------------------------------
 1 | # API
 2 | 
 3 | ## Language Detection
 4 | 
 5 | ### Detect
 6 | 
 7 | ```js
 8 | // basic detection
 9 | detect('this is the text') // => 'en'
10 | 
11 | // verbose mode
12 | detect('this is the text', { verbose: true }) // => 'en'
13 | 
14 | // only in a subset of languages
15 | detect('this is the text', { only: ['fr', 'en', 'nl'] }) // => 'en'
16 | ```
17 | 
18 | ### Detect All
19 | 
20 | ```js
21 | detectAll('this is the text')
22 | /*
23 | [
24 |   { lang: 'en', accuracy: 0.958076923076923 },
25 |   { lang: 'nl', accuracy: 0.15384615384615385 },
26 |   { lang: 'ga', accuracy: 0.14555384615384614 },
27 |   { lang: 'lt', accuracy: 0.03804615384615384 },
28 |   { lang: 'vo', accuracy: 0.03303076923076923 },
29 |   { lang: 'hu', accuracy: 0.022338461538461536 },
30 |   { lang: 'la', accuracy: 0.006738461538461531 },
31 |   { lang: 'fr', accuracy: 0.0025153846153846203 }
32 | ]
33 | */
34 | ```
35 | 
36 | ---
37 | 
38 | ## Language Code Conversion
39 | 
40 | This library also exposes some language code conversion functions to switch between iso2 (`ISO 639-1`) and iso3 (`ISO 639-3`), which makes it compatible with a wide range of APIs and tools.
41 | 
42 | ```js
43 | import { toISO2, toISO3 } from 'tinyld'
44 | 
45 | toISO2('jpn') // ja
46 | toISO3('jp') // jpn
47 | toISO3('ja') // jpn
48 | ```
49 | 
50 | It also contains aliases for deprecated codes and common mistakes (`jp` is an alias of `ja`, `cn` is an alias of `zh`, ...)
51 | 
52 | ---
53 | 
54 | ## Language Helpers
55 | 
56 | ```js
57 | import { supportedLanguages, langName, langRegion } from 'tinyld'
58 | 
59 | // all supported languages (ISO3 format)
60 | supportedLanguages // ['jpn', 'cmn', ...]
61 | 
62 | // and a few utilities about languages
63 | langName('jpn') // Japanese
64 | langRegion('jpn') // east-asia
65 | ```
66 | 


--------------------------------------------------------------------------------
/utils/exectime.js:
--------------------------------------------------------------------------------
 1 | const chartistSvg = require('chartist-svg')
 2 | 
 3 | module.exports = (data) => {
 4 |   const length = Object.keys(data.tinyld.size)
 5 |   var graph = {
 6 |     title: 'NodeJS Language Detection - Execution Time',
 7 |     subtitle: 'in milliseconds (lower is better)',
 8 |     labels: length,
 9 |     series: ['tinyld', 'langdetect', 'cld', 'franc'].map((lib) => {
10 |       return length.map((len) => {
11 |         return data[lib].size[len].execution_time
12 |       })
13 |     })
14 |   }
15 | 
16 |   var options = {
17 |     options: {
18 |       low: 0,
19 |       seriesBarDistance: 16,
20 |       onlyInteger: true,
21 |       // reverseData: true,
22 |       // horizontalBars: true,
23 |       width: 1200,
24 |       height: 600
25 |     },
26 |     css: `
27 | svg { background: #FFF; }
28 | 
29 | .ct-series-a .ct-bar, .ct-series-a .ct-line, .ct-series-a .ct-point, .ct-series-a .ct-slice-donut {
30 |     stroke: #468966;
31 |     stroke-width: 16px !important;
32 | }
33 | .ct-series-b .ct-bar, .ct-series-b .ct-line, .ct-series-b .ct-point, .ct-series-b .ct-slice-donut {
34 |     stroke: #98BAE7;
35 |     stroke-width: 8px !important;
36 | }
37 | .ct-series-c .ct-bar, .ct-series-c .ct-line, .ct-series-c .ct-point, .ct-series-c .ct-slice-donut {
38 |     stroke: #FEC771;
39 |     stroke-width: 8px !important;
40 | }
41 | .ct-series-d .ct-bar, .ct-series-d .ct-line, .ct-series-d .ct-point, .ct-series-d .ct-slice-donut {
42 |     stroke: #F38181;
43 |     stroke-width: 8px !important;
44 | }
45 | .ct-series-e .ct-bar, .ct-series-e .ct-line, .ct-series-e .ct-point, .ct-series-e .ct-slice-donut {
46 |     stroke: #D47AE8;
47 |     stroke-width: 8px !important;
48 | }
49 | `
50 |   }
51 | 
52 |   return chartistSvg('line', graph, options)
53 | }
54 | 


--------------------------------------------------------------------------------
/utils/language.js:
--------------------------------------------------------------------------------
 1 | const chartistSvg = require('chartist-svg')
 2 | 
 3 | module.exports = (data, langs) => {
 4 |   var graph = {
 5 |     title: 'NodeJS Language Detection - Per Language',
 6 |     subtitle: 'Tinyld vs Langdetect vs Cld vs Franc',
 7 |     labels: langs.map((x) => x.toUpperCase()),
 8 |     series: ['tinyld', 'langdetect', 'cld', 'franc'].map((lib) => {
 9 |       return langs.map((lang) => {
10 |         return data[lib].languages[lang]
11 |       })
12 |     })
13 |   }
14 | 
15 |   var options = {
16 |     options: {
17 |       high: 100,
18 |       low: 30,
19 |       seriesBarDistance: 16,
20 |       onlyInteger: true,
21 |       // reverseData: true,
22 |       // horizontalBars: true,
23 |       width: 1200,
24 |       height: 600
25 |     },
26 |     css: `
27 | svg { background: #FFF; }
28 | 
29 | .ct-series-a .ct-bar, .ct-series-a .ct-line, .ct-series-a .ct-point, .ct-series-a .ct-slice-donut {
30 |     stroke: #468966;
31 |     stroke-width: 16px !important;
32 | }
33 | .ct-series-b .ct-bar, .ct-series-b .ct-line, .ct-series-b .ct-point, .ct-series-b .ct-slice-donut {
34 |     stroke: #98BAE7;
35 |     stroke-width: 16px !important;
36 | }
37 | .ct-series-c .ct-bar, .ct-series-c .ct-line, .ct-series-c .ct-point, .ct-series-c .ct-slice-donut {
38 |     stroke: #FEC771;
39 |     stroke-width: 16px !important;
40 | }
41 | .ct-series-d .ct-bar, .ct-series-d .ct-line, .ct-series-d .ct-point, .ct-series-d .ct-slice-donut {
42 |     stroke: #F38181;
43 |     stroke-width: 16px !important;
44 | }
45 | .ct-series-e .ct-bar, .ct-series-e .ct-line, .ct-series-e .ct-point, .ct-series-e .ct-slice-donut {
46 |     stroke: #D47AE8;
47 |     stroke-width: 16px !important;
48 | }
49 | `
50 |   }
51 | 
52 |   return chartistSvg('bar', graph, options)
53 | }
54 | 


--------------------------------------------------------------------------------
/utils/length.js:
--------------------------------------------------------------------------------
 1 | const chartistSvg = require('chartist-svg')
 2 | 
 3 | module.exports = (data) => {
 4 |   const length = Object.keys(data.tinyld.size)
 5 |   var graph = {
 6 |     title: 'NodeJS Language Detection - Text Length',
 7 |     subtitle: 'Accuracy in % (higher is better)',
 8 |     labels: length,
 9 |     series: ['tinyld', 'langdetect', 'cld', 'franc'].map((lib) => {
10 |       return length.map((len) => {
11 |         return data[lib].size[len].success_rate
12 |       })
13 |     })
14 |   }
15 | 
16 |   var options = {
17 |     options: {
18 |       high: 100,
19 |       low: 10,
20 |       seriesBarDistance: 16,
21 |       onlyInteger: true,
22 |       // reverseData: true,
23 |       // horizontalBars: true,
24 |       width: 1200,
25 |       height: 600
26 |     },
27 |     css: `
28 | svg { background: #FFF; }
29 | 
30 | .ct-series-a .ct-bar, .ct-series-a .ct-line, .ct-series-a .ct-point, .ct-series-a .ct-slice-donut {
31 |     stroke: #468966;
32 |     stroke-width: 16px !important;
33 | }
34 | .ct-series-b .ct-bar, .ct-series-b .ct-line, .ct-series-b .ct-point, .ct-series-b .ct-slice-donut {
35 |     stroke: #98BAE7;
36 |     stroke-width: 8px !important;
37 | }
38 | .ct-series-c .ct-bar, .ct-series-c .ct-line, .ct-series-c .ct-point, .ct-series-c .ct-slice-donut {
39 |     stroke: #FEC771;
40 |     stroke-width: 8px !important;
41 | }
42 | .ct-series-d .ct-bar, .ct-series-d .ct-line, .ct-series-d .ct-point, .ct-series-d .ct-slice-donut {
43 |     stroke: #F38181;
44 |     stroke-width: 8px !important;
45 | }
46 | .ct-series-e .ct-bar, .ct-series-e .ct-line, .ct-series-e .ct-point, .ct-series-e .ct-slice-donut {
47 |     stroke: #D47AE8;
48 |     stroke-width: 8px !important;
49 | }
50 | `
51 |   }
52 | 
53 |   return chartistSvg('line', graph, options)
54 | }
55 | 


--------------------------------------------------------------------------------
/docs/dev.md:
--------------------------------------------------------------------------------
 1 | # Development
 2 | 
 3 | ## Commands
 4 | 
 5 | ```sh
 6 | # Install
 7 | yarn
 8 | 
 9 | # Build
10 | yarn build
11 | 
12 | # Test
13 | yarn test
14 | 
15 | # Lint / Auto-fix code style problems
16 | yarn lint
17 | ```
18 | 
19 | ---
20 | 
21 | ## Install issues
22 | 
23 | For the moment, the library has a lot of dev-dependencies that exist purely for the benchmark process.
24 | Some of those libraries need to compile native code, which can be problematic (gcc, gyp, python, ...)
25 | 
26 | If you run into those issues, one of the easiest solutions is to remove the problematic dependencies from `package.json`, then try to install again.
27 | 
28 | [like here](https://github.com/komodojp/tinyld/issues/10#issuecomment-1019085476)
29 | 
30 | This will only cause issues with `yarn bench`; everything else should still work normally.
31 | 
32 | ---
33 | 
34 | ## Optional
35 | 
36 | ### 1. Generate profiles (`yarn train`)
37 | 
38 | This step requires a lot of data and time, so it is optional and the results are stored directly in git.
39 | 
40 | This will analyse a lot of text in different languages and build statistics in order to identify the best features for each language.
41 | 
42 | To train the model, you first need to have the datasets available locally:
43 | 
44 | ```
45 | Download Datasets
46 |  - Download the [Tatoeba sentence export](https://downloads.tatoeba.org/exports/sentences.tar.bz2)
47 |  - Extract in `data/tatoeba.csv`
48 |  - Download the [UDHR](https://unicode.org/udhr/assemblies/udhr_txt.zip)
49 |  - Extract in `data/udhr/`
50 | 
51 | Run yarn train
52 |   - For each language, it will build statistics for words and n-grams
53 |   - This goes through a massive amount of data and will take time, so prepare a few coffees
54 | 
55 | Once your profile files are generated, you can run `yarn build` to get a build that uses the new data
56 | ```
57 | 
58 | ### 2. Generate benchmark data (`yarn bench`)
59 | 
60 | This step requires a bit of time: it runs a lot of different tests against a set of libraries to generate the benchmark page and diagrams.
61 | 


--------------------------------------------------------------------------------
/data/bench/langdetect.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "size": {
 3 |     "10": {
 4 |       "success_rate": 60.6875,
 5 |       "error_rate": 39.3125,
 6 |       "unindentified_rate": 0,
 7 |       "execution_time": 0.5959
 8 |     },
 9 |     "16": {
10 |       "success_rate": 77.4375,
11 |       "error_rate": 22.5625,
12 |       "unindentified_rate": 0,
13 |       "execution_time": 0.4825
14 |     },
15 |     "24": {
16 |       "success_rate": 89,
17 |       "error_rate": 11,
18 |       "unindentified_rate": 0,
19 |       "execution_time": 0.4178
20 |     },
21 |     "36": {
22 |       "success_rate": 94.2813,
23 |       "error_rate": 5.7188,
24 |       "unindentified_rate": 0,
25 |       "execution_time": 0.3856
26 |     },
27 |     "48": {
28 |       "success_rate": 97.5938,
29 |       "error_rate": 2.4063,
30 |       "unindentified_rate": 0,
31 |       "execution_time": 0.3764
32 |     },
33 |     "64": {
34 |       "success_rate": 99,
35 |       "error_rate": 1,
36 |       "unindentified_rate": 0,
37 |       "execution_time": 0.3808
38 |     },
39 |     "128": {
40 |       "success_rate": 99.7813,
41 |       "error_rate": 0.2188,
42 |       "unindentified_rate": 0,
43 |       "execution_time": 0.479
44 |     },
45 |     "256": {
46 |       "success_rate": 100,
47 |       "error_rate": 0,
48 |       "unindentified_rate": 0,
49 |       "execution_time": 0.762
50 |     },
51 |     "512": {
52 |       "success_rate": 100,
53 |       "error_rate": 0,
54 |       "unindentified_rate": 0,
55 |       "execution_time": 1.3573
56 |     },
57 |     "1024": {
58 |       "success_rate": 100,
59 |       "error_rate": 0,
60 |       "unindentified_rate": 0,
61 |       "execution_time": 2.5523
62 |     }
63 |   },
64 |   "stats": {
65 |     "min": 87.36,
66 |     "max": 100,
67 |     "success_rate": 95.675,
68 |     "error_rate": 4.325,
69 |     "unindentified_rate": 0,
70 |     "execution_time": 0.3647
71 |   },
72 |   "languages": {
73 |     "heb": 100,
74 |     "jpn": 99.99,
75 |     "kor": 99.7267,
76 |     "ara": 99.63,
77 |     "cmn": 98.2,
78 |     "fin": 97.99,
79 |     "tur": 97.86,
80 |     "deu": 97.8,
81 |     "fra": 96.16,
82 |     "rus": 95.52,
83 |     "eng": 95.35,
84 |     "ita": 94.03,
85 |     "hin": 92.35,
86 |     "por": 91.1,
87 |     "spa": 89.71,
88 |     "nld": 87.36
89 |   }
90 | }


--------------------------------------------------------------------------------
/data/bench/tinyld-light.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "size": {
 3 |     "10": {
 4 |       "success_rate": 73,
 5 |       "error_rate": 15.3125,
 6 |       "unindentified_rate": 11.6875,
 7 |       "execution_time": 0.0584
 8 |     },
 9 |     "16": {
10 |       "success_rate": 88.8438,
11 |       "error_rate": 7.5625,
12 |       "unindentified_rate": 3.5937,
13 |       "execution_time": 0.0635
14 |     },
15 |     "24": {
16 |       "success_rate": 95.4688,
17 |       "error_rate": 4.125,
18 |       "unindentified_rate": 0.4063,
19 |       "execution_time": 0.0672
20 |     },
21 |     "36": {
22 |       "success_rate": 97.6563,
23 |       "error_rate": 2.25,
24 |       "unindentified_rate": 0.0938,
25 |       "execution_time": 0.087
26 |     },
27 |     "48": {
28 |       "success_rate": 99.0938,
29 |       "error_rate": 0.875,
30 |       "unindentified_rate": 0.0313,
31 |       "execution_time": 0.1128
32 |     },
33 |     "64": {
34 |       "success_rate": 99.5625,
35 |       "error_rate": 0.4375,
36 |       "unindentified_rate": 0,
37 |       "execution_time": 0.1362
38 |     },
39 |     "128": {
40 |       "success_rate": 99.9375,
41 |       "error_rate": 0.0625,
42 |       "unindentified_rate": 0,
43 |       "execution_time": 0.2514
44 |     },
45 |     "256": {
46 |       "success_rate": 100,
47 |       "error_rate": 0,
48 |       "unindentified_rate": 0,
49 |       "execution_time": 0.466
50 |     },
51 |     "512": {
52 |       "success_rate": 100,
53 |       "error_rate": 0,
54 |       "unindentified_rate": 0,
55 |       "execution_time": 0.5923
56 |     },
57 |     "1024": {
58 |       "success_rate": 100,
59 |       "error_rate": 0,
60 |       "unindentified_rate": 0,
61 |       "execution_time": 0.7342
62 |     }
63 |   },
64 |   "stats": {
65 |     "min": 93.61,
66 |     "max": 100,
67 |     "success_rate": 97.8778,
68 |     "error_rate": 1.9842,
69 |     "unindentified_rate": 0.138,
70 |     "execution_time": 0.0947
71 |   },
72 |   "languages": {
73 |     "kor": 100,
74 |     "hin": 100,
75 |     "rus": 100,
76 |     "heb": 100,
77 |     "ara": 100,
78 |     "jpn": 99.99,
79 |     "cmn": 99.31,
80 |     "tur": 98.38,
81 |     "fin": 98.34,
82 |     "nld": 98,
83 |     "fra": 97.37,
84 |     "deu": 97.04,
85 |     "eng": 96.04,
86 |     "por": 95.01,
87 |     "ita": 93.99,
88 |     "spa": 93.61
89 |   }
90 | }


--------------------------------------------------------------------------------
/data/bench/tinyld.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "size": {
 3 |     "10": {
 4 |       "success_rate": 69.9375,
 5 |       "error_rate": 17.6563,
 6 |       "unindentified_rate": 12.4063,
 7 |       "execution_time": 0.0828
 8 |     },
 9 |     "16": {
10 |       "success_rate": 90.2188,
11 |       "error_rate": 7.75,
12 |       "unindentified_rate": 2.0313,
13 |       "execution_time": 0.0973
14 |     },
15 |     "24": {
16 |       "success_rate": 96.1875,
17 |       "error_rate": 3.3438,
18 |       "unindentified_rate": 0.4688,
19 |       "execution_time": 0.0988
20 |     },
21 |     "36": {
22 |       "success_rate": 98.3438,
23 |       "error_rate": 1.5938,
24 |       "unindentified_rate": 0.0625,
25 |       "execution_time": 0.1094
26 |     },
27 |     "48": {
28 |       "success_rate": 99.5,
29 |       "error_rate": 0.5,
30 |       "unindentified_rate": 0,
31 |       "execution_time": 0.1448
32 |     },
33 |     "64": {
34 |       "success_rate": 99.6875,
35 |       "error_rate": 0.3125,
36 |       "unindentified_rate": 0,
37 |       "execution_time": 0.1822
38 |     },
39 |     "128": {
40 |       "success_rate": 99.9375,
41 |       "error_rate": 0.0625,
42 |       "unindentified_rate": 0,
43 |       "execution_time": 0.2983
44 |     },
45 |     "256": {
46 |       "success_rate": 99.9688,
47 |       "error_rate": 0.0313,
48 |       "unindentified_rate": 0,
49 |       "execution_time": 0.5834
50 |     },
51 |     "512": {
52 |       "success_rate": 100,
53 |       "error_rate": 0,
54 |       "unindentified_rate": 0,
55 |       "execution_time": 0.697
56 |     },
57 |     "1024": {
58 |       "success_rate": 100,
59 |       "error_rate": 0,
60 |       "unindentified_rate": 0,
61 |       "execution_time": 0.8079
62 |     }
63 |   },
64 |   "stats": {
65 |     "min": 96.13,
66 |     "max": 100,
67 |     "success_rate": 98.5231,
68 |     "error_rate": 1.3712,
69 |     "unindentified_rate": 0.1057,
70 |     "execution_time": 0.1191
71 |   },
72 |   "languages": {
73 |     "kor": 100,
74 |     "hin": 100,
75 |     "jpn": 99.99,
76 |     "heb": 99.73,
77 |     "ara": 99.59,
78 |     "cmn": 99.57,
79 |     "rus": 99.27,
80 |     "deu": 99.25,
81 |     "fra": 99,
82 |     "fin": 98.69,
83 |     "eng": 98.11,
84 |     "nld": 97.66,
85 |     "tur": 97.42,
86 |     "ita": 96.52,
87 |     "por": 96.16,
88 |     "spa": 96.13
89 |   }
90 | }


--------------------------------------------------------------------------------
/data/bench/franc-all.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "size": {
 3 |     "10": {
 4 |       "success_rate": 0.125,
 5 |       "error_rate": 0.0625,
 6 |       "unindentified_rate": 99.8125,
 7 |       "execution_time": 0.0017
 8 |     },
 9 |     "16": {
10 |       "success_rate": 41.0938,
11 |       "error_rate": 58.9063,
12 |       "unindentified_rate": 0,
13 |       "execution_time": 0.264
14 |     },
15 |     "24": {
16 |       "success_rate": 50,
17 |       "error_rate": 50,
18 |       "unindentified_rate": 0,
19 |       "execution_time": 0.3342
20 |     },
21 |     "36": {
22 |       "success_rate": 60.5938,
23 |       "error_rate": 39.4063,
24 |       "unindentified_rate": 0,
25 |       "execution_time": 0.3994
26 |     },
27 |     "48": {
28 |       "success_rate": 69.5625,
29 |       "error_rate": 30.4375,
30 |       "unindentified_rate": 0,
31 |       "execution_time": 0.509
32 |     },
33 |     "64": {
34 |       "success_rate": 73.5938,
35 |       "error_rate": 26.4062,
36 |       "unindentified_rate": 0,
37 |       "execution_time": 0.6104
38 |     },
39 |     "128": {
40 |       "success_rate": 86.1563,
41 |       "error_rate": 13.8438,
42 |       "unindentified_rate": 0,
43 |       "execution_time": 0.925
44 |     },
45 |     "256": {
46 |       "success_rate": 93.6875,
47 |       "error_rate": 6.3125,
48 |       "unindentified_rate": 0,
49 |       "execution_time": 1.6346
50 |     },
51 |     "512": {
52 |       "success_rate": 96.25,
53 |       "error_rate": 3.75,
54 |       "unindentified_rate": 0,
55 |       "execution_time": 2.8385
56 |     },
57 |     "1024": {
58 |       "success_rate": 98.4331,
59 |       "error_rate": 1.5669,
60 |       "unindentified_rate": 0,
61 |       "execution_time": 4.78
62 |     }
63 |   },
64 |   "stats": {
65 |     "min": 29.47,
66 |     "max": 99.93,
67 |     "success_rate": 66.7081,
68 |     "error_rate": 33.2919,
69 |     "unindentified_rate": 0,
70 |     "execution_time": 0.4763
71 |   },
72 |   "languages": {
73 |     "jpn": 99.93,
74 |     "kor": 99.8633,
75 |     "cmn": 99.35,
76 |     "heb": 98.18,
77 |     "ara": 91.72,
78 |     "deu": 80.77,
79 |     "fin": 70.79,
80 |     "fra": 67.27,
81 |     "hin": 60.42,
82 |     "nld": 59.65,
83 |     "rus": 51.96,
84 |     "eng": 49.92,
85 |     "por": 49.39,
86 |     "ita": 42.55,
87 |     "tur": 32.27,
88 |     "spa": 29.47
89 |   }
90 | }


--------------------------------------------------------------------------------
/data/bench/tinyld-heavy.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "size": {
 3 |     "10": {
 4 |       "success_rate": 79.2188,
 5 |       "error_rate": 17.8125,
 6 |       "unindentified_rate": 2.9688,
 7 |       "execution_time": 0.0777
 8 |     },
 9 |     "16": {
10 |       "success_rate": 95.0313,
11 |       "error_rate": 4.6563,
12 |       "unindentified_rate": 0.3125,
13 |       "execution_time": 0.0797
14 |     },
15 |     "24": {
16 |       "success_rate": 98.125,
17 |       "error_rate": 1.8438,
18 |       "unindentified_rate": 0.0313,
19 |       "execution_time": 0.0754
20 |     },
21 |     "36": {
22 |       "success_rate": 99.2813,
23 |       "error_rate": 0.6875,
24 |       "unindentified_rate": 0.0313,
25 |       "execution_time": 0.0881
26 |     },
27 |     "48": {
28 |       "success_rate": 99.4688,
29 |       "error_rate": 0.5313,
30 |       "unindentified_rate": 0,
31 |       "execution_time": 0.1106
32 |     },
33 |     "64": {
34 |       "success_rate": 99.8125,
35 |       "error_rate": 0.1875,
36 |       "unindentified_rate": 0,
37 |       "execution_time": 0.1348
38 |     },
39 |     "128": {
40 |       "success_rate": 99.875,
41 |       "error_rate": 0.125,
42 |       "unindentified_rate": 0,
43 |       "execution_time": 0.233
44 |     },
45 |     "256": {
46 |       "success_rate": 99.9375,
47 |       "error_rate": 0.0625,
48 |       "unindentified_rate": 0,
49 |       "execution_time": 0.4433
50 |     },
51 |     "512": {
52 |       "success_rate": 100,
53 |       "error_rate": 0,
54 |       "unindentified_rate": 0,
55 |       "execution_time": 0.509
56 |     },
57 |     "1024": {
58 |       "success_rate": 100,
59 |       "error_rate": 0,
60 |       "unindentified_rate": 0,
61 |       "execution_time": 0.5808
62 |     }
63 |   },
64 |   "stats": {
65 |     "min": 97.52,
66 |     "max": 100,
67 |     "success_rate": 99.249,
68 |     "error_rate": 0.7478,
69 |     "unindentified_rate": 0.0032,
70 |     "execution_time": 0.096
71 |   },
72 |   "languages": {
73 |     "kor": 100,
74 |     "hin": 100,
75 |     "jpn": 99.99,
76 |     "heb": 99.88,
77 |     "ara": 99.87,
78 |     "deu": 99.72,
79 |     "cmn": 99.66,
80 |     "fra": 99.64,
81 |     "rus": 99.52,
82 |     "fin": 99.2,
83 |     "eng": 99.11,
84 |     "tur": 99.01,
85 |     "ita": 98.66,
86 |     "nld": 98.44,
87 |     "spa": 98.13,
88 |     "por": 97.52
89 |   }
90 | }


--------------------------------------------------------------------------------
/data/bench/franc.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "size": {
 3 |     "10": {
 4 |       "success_rate": 0.125,
 5 |       "error_rate": 0.0625,
 6 |       "unindentified_rate": 99.8125,
 7 |       "execution_time": 0.0022
 8 |     },
 9 |     "16": {
10 |       "success_rate": 45.6875,
11 |       "error_rate": 54.3125,
12 |       "unindentified_rate": 0,
13 |       "execution_time": 0.1249
14 |     },
15 |     "24": {
16 |       "success_rate": 56.5938,
17 |       "error_rate": 43.4063,
18 |       "unindentified_rate": 0,
19 |       "execution_time": 0.1481
20 |     },
21 |     "36": {
22 |       "success_rate": 69.0938,
23 |       "error_rate": 30.9063,
24 |       "unindentified_rate": 0,
25 |       "execution_time": 0.1847
26 |     },
27 |     "48": {
28 |       "success_rate": 77.0625,
29 |       "error_rate": 22.9375,
30 |       "unindentified_rate": 0,
31 |       "execution_time": 0.2374
32 |     },
33 |     "64": {
34 |       "success_rate": 80.9688,
35 |       "error_rate": 19.0313,
36 |       "unindentified_rate": 0,
37 |       "execution_time": 0.2791
38 |     },
39 |     "128": {
40 |       "success_rate": 91.2813,
41 |       "error_rate": 8.7188,
42 |       "unindentified_rate": 0,
43 |       "execution_time": 0.4306
44 |     },
45 |     "256": {
46 |       "success_rate": 96.7188,
47 |       "error_rate": 3.2813,
48 |       "unindentified_rate": 0,
49 |       "execution_time": 0.7921
50 |     },
51 |     "512": {
52 |       "success_rate": 98.9063,
53 |       "error_rate": 1.0938,
54 |       "unindentified_rate": 0,
55 |       "execution_time": 1.3237
56 |     },
57 |     "1024": {
58 |       "success_rate": 99.6866,
59 |       "error_rate": 0.3134,
60 |       "unindentified_rate": 0,
61 |       "execution_time": 2.33
62 |     }
63 |   },
64 |   "stats": {
65 |     "min": 48.96,
66 |     "max": 99.93,
67 |     "success_rate": 74.2577,
68 |     "error_rate": 25.7423,
69 |     "unindentified_rate": 0,
70 |     "execution_time": 0.2242
71 |   },
72 |   "languages": {
73 |     "jpn": 99.93,
74 |     "kor": 99.8633,
75 |     "cmn": 99.35,
76 |     "heb": 98.23,
77 |     "ara": 91.84,
78 |     "deu": 83.87,
79 |     "fra": 79.36,
80 |     "fin": 78.52,
81 |     "hin": 68.25,
82 |     "ita": 64.2,
83 |     "por": 62.29,
84 |     "nld": 62,
85 |     "eng": 60.39,
86 |     "rus": 52.32,
87 |     "spa": 51.24,
88 |     "tur": 48.96
89 |   }
90 | }


--------------------------------------------------------------------------------
/data/bench/cld.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "size": {
 3 |     "10": {
 4 |       "success_rate": 26.5,
 5 |       "error_rate": 2.4375,
 6 |       "unindentified_rate": 71.0625,
 7 |       "execution_time": 0.1076
 8 |     },
 9 |     "16": {
10 |       "success_rate": 57.5625,
11 |       "error_rate": 4.0625,
12 |       "unindentified_rate": 38.375,
13 |       "execution_time": 0.0891
14 |     },
15 |     "24": {
16 |       "success_rate": 81.6563,
17 |       "error_rate": 3.1563,
18 |       "unindentified_rate": 15.1875,
19 |       "execution_time": 0.0754
20 |     },
21 |     "36": {
22 |       "success_rate": 92.8125,
23 |       "error_rate": 1.75,
24 |       "unindentified_rate": 5.4375,
25 |       "execution_time": 0.0758
26 |     },
27 |     "48": {
28 |       "success_rate": 96.75,
29 |       "error_rate": 1.0938,
30 |       "unindentified_rate": 2.1563,
31 |       "execution_time": 0.0778
32 |     },
33 |     "64": {
34 |       "success_rate": 98.3125,
35 |       "error_rate": 0.5,
36 |       "unindentified_rate": 1.1875,
37 |       "execution_time": 0.0747
38 |     },
39 |     "128": {
40 |       "success_rate": 99.5625,
41 |       "error_rate": 0.0938,
42 |       "unindentified_rate": 0.3438,
43 |       "execution_time": 0.08
44 |     },
45 |     "256": {
46 |       "success_rate": 99.9375,
47 |       "error_rate": 0.0313,
48 |       "unindentified_rate": 0.0313,
49 |       "execution_time": 0.086
50 |     },
51 |     "512": {
52 |       "success_rate": 99.9688,
53 |       "error_rate": 0,
54 |       "unindentified_rate": 0.0313,
55 |       "execution_time": 0.1195
56 |     },
57 |     "1024": {
58 |       "success_rate": 100,
59 |       "error_rate": 0,
60 |       "unindentified_rate": 0,
61 |       "execution_time": 0.1449
62 |     }
63 |   },
64 |   "stats": {
65 |     "min": 78.4,
66 |     "max": 100,
67 |     "success_rate": 92.3654,
68 |     "error_rate": 1.6213,
69 |     "unindentified_rate": 6.0133,
70 |     "execution_time": 0.0711
71 |   },
72 |   "languages": {
73 |     "jpn": 100,
74 |     "kor": 100,
75 |     "hin": 99.43,
76 |     "eng": 99.18,
77 |     "deu": 97.58,
78 |     "fin": 96.3,
79 |     "cmn": 94.52,
80 |     "fra": 94.05,
81 |     "tur": 93.2,
82 |     "por": 91.89,
83 |     "rus": 89.69,
84 |     "nld": 89.58,
85 |     "spa": 87.79,
86 |     "ita": 85.9,
87 |     "heb": 84.06,
88 |     "ara": 78.4
89 |   }
90 | }


--------------------------------------------------------------------------------
/data/bench/languagedetect.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "size": {
 3 |     "10": {
 4 |       "success_rate": 24.75,
 5 |       "error_rate": 43.25,
 6 |       "unindentified_rate": 32,
 7 |       "execution_time": 0.0616
 8 |     },
 9 |     "16": {
10 |       "success_rate": 37.2188,
11 |       "error_rate": 33.5,
12 |       "unindentified_rate": 29.2812,
13 |       "execution_time": 0.088
14 |     },
15 |     "24": {
16 |       "success_rate": 50.1875,
17 |       "error_rate": 22.4063,
18 |       "unindentified_rate": 27.4063,
19 |       "execution_time": 0.1142
20 |     },
21 |     "36": {
22 |       "success_rate": 59.5,
23 |       "error_rate": 14.375,
24 |       "unindentified_rate": 26.125,
25 |       "execution_time": 0.164
26 |     },
27 |     "48": {
28 |       "success_rate": 66.2813,
29 |       "error_rate": 8.2813,
30 |       "unindentified_rate": 25.4375,
31 |       "execution_time": 0.2038
32 |     },
33 |     "64": {
34 |       "success_rate": 69.0938,
35 |       "error_rate": 6,
36 |       "unindentified_rate": 24.9063,
37 |       "execution_time": 0.2542
38 |     },
39 |     "128": {
40 |       "success_rate": 72.75,
41 |       "error_rate": 3.6563,
42 |       "unindentified_rate": 23.5938,
43 |       "execution_time": 0.4172
44 |     },
45 |     "256": {
46 |       "success_rate": 74.625,
47 |       "error_rate": 3.6875,
48 |       "unindentified_rate": 21.6875,
49 |       "execution_time": 0.7966
50 |     },
51 |     "512": {
52 |       "success_rate": 74.9063,
53 |       "error_rate": 4.5313,
54 |       "unindentified_rate": 20.5625,
55 |       "execution_time": 1.4548
56 |     },
57 |     "1024": {
58 |       "success_rate": 75.2115,
59 |       "error_rate": 5.6095,
60 |       "unindentified_rate": 19.1789,
61 |       "execution_time": 2.1704
62 |     }
63 |   },
64 |   "stats": {
65 |     "min": 0.01,
66 |     "max": 98.58,
67 |     "success_rate": 65.2835,
68 |     "error_rate": 11.2808,
69 |     "unindentified_rate": 23.4357,
70 |     "execution_time": 0.1896
71 |   },
72 |   "languages": {
73 |     "hin": 98.58,
74 |     "ara": 95,
75 |     "nld": 93.53,
76 |     "deu": 90.36,
77 |     "fra": 88.44,
78 |     "fin": 87.57,
79 |     "ita": 85.03,
80 |     "eng": 84,
81 |     "tur": 78.5,
82 |     "por": 76.5,
83 |     "spa": 71.75,
84 |     "rus": 63.43,
85 |     "kor": 0.0195,
86 |     "jpn": 0.01,
87 |     "cmn": 0.01,
88 |     "heb": 0.01
89 |   }
90 | }


--------------------------------------------------------------------------------
/data/bench/franc-min.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "size": {
 3 |     "10": {
 4 |       "success_rate": 0.125,
 5 |       "error_rate": 0.0625,
 6 |       "unindentified_rate": 99.8125,
 7 |       "execution_time": 0.0019
 8 |     },
 9 |     "16": {
10 |       "success_rate": 45.3438,
11 |       "error_rate": 48.4063,
12 |       "unindentified_rate": 6.25,
13 |       "execution_time": 0.0428
14 |     },
15 |     "24": {
16 |       "success_rate": 55.375,
17 |       "error_rate": 38.375,
18 |       "unindentified_rate": 6.25,
19 |       "execution_time": 0.052
20 |     },
21 |     "36": {
22 |       "success_rate": 66.9375,
23 |       "error_rate": 26.8125,
24 |       "unindentified_rate": 6.25,
25 |       "execution_time": 0.0688
26 |     },
27 |     "48": {
28 |       "success_rate": 73.375,
29 |       "error_rate": 20.375,
30 |       "unindentified_rate": 6.25,
31 |       "execution_time": 0.0835
32 |     },
33 |     "64": {
34 |       "success_rate": 75.9063,
35 |       "error_rate": 17.875,
36 |       "unindentified_rate": 6.2188,
37 |       "execution_time": 0.1015
38 |     },
39 |     "128": {
40 |       "success_rate": 82.5625,
41 |       "error_rate": 11.25,
42 |       "unindentified_rate": 6.1875,
43 |       "execution_time": 0.1628
44 |     },
45 |     "256": {
46 |       "success_rate": 85.1563,
47 |       "error_rate": 8.6875,
48 |       "unindentified_rate": 6.1563,
49 |       "execution_time": 0.3008
50 |     },
51 |     "512": {
52 |       "success_rate": 86.5313,
53 |       "error_rate": 7.4375,
54 |       "unindentified_rate": 6.0313,
55 |       "execution_time": 0.5538
56 |     },
57 |     "1024": {
58 |       "success_rate": 87.1514,
59 |       "error_rate": 6.9257,
60 |       "unindentified_rate": 5.9229,
61 |       "execution_time": 0.9752
62 |     }
63 |   },
64 |   "stats": {
65 |     "min": 0.01,
66 |     "max": 99.93,
67 |     "success_rate": 70.3891,
68 |     "error_rate": 23.1888,
69 |     "unindentified_rate": 6.422,
70 |     "execution_time": 0.084
71 |   },
72 |   "languages": {
73 |     "jpn": 99.93,
74 |     "kor": 99.8633,
75 |     "cmn": 99.35,
76 |     "deu": 94.18,
77 |     "ara": 91.88,
78 |     "fra": 87.32,
79 |     "nld": 87.21,
80 |     "eng": 81.42,
81 |     "por": 76.8,
82 |     "ita": 74.31,
83 |     "hin": 68.25,
84 |     "spa": 67.38,
85 |     "tur": 58.11,
86 |     "rus": 54.6,
87 |     "fin": 0.01,
88 |     "heb": 0.01
89 |   }
90 | }


--------------------------------------------------------------------------------
/docs/cli.md:
--------------------------------------------------------------------------------
 1 | # **TinyLD CLI**
 2 | 
 3 | From time to time, it can be easier to use the library from a terminal _(Example: testing or debugging)_
 4 | 
 5 | ```sh
 6 | tinyld This is the text that I want to check
 7 | # [ { lang: 'en', accuracy: 1 } ]
 8 | 
 9 | tinyld これはテストです
10 | # [ { lang: 'ja', accuracy: 1 } ]
11 | 
12 | tinyld Єсть на світі доля
13 | # [ { lang: 'uk', accuracy: 1 } ]
14 | ```
15 | 
16 | _Options_
17 | 
18 | - `--verbose` : Get an explanation of why **TinyLD** picks a language
19 | - `--only=en,ja,fr` : Restrict the detection to a subset of languages
20 | 
21 | Can also be run with:
22 | 
23 | - Npx: `npx tinyld [message]`
24 | - Yarn: `yarn tinyld [message]`
25 | - Bash: `./node_modules/.bin/tinyld [message]`
26 | 
27 | ## Verbose mode (debugging)
28 | 
29 | ```sh
30 | > yarn tinyld --verbose this is a text
31 | 
32 | [Pass 1] detectUniqueGrams of 1-grams [
33 |   't', 'h', 'i', 's',
34 |   'i', 's', 'a', 't',
35 |   'e', 'x', 't'
36 | ]
37 | [Pass 1] detectUniqueGrams of 2-grams [
38 |   ' t', 'th', 'hi', 'is',
39 |   's ', ' i', 'is', 's ',
40 |   ' a', 'a ', ' t', 'te',
41 |   'ex', 'xt', 't '
42 | ]
43 | 
44 | # ...
45 | 
46 | Gram 'a t' [
47 |   'ind = 43.830000000000005%',
48 |   'tgl = 15.5%',
49 |   'epo = 41.199999999999996%',
50 |   'spa = 90.59%',
51 |   'por = 53.47%',
52 |   'ita = 65.4%',
53 |   'srp = 30.320000000000004%',
54 |   'fin = 94.69999999999999%',
55 |   'hun = 100%',
56 |   'pol = 31.680000000000003%'
57 | ]
58 | Gram ' te' [
59 |   'ind = 18.060000000000002%',
60 |   'epo = 10.31%',
61 |   'eng = 9.44%',
62 |   'por = 97.13000000000001%',
63 |   'ita = 13.65%',
64 |   'nld = 100%',
65 |   'lat = 37.85%',
66 |   'srp = 3.6700000000000004%',
67 |   'fin = 22.67%',
68 |   'ron = 6.59%'
69 | ]
70 | Gram 'ext' [ 'eng = 59.14%', 'spa = 100%' ]
71 | Gram 'xt ' [ 'eng = 100%' ]
72 | Result this is a text [
73 |   { lang: 'en', accuracy: 0.7667, score: 2274.35 },
74 |   { lang: 'eo', accuracy: 0.3133, score: 6695.6 },
75 |   { lang: 'nl', accuracy: 0.3104, score: 6723.8 },
76 |   { lang: 'pt', accuracy: 0.2754, score: 7064.75 },
77 |   { lang: 'la', accuracy: 0.2662, score: 7154.35 }
78 | ]
79 | [
80 |   { lang: 'en', accuracy: 0.7667, score: 2274.35 },
81 |   { lang: 'eo', accuracy: 0.3133, score: 6695.6 },
82 |   { lang: 'nl', accuracy: 0.3104, score: 6723.8 },
83 |   { lang: 'pt', accuracy: 0.2754, score: 7064.75 },
84 |   { lang: 'la', accuracy: 0.2662, score: 7154.35 }
85 | ]
86 | ```
87 | 


--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
 1 | # TinyLD
 2 | 
 3 | [![npm](https://img.shields.io/npm/v/tinyld)](https://www.npmjs.com/package/tinyld)
 4 | [![npm](https://img.shields.io/npm/dm/tinyld)](https://www.npmjs.com/package/tinyld)
 5 | [![CDN Download](https://data.jsdelivr.com/v1/package/npm/tinyld/badge)](https://www.jsdelivr.com/package/npm/tinyld)
 6 | [![License](https://img.shields.io/npm/l/tinyld.svg)](https://npmjs.org/package/tinyld)
 7 | 
 8 | ![logo](./banner.png)
 9 | 
10 | ## :tada: Description
11 | 
12 | **Tiny** **L**anguage **D**etector, simply detect the language of a unicode UTF-8 text:
13 | 
14 | - Pure JS, No api call, No dependencies (Node and Browser compatible)
15 | - Blazing fast and low memory footprint (unlike ML methods)
16 | - Trained with datasets from [Tatoeba](https://tatoeba.org/en/) and [UDHR](https://unicode.org/udhr/)
17 | - Support [62 languages](./docs/langs.md) (24 for [the web version](./docs/light.md))
18 | - Reliable even for really short texts (chatbot, keywords, ...)
19 | - Support both ISO-639-1 & ISO-639-2
20 | - Available for NodeJS (`CommonJS` and `ESM`), Deno and Browser
21 | 
22 | ## Links
23 | 
24 | - [**Playground** - Try the library](https://komodojp.github.io/tinyld/)
25 | - [Play with some code](https://runkit.com/kefniark/tinyld)
26 | - [Getting Started](./docs/install.md)
27 | - [Supported Languages](./docs/langs.md)
28 | - [Algorithm](./docs/algorithm.md)
29 | - [Frequently Asked Questions](./docs/faq.md)
30 | 
31 | ---
32 | 
33 | ## :floppy_disk: Getting Started
34 | 
35 | ### Install
36 | 
37 | ```sh
38 | yarn add tinyld # or npm install --save tinyld
39 | ```
40 | 
41 | [Install Documentation](./docs/install.md)
42 | 
43 | ---
44 | 
45 | ### :page_facing_up: **TinyLD API**
46 | 
47 | ```js
48 | import { detect, detectAll } from 'tinyld'
49 | 
50 | // Detect
51 | detect('これは日本語です.') // ja
52 | detect('and this is english.') // en
53 | 
54 | // DetectAll
55 | detectAll('ceci est un text en francais.')
56 | // [ { lang: 'fr', accuracy: 0.5238 }, { lang: 'ro', accuracy: 0.3802 }, ... ]
57 | ```
58 | 
59 | [API Documentation](./docs/api.md)
60 | 
61 | ---
62 | 
63 | ### :paperclip: **TinyLD CLI**
64 | 
65 | ```bash
66 | tinyld This is the text that I want to check
67 | # [ { lang: 'en', accuracy: 1 } ]
68 | ```
69 | 
70 | [More Information](./docs/cli.md)
71 | 
72 | ---
73 | 
74 | ## :chart_with_upwards_trend: Performance
75 | 
76 | Here is a comparison of **Tinyld** against other popular libraries.
77 | 
78 | ![SVG Graph](./docs/overall.svg)
79 | 
80 | To summarize in one sentence:
81 | 
82 | > Better, Faster, Smaller
83 | 
84 | [More Benchmark Information](./docs/benchmark.md)
85 | 
86 | ---
87 | 
88 | ## Developer
89 | 
90 | If you want to **Contribute** or **Open a PR**, it's recommended to take a look [at the dev documentation](./docs/dev.md)
91 | 


--------------------------------------------------------------------------------
/docs/langs.md:
--------------------------------------------------------------------------------
 1 | # 62 Supported Languages
 2 | 
 3 | This list is auto-generated from the code and up-to-date.
 4 | 
 5 | ## Africa (4)
 6 | 
 7 | - **Afrikaans** (ISO Codes: `af` `afr`)
 8 | - **Amharic** (ISO Codes: `am` `amh`)
 9 | - **Berber** (ISO Codes: `ber` `ber`)
10 | - **Kirundi** (ISO Codes: `rn` `run`)
11 | 
12 | ## Asia (6)
13 | 
14 | - **Burmese** (ISO Codes: `my` `mya`)
15 | - **Indonesian** (ISO Codes: `id` `ind`)
16 | - **Khmer** (ISO Codes: `km` `khm`)
17 | - **Tagalog** (ISO Codes: `tl` `tgl`)
18 | - **Thai** (ISO Codes: `th` `tha`)
19 | - **Vietnamese** (ISO Codes: `vi` `vie`)
20 | 
21 | ## Asia-east (3)
22 | 
23 | - **Chinese** (ISO Codes: `zh` `cmn`)
24 | - **Japanese** (ISO Codes: `ja` `jpn`)
25 | - **Korean** (ISO Codes: `ko` `kor`)
26 | 
27 | ## Asia-south (7)
28 | 
29 | - **Bengali** (ISO Codes: `bn` `ben`)
30 | - **Gujarati** (ISO Codes: `gu` `guj`)
31 | - **Hindi** (ISO Codes: `hi` `hin`)
32 | - **Kannada** (ISO Codes: `kn` `kan`)
33 | - **Tamil** (ISO Codes: `ta` `tam`)
34 | - **Telugu** (ISO Codes: `te` `tel`)
35 | - **Urdu** (ISO Codes: `ur` `urd`)
36 | 
37 | ## Europe (6)
38 | 
39 | - **Czech** (ISO Codes: `cs` `ces`)
40 | - **Greek** (ISO Codes: `el` `ell`)
41 | - **Latin** (ISO Codes: `la` `lat`)
42 | - **Macedonian** (ISO Codes: `mk` `mkd`)
43 | - **Serbian** (ISO Codes: `sr` `srp`)
44 | - **Slovak** (ISO Codes: `sk` `slk`)
45 | 
46 | ## Europe-east (10)
47 | 
48 | - **Belarusian** (ISO Codes: `be` `bel`)
49 | - **Bulgarian** (ISO Codes: `bg` `bul`)
50 | - **Estonian** (ISO Codes: `et` `est`)
51 | - **Hungarian** (ISO Codes: `hu` `hun`)
52 | - **Latvian** (ISO Codes: `lv` `lvs`)
53 | - **Lithuanian** (ISO Codes: `lt` `lit`)
54 | - **Polish** (ISO Codes: `pl` `pol`)
55 | - **Romanian** (ISO Codes: `ro` `ron`)
56 | - **Russian** (ISO Codes: `ru` `rus`)
57 | - **Ukrainian** (ISO Codes: `uk` `ukr`)
58 | 
59 | ## Europe-north (5)
60 | 
61 | - **Danish** (ISO Codes: `da` `dan`)
62 | - **Finnish** (ISO Codes: `fi` `fin`)
63 | - **Icelandic** (ISO Codes: `is` `isl`)
64 | - **Norwegian** (ISO Codes: `no` `nob`)
65 | - **Swedish** (ISO Codes: `sv` `swe`)
66 | 
67 | ## Europe-west (8)
68 | 
69 | - **Dutch** (ISO Codes: `nl` `nld`)
70 | - **English** (ISO Codes: `en` `eng`)
71 | - **French** (ISO Codes: `fr` `fra`)
72 | - **German** (ISO Codes: `de` `deu`)
73 | - **Irish** (ISO Codes: `ga` `gle`)
74 | - **Italian** (ISO Codes: `it` `ita`)
75 | - **Portuguese** (ISO Codes: `pt` `por`)
76 | - **Spanish** (ISO Codes: `es` `spa`)
77 | 
78 | ## Middle-east (10)
79 | 
80 | - **Arabic** (ISO Codes: `ar` `ara`)
81 | - **Armenian** (ISO Codes: `hy` `hye`)
82 | - **Hebrew** (ISO Codes: `he` `heb`)
83 | - **Kazakh** (ISO Codes: `kk` `kaz`)
84 | - **Mongolian** (ISO Codes: `mn` `mon`)
85 | - **Persian** (ISO Codes: `fa` `pes`)
86 | - **Tatar** (ISO Codes: `tt` `tat`)
87 | - **Turkish** (ISO Codes: `tr` `tur`)
88 | - **Turkmen** (ISO Codes: `tk` `tuk`)
89 | - **Yiddish** (ISO Codes: `yi` `yid`)
90 | 
91 | ## Other (3)
92 | 
93 | - **Esperanto** (ISO Codes: `eo` `epo`)
94 | - **Klingon** (ISO Codes: `tlh` `tlh`)
95 | - **Volapuk** (ISO Codes: `vo` `vol`)
96 | 


--------------------------------------------------------------------------------
/docs/algorithm.md:
--------------------------------------------------------------------------------
 1 | # Algorithm
 2 | 
 3 | This library uses a variant of the usual N-gram algorithm, which gives fast and good results.
 4 | 
 5 | Most libraries directly use a Bayesian scoring algorithm to identify a text's language. TinyLD adds a few steps before and after, trying to mimic human logic and identify languages by their unique character patterns or word usage.
 6 | 
 7 | This is similar to what ML methods do, which is why this library has a training phase too. The goal is to find which "features" or "n-grams" are the most useful for detection without hardcoding any language-specific rules. The heavy lifting is done at build time, so the runtime can be fast and efficient.
 8 | 
 9 | ## How it works ?
10 | 
11 | The string will be split into chunks based on punctuation. Each chunk will be evaluated separately and results merged later weighted with the chunk size.
12 | 
13 | This allows mixed-language content to be handled:
14 | 
15 | ```js
16 | 'This is a text in english "おはよう" and we can continue to write (and this is english too)'
17 | ```
18 | 
19 | ```js
20 | 'this is a text in english', // => will be detected as EN
21 |   'おはよう', // => will be detected as JA
22 |   'and we can continue to write',
23 |   'and this is english too'
24 | ```
25 | 
26 | Then each chunk will be evaluated with the following method:
27 | 
28 | ---
29 | 
30 | ### **1) First pass**: Unique Character Detection
31 | 
32 | Some languages like Japanese or Korean can be identified right away, just based on their characters or punctuation, and don't even need to reach the scoring algorithm.
33 | 
34 | **Example**:
35 | 
36 | - `も` is japanese
37 | - `두` is korean
38 | - `où` is french
39 | 
40 | This identification is done on different sizes of grams (including 1-gram and 2-gram), which gives better results than other libraries on short texts.
41 | 
42 | **This pass is**:
43 | 
44 | - really fast (a lookup in a map)
45 | - returns only one locale (locales detected this way are really accurate)
46 | 
47 | ---
48 | 
49 | ### **2) Second pass**: Gram Detection (2-gram, 3-gram, ...)
50 | 
51 | Most of the other libraries are only using this part.
52 | This is the more traditional method of statistical analysis on grams.
53 | Split each word into 4-grams and, for each of them, try to find the languages that match and score them.
54 | 
55 | **This pass is**:
56 | 
57 | - probabilistic
58 | - returns multiple locales, which have to be scored and sorted
59 | - remove grams already covered by previous step (to save space)
60 | 
61 | ---
62 | 
63 | ## Why doing all that ? Is gram analysis not good enough ?
64 | 
65 | Individually, the accuracy of each method is not really high
66 | 
67 | - Unique character detection: ~65%
68 | - Gram detection: ~85%
69 | 
70 | But what allows this library to be so good, is that those detection methods are complementary and work together.
71 | 
72 | For example:
73 | 
74 | - Japanese accuracy is good thanks to character detection (JA ~99% but EN ~15%)
75 | - English accuracy is good thanks to word detection (JA ~1.5% but EN ~98%)
76 | 
77 | Which is why together those methods get an overall accuracy > 95%
78 | 


--------------------------------------------------------------------------------
/docs/faq.md:
--------------------------------------------------------------------------------
 1 | # Frequently Asked Questions
 2 | 
 3 | - [Language Detection Error](#my-text-is-detected-in-the-wrong-language)
 4 | - [Can I have a custom version](#can-i-have-a-version-specific-for-my-app-and-my-needs)
 5 | - [Short text detection issues](#can-tinyld-identify-short-strings)
 6 | - [Live Chat usage](#can-i-use-tinyld-for-an-application-like-a-chat-even-if-texts-are-short)
 7 | 
 8 | ---
 9 | 
10 | ## My text is detected in the wrong language
11 | 
12 | It's sad to hear, but it's not unusual.
13 | 
14 | As we can see [here](https://github.com/komodojp/tinyld/blob/develop/docs/benchmark.md#libraries), **Tinyld** is good but not perfect. Overall 1~2% of the time it will get it wrong.
15 | 
16 | The three things which usually increase the error rate:
17 | 
18 | - short inputs, try to make it longer
19 | - similar languages (like Spanish and Catalan)
20 | - generic names/brands which may appear in multiple language corpora
21 | 
22 | ---
23 | 
24 | ## Can I have a version specific for my app and my needs
25 | 
26 | Everything in life is about tradeoff.
27 | 
28 | Tinyld was designed to be accurate, small and fast.
29 | Based on how much space and resources you are ready to spend, we provide different flavors:
30 | 
31 | - **Tinyld** : The general one (~500KB) which detects 64 languages
32 | - **Tinyld Light** : Mostly for browser usage (~70KB) which detects 24 languages
33 | - **Tinyld Heavy (Soon)** : The one for backend usage (few MB) which focuses on accuracy only
34 | 
35 | To select the one you want, simply change your import
36 | 
37 | ```ts
38 | import { detect } from 'tinyld'
39 | import { detect } from 'tinyld/light'
40 | import { detect } from 'tinyld/heavy'
41 | ```
42 | 
43 | ---
44 | 
45 | ## Can Tinyld identify short strings?
46 | 
47 | If by short you mean one or two words with a good accuracy, the answer is most likely **No**.
48 | 
49 | The key point here is to understand algorithms behind language detection.
50 | 
51 | - How can you detect a language, without embedding and checking a whole dictionary for each language?
52 | - Even just between 2 or 3 languages, how would you do it? Handcraft regexp for specific languages?
53 | - How can you scale up this method easily to more languages? Even to languages you don't speak or understand?
54 | 
55 | There are multiple approaches to solve this problem, but the two main ones are AI and statistics. And the general idea is to recognize some patterns or succession of letters that are specific for each language. ([n-gram](https://en.wikipedia.org/wiki/N-gram))
56 | 
57 | **Good part**:
58 | 
59 | - We don't need to understand a language syntax to be able to detect it
60 | - We can extend this method to more languages fairly easily
61 | - The signature of a language can be quite small, only a few KB
62 | 
63 | **Bad part**:
64 | 
65 | - It requires a certain text size to get a good detection accuracy and valuable n-grams
66 | - Common short words are usually the best for detection ("the", "or", "do", "this"), which leads to better results on sentences than on single words
67 | - It requires a clean corpus for training
68 | - Mixed language content can be hard to detect
69 | 
70 | We are always trying to improve our process and detection rate, you can find some benchmark [related to this](https://github.com/komodojp/tinyld/blob/develop/docs/benchmark.md#accuracy-by-text-length).
71 | But to give some numbers:
72 | 
73 | - Tinyld usually passes the ~95% detection accuracy threshold around ~24 characters
74 | - It falls to ~80% for 12 characters (barely usable)
75 | - Below 10 characters, it's just random
76 | 
77 | We recommend you use the [TinyLD Playground](https://komodojp.github.io/tinyld/) to do some tests, and see how the accuracy increases with text length
78 | 
79 | ---
80 | 
81 | ## Can I use tinyld for an application like a chat, even if texts are short?
82 | 
83 | Yes you can, and this is why it was built originally.
84 | 
85 | One of the easy ways to work around the size issue is to keep a context: a user is unlikely to change language abruptly in the middle of a discussion, and multiple users usually chat in a common language.
86 | So you can keep some buffer (like the last 256 characters of this user in this channel) and check this and not just the last message.
87 | 
88 | This gives stability and more accurate results to the detection.
89 | 


--------------------------------------------------------------------------------
/docs/benchmark.md:
--------------------------------------------------------------------------------
  1 | # NodeJS Language Detection Benchmark :rocket:
  2 | 
  3 | - This kind of benchmark is not perfect and % can vary over time, but it gives a good idea of overall performances
  4 | - Language evaluated in this benchmark:
  5 |   - Asia: `jpn`, `cmn`, `kor`, `hin`
  6 |   - Europe: `fra`, `spa`, `por`, `ita`, `nld`, `eng`, `deu`, `fin`, `rus`
  7 |   - Middle east: `tur`, `heb`, `ara`
  8 | - This page and graphs are auto-generated from the code
  9 | 
 10 | ---
 11 | 
 12 | ## Libraries
 13 | 
 14 | Here is the list of libraries in this benchmark
 15 | 
 16 | | Library          | Script                      | Language | Properly Identified | Improperly identified | Not identified | Avg Execution Time | Disk Size |
 17 | | ---------------- | --------------------------- | -------- | ------------------- | --------------------- | -------------- | ------------------ | --------- |
 18 | | **TinyLD Heavy** | `yarn bench:tinyld-heavy`   | 64       | 99.249%             | 0.7478%               | 0.0032%        | 0.096ms.           | 2.0MB     |
 19 | | **TinyLD**       | `yarn bench:tinyld`         | 64       | 98.5231%            | 1.3712%               | 0.1057%        | 0.1191ms.          | 580KB     |
 20 | | **TinyLD Light** | `yarn bench:tinyld-light`   | 24       | 97.8778%            | 1.9842%               | 0.138%         | 0.0947ms.          | 68KB      |
 21 | | langdetect       | `yarn bench:langdetect`     | 53       | 95.675%             | 4.325%                | 0%             | 0.3647ms.          | 1.8MB     |
 22 | | node-cld         | `yarn bench:cld`            | 160      | 92.3654%            | 1.6213%               | 6.0133%        | 0.0711ms.          | > 10MB    |
 23 | | franc            | `yarn bench:franc`          | 187      | 74.2577%            | 25.7423%              | 0%             | 0.2242ms.          | 267KB     |
 24 | | franc-min        | `yarn bench:franc-min`      | 82       | 70.3891%            | 23.1888%              | 6.422%         | 0.084ms.           | 119KB     |
 25 | | franc-all        | `yarn bench:franc-all`      | 403      | 66.7081%            | 33.2919%              | 0%             | 0.4763ms.          | 509KB     |
 26 | | languagedetect   | `yarn bench:languagedetect` | 52       | 65.2835%            | 11.2808%              | 23.4357%       | 0.1896ms.          | 240KB     |
 27 | 
 28 | ---
 29 | 
 30 | ## Global Accuracy
 31 | 
 32 | ![Benchmark](./overall.svg)
 33 | 
 34 | We see two group of libraries
 35 | 
 36 | - `tinyld`, `langdetect` and `cld` over 90% accuracy
 37 | - `franc` and `languagedetect` under 75% accuracy
 38 | 
 39 | ## Per Language
 40 | 
 41 | ![Language](./language.svg)
 42 | 
 43 | We see big differences between languages:
 44 | 
 45 | - **Japanese** or **Korean** are almost at 100% for every libs (lot of unique characters)
 46 | - **Spanish** and **Portuguese** are really close and cause more false-positive and an higher error-rate
 47 | 
 48 | ## Accuracy By Text length
 49 | 
 50 | Most libraries are using statistical analysis, so longer is the input text, better will be the detection.
 51 | So we can often see quotes like this in those library documentations.
 52 | 
 53 | > Make sure to pass it big documents to get reliable results.
 54 | 
 55 | Let's see if this statement is true, and how those libraries behave for different input size (from small to long)
 56 | ![Size](./length.svg)
 57 | 
 58 | So the previous quote is right, over 512 characters all the libs become accurate enough.
 59 | 
 60 | But for a ~95% accuracy threshold:
 61 | 
 62 | - `tinyld` (green) reaches it around 24 characters
 63 | - `langdetect` (cyan) and `cld` (orange) reach it around 48 characters
 64 | 
 65 | ## Execution Time
 66 | 
 67 | ![Size](./exec_time.svg)
 68 | 
 69 | Here we can notice few things about performance:
 70 | 
 71 | - `langdetect` (cyan) and `franc` (pink) seems to slow down at a similar rate
 72 | - `tinyld` (green) slow down but at a really flat rate
 73 | - `cld` (orange) is definitely the fastest and doesn't show any apparent slow down
 74 | 
 75 | But we've seen previously that some of those libraries need more than 256 characters to be accurate.
 76 | It means they start to slow down at the same time they start to give decent results.
 77 | 
 78 | ---
 79 | 
 80 | ## **Conclusion**
 81 | 
 82 | ### Recommended :thumbsup:
 83 | 
 84 | #### - By platform :computer:
 85 | 
 86 | - For **NodeJS**: `TinyLD`, `langdetect` or `node-cld` (fast and accurate)
 87 | - For **Browser**: `TinyLD Light` or `franc-min` (small, decent accuracy, franc is less accurate but support more languages)
 88 | 
 89 | #### - By usage :speech_balloon:
 90 | 
 91 | - Short text (chatbot, keywords, database, ...): `TinyLD` or `langdetect`
 92 | - Long text (documents, webpage): `node-cld` or `TinyLD`
 93 | 
 94 | ### Not recommended :thumbsdown:
 95 | 
 96 | - `franc-all` is the worst in terms of accuracy, not a surprise because it tries to detect 400+ languages with only 3-grams. A technical demo to put big numbers but useless for real usage, even a language like english barely reaches ~45% detection rate.
 97 | - `languagedetect` is light but just not accurate enough
 98 | 
 99 | ---
100 | 
101 | ## Last word :raising_hand:
102 | 
103 | Thanks for reading this article, those metrics are really helpful for the development of `tinyld`.
104 | It's used in the development to see the impact of every modification and features.
105 | 
106 | If you want to contribute or see another library in this benchmark, [open an issue](https://github.com/komodojp/tinyld/issues)
107 | 


--------------------------------------------------------------------------------
/src/tokenizer.ts:
--------------------------------------------------------------------------------
  1 | import { cleanString, normalize } from './clean'
  2 | import { approximate, DetectOption, ILangProfiles, langs, toISO2, TRAINING_UNIQUE_GRAMS } from './core'
  3 | 
  4 | const chunk_regexp = /([,，、。!¿?！？":;()「」{}„“«»”"“<>⋯《》*]|[.[\]\\])+/
  5 | const word_regexp = /[ ]+/
  6 | 
  7 | export function chunkTokenizer(text: string): string[] {
  8 |   return text.split(chunk_regexp)
  9 | }
 10 | 
 11 | export function wordTokenizer(text: string): string[] {
 12 |   return text.split(word_regexp)
 13 | }
 14 | 
 15 | export function ngramTokenizer(text: string, length: number, padding = true): string[] {
 16 |   const ngramsArray = []
 17 |   const array = padding ? ' '.repeat(length - 1) + text + ' '.repeat(length - 1) : text
 18 | 
 19 |   for (let i = 0; i < array.length - (length - 1); i++) {
 20 |     const subNgramsArray = []
 21 | 
 22 |     let consecutiveSpace = 0
 23 |     for (let j = 0; j < length; j++) {
 24 |       if (array[i + j] === ' ') consecutiveSpace += 1
 25 |       else consecutiveSpace = 0
 26 |       if (consecutiveSpace > 1) continue
 27 |       subNgramsArray.push(array[i + j])
 28 |     }
 29 | 
 30 |     const str = subNgramsArray.join('')
 31 |     if (str.trim().length > 0 && str.length === length) ngramsArray.push(str)
 32 |   }
 33 | 
 34 |   return ngramsArray
 35 | }
 36 | 
 37 | export function detectUniqueGrams(
 38 |   text: string,
 39 |   profiles: ILangProfiles,
 40 |   keys: Set<string>,
 41 |   options: DetectOption
 42 | ): string {
 43 |   for (const rank of TRAINING_UNIQUE_GRAMS) {
 44 |     const grams = ngramTokenizer(text, rank)
 45 |     for (const gram of grams) {
 46 |       if (!keys.has(gram)) continue
 47 | 
 48 |       const country = toISO2(profiles.uniques[gram])
 49 |       if (options.only.length > 0) {
 50 |         if (!options.only.includes(country)) continue
 51 |       }
 52 |       if (options.verbose) console.log(`[Pass 1] detectUniqueGrams ${rank}-grams - match '${gram}' to ${country}`)
 53 |       return country
 54 |     }
 55 |   }
 56 |   return ''
 57 | }
 58 | 
 59 | export function detectPotentialGrams(text: string, profiles: ILangProfiles, options: DetectOption): string {
 60 |   const res = detectStatsGrams(text, profiles, options)
 61 |   if (res.length > 0) return res[0].lang
 62 |   return ''
 63 | }
 64 | 
 65 | export function detectStatsGrams(
 66 |   text: string,
 67 |   profiles: ILangProfiles,
 68 |   options: DetectOption
 69 | ): { lang: string; accuracy: number }[] {
 70 |   const langScores = new Map<string, number>()
 71 | 
 72 |   const grams = TRAINING_UNIQUE_GRAMS.map((x) => ngramTokenizer(text, x)).flat()
 73 |   if (options.verbose) console.log('[Pass 2] DetectPotentialGrams', text, grams)
 74 |   const langSet = new Set(
 75 |     [...langs.values()].filter((x) => {
 76 |       if (options.only.length > 0) return options.only.includes(x) || options.only.includes(toISO2(x))
 77 |       return true
 78 |     })
 79 |   )
 80 | 
 81 |   langSet.forEach((x) => langScores.set(x, 0))
 82 |   for (const gramValue of grams) {
 83 |     const gram = normalize(gramValue)
 84 |     const gramStat = profiles.multiples[gram]
 85 |     if (!gramStat) continue
 86 | 
 87 |     const gramLangs = new Set(Object.keys(gramStat))
 88 |     const debug: string[] = []
 89 |     for (const lang of langSet) {
 90 |       if (gramLangs.has(lang)) {
 91 |         langScores.set(lang, (langScores.get(lang) || 0) + (gramStat[lang] * gram.length) / 4)
 92 |         debug.push(`${lang} = ${(gramStat[lang] / 1024) * 100}%`)
 93 |       }
 94 |     }
 95 |     if (options.verbose && debug.length > 0) console.log(`Gram '${gram}'`, debug)
 96 |   }
 97 | 
 98 |   const entries = [...langScores.entries()]
 99 |   entries.sort((a, b) => b[1] - a[1])
100 |   const max = Math.max(...entries.map((x) => x[1])) || 1
101 |   const result = entries.slice(0, 8).map((x) => {
102 |     return {
103 |       lang: toISO2(x[0]),
104 |       accuracy: 1 - approximate((max - x[1]) / max),
105 |       score: approximate(x[1])
106 |     }
107 |   })
108 |   if (options.verbose) console.log(`Result`, text, result)
109 |   return result
110 | }
111 | 
/**
 * Full detection: combine the unique-gram pass and the statistical pass over the text.
 *
 * The text is cut into punctuation-delimited chunks, cleaned, and only the 7 longest
 * non-empty chunks are analyzed. A chunk matched by the unique-gram pass credits its
 * language with the chunk length; otherwise every word of the chunk is scored
 * statistically and each returned language is credited with its accuracy.
 *
 * @param text - text to analyze
 * @param options - detection options (`verbose` logs intermediate steps)
 * @param profiles - language profiles used by both passes
 * @param uniqueKeys - set of grams known to be unique to one language
 * @returns languages with a positive total, sorted by decreasing total; `accuracy` is the
 *   total divided by the number of characters analyzed
 */
export function detectAllStats(
  text: string,
  options: DetectOption,
  profiles: ILangProfiles,
  uniqueKeys: Set<string>
): { lang: string; accuracy: number }[] {
  const chunks = chunkTokenizer(text)
    .map((piece) => cleanString(piece))
    .filter((piece) => !!piece)
    .sort((a, b) => b.length - a.length)
    .slice(0, 7)
  if (options.verbose) console.log('Analize chunks', chunks)

  const totals: { [lang: string]: number } = {}
  let size = 0
  const credit = (lang: string, amount: number): void => {
    totals[lang] = (totals[lang] || 0) + amount
  }

  for (const chunk of chunks) {
    // pass 1 - unique character detection
    const unique = detectUniqueGrams(chunk, profiles, uniqueKeys, options)
    if (unique) {
      credit(unique, 1 * chunk.length)
      size += chunk.length
      continue
    }

    // pass 2 - statistical n-gram analysis, word by word
    for (const word of wordTokenizer(chunk)) {
      for (const guess of detectStatsGrams(word, profiles, options)) {
        credit(guess.lang, guess.accuracy)
      }
      size += word.length
    }
  }

  // merge result
  const result = Object.entries(totals)
    .filter(([, total]) => total > 0)
    .sort((a, b) => b[1] - a[1])
    .map(([lang, total]) => ({ lang, accuracy: total / size }))
  if (options.verbose) console.log('Merge Results', result)
  return result
}
155 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "name": "tinyld",
  3 |   "description": "Simple and Performant Language detection library (pure JS and zero dependencies)",
  4 |   "version": "1.3.4",
  5 |   "license": "MIT",
  6 |   "exports": {
  7 |     "./light": {
  8 |       "require": "./dist/tinyld.light.node.js",
  9 |       "import": "./dist/tinyld.light.node.mjs",
 10 |       "browser": "./dist/tinyld.light.browser.js",
 11 |       "types": "./dist/tinyld.light.node.d.ts"
 12 |     },
 13 |     "./heavy": {
 14 |       "require": "./dist/tinyld.heavy.node.js",
 15 |       "import": "./dist/tinyld.heavy.node.mjs",
 16 |       "browser": "./dist/tinyld.heavy.browser.js",
 17 |       "types": "./dist/tinyld.heavy.node.d.ts"
 18 |     },
 19 |     ".": {
 20 |       "require": "./dist/tinyld.normal.node.js",
 21 |       "import": "./dist/tinyld.normal.node.mjs",
 22 |       "browser": "./dist/tinyld.normal.browser.js",
 23 |       "types": "./dist/tinyld.normal.node.d.ts"
 24 |     }
 25 |   },
 26 |   "typesVersions": {
 27 |     "*": {
 28 |       "light": [
 29 |         "./dist/tinyld.light.node.d.ts"
 30 |       ],
 31 |       "heavy": [
 32 |         "./dist/tinyld.heavy.node.d.ts"
 33 |       ],
 34 |       "*": [
 35 |         "./dist/tinyld.normal.node.d.ts"
 36 |       ]
 37 |     }
 38 |   },
 39 |   "bin": {
 40 |     "tinyld": "./bin/tinyld.js",
 41 |     "tinyld-light": "./bin/tinyld-light.js",
 42 |     "tinyld-heavy": "./bin/tinyld-heavy.js"
 43 |   },
 44 |   "keywords": [
 45 |     "lang",
 46 |     "language",
 47 |     "language detection",
 48 |     "natural-language",
 49 |     "detect",
 50 |     "detector",
 51 |     "n-gram"
 52 |   ],
 53 |   "repository": {
 54 |     "type": "git",
 55 |     "url": "https://github.com/komodojp/tinyld.git"
 56 |   },
 57 |   "author": {
 58 |     "name": "Kevin Destrem",
 59 |     "email": "kevin_destrem@komodo.jp"
 60 |   },
 61 |   "scripts": {
 62 |     "train": "run-p train:*",
 63 |     "train:normal": "cross-env TINYLD_CONFIG=normal ts-node src/train.ts",
 64 |     "train:light": "cross-env TINYLD_CONFIG=light ts-node src/train.ts",
 65 |     "train:heavy": "cross-env TINYLD_CONFIG=heavy ts-node src/train.ts",
 66 |     "bench": "run-s bench:*",
 67 |     "bench:tinyld": "cross-env TINYLD_CONFIG=normal ts-node src/benchmark/tinyld.ts",
 68 |     "bench:tinyld-light": "cross-env TINYLD_CONFIG=light ts-node src/benchmark/tinyld_light.ts",
 69 |     "bench:tinyld-heavy": "cross-env TINYLD_CONFIG=heavy ts-node src/benchmark/tinyld_heavy.ts",
 70 |     "bench:cld": "ts-node src/benchmark/cld.ts",
 71 |     "bench:franc": "ts-node src/benchmark/franc.ts",
 72 |     "bench:franc-all": "ts-node src/benchmark/franc-all.ts",
 73 |     "bench:franc-min": "ts-node src/benchmark/franc-min.ts",
 74 |     "bench:langdetect": "ts-node src/benchmark/langdetect.ts",
 75 |     "bench:languagedetect": "ts-node src/benchmark/languagedetect.ts",
 76 |     "build": "run-p build:*",
 77 |     "build:normal-node": "esbuild src/index.ts --define:process.env={\\\"TINYLD_CONFIG\\\":\\\"normal\\\"} --bundle --charset=utf8 --minify --platform=node --outfile=dist/tinyld.normal.node.js",
 78 |     "build:normal-node-esm": "esbuild src/index.ts --define:process.env={\\\"TINYLD_CONFIG\\\":\\\"normal\\\"} --bundle --charset=utf8 --minify --platform=node --format=esm --outfile=dist/tinyld.normal.node.mjs",
 79 |     "build:normal-web": "esbuild src/index.ts --define:process.env={\\\"TINYLD_CONFIG\\\":\\\"normal\\\"} --bundle --charset=utf8 --platform=browser --format=esm --outfile=dist/tinyld.normal.browser.js",
 80 |     "build:light-node": "esbuild src/index_light.ts --define:process.env={\\\"TINYLD_CONFIG\\\":\\\"light\\\"} --bundle --charset=utf8 --minify --platform=node --outfile=dist/tinyld.light.node.js",
 81 |     "build:light-node-esm": "esbuild src/index_light.ts --define:process.env={\\\"TINYLD_CONFIG\\\":\\\"light\\\"} --bundle --charset=utf8 --minify --platform=node --format=esm --outfile=dist/tinyld.light.node.mjs",
 82 |     "build:light-web": "esbuild src/index_light.ts --define:process.env={\\\"TINYLD_CONFIG\\\":\\\"light\\\"} --bundle --charset=utf8 --minify --platform=browser --format=esm --outfile=dist/tinyld.light.browser.js",
 83 |     "build:heavy-node": "esbuild src/index_heavy.ts --define:process.env={\\\"TINYLD_CONFIG\\\":\\\"heavy\\\"} --bundle --charset=utf8 --minify --platform=node --outfile=dist/tinyld.heavy.node.js",
 84 |     "build:heavy-node-esm": "esbuild src/index_heavy.ts --define:process.env={\\\"TINYLD_CONFIG\\\":\\\"heavy\\\"} --bundle --charset=utf8 --minify --platform=node --format=esm --outfile=dist/tinyld.heavy.node.mjs",
 85 |     "build:heavy-web": "esbuild src/index_heavy.ts --define:process.env={\\\"TINYLD_CONFIG\\\":\\\"heavy\\\"} --bundle --charset=utf8 --minify --platform=browser --format=esm --outfile=dist/tinyld.heavy.browser.js",
 86 |     "build:type": "run-s build:type:*",
 87 |     "build:type:build": "tsc --emitDeclarationOnly --declaration --project tsconfig.json --outDir dist",
 88 |     "build:type:normal": "mv ./dist/index.d.ts ./dist/tinyld.normal.node.d.ts && cp ./dist/tinyld.normal.node.d.ts ./dist/tinyld.normal.browser.d.ts",
 89 |     "build:type:light": "mv ./dist/index_light.d.ts ./dist/tinyld.light.node.d.ts && cp ./dist/tinyld.light.node.d.ts ./dist/tinyld.light.browser.d.ts",
 90 |     "build:type:heavy": "mv ./dist/index_heavy.d.ts ./dist/tinyld.heavy.node.d.ts && cp ./dist/tinyld.heavy.node.d.ts ./dist/tinyld.heavy.browser.d.ts",
 91 |     "build:post": "yarn gen:svg && yarn gen:mkd && yarn lint",
 92 |     "gen:mkd": "node ./utils/mkdown.js",
 93 |     "gen:svg": "node ./utils/index.js",
 94 |     "lint": "eslint --ext .js,.ts --fix ./ && prettier --config .prettierrc --ignore-path .prettierignore --write \"**/*.{ts,js,md}\"",
 95 |     "test": "run-p test:*",
 96 |     "test:unit": "uvu tests",
 97 |     "test:dependencies": "yarn audit --level high || echo \"Run 'yarn update' to interactively update dependencies for this project\"",
 98 |     "test:lint": "eslint --ext .js,.ts ./ && prettier --config .prettierrc --ignore-path .prettierignore --check \"**/*.{ts,js}\"",
 99 |     "test:types": "tsc --noEmit"
100 |   },
101 |   "devDependencies": {
102 |     "@types/node": "^18.0.0",
103 |     "@typescript-eslint/eslint-plugin": "^4.28.5",
104 |     "@typescript-eslint/parser": "^4.28.5",
105 |     "chartist-svg": "^0.2.3",
106 |     "cld": "^2.7.0",
107 |     "cross-env": "^7.0.3",
108 |     "esbuild": "^0.14.0",
109 |     "eslint": "^7.32.0",
110 |     "franc": "^5.0.0",
111 |     "franc-all": "^5.0.0",
112 |     "franc-min": "^5.0.0",
113 |     "langdetect": "^0.2.1",
114 |     "languagedetect": "^2.0.0",
115 |     "npm-run-all": "^4.1.5",
116 |     "p-limit": "3.1.0",
117 |     "prettier": "^2.3.2",
118 |     "ts-node": "^10.2.0",
119 |     "typescript": "^4.3.5",
120 |     "uvu": "^0.5.1"
121 |   },
122 |   "engines": {
123 |     "node": ">= 12.10.0",
124 |     "npm": ">= 6.12.0",
125 |     "yarn": ">= 1.20.0"
126 |   }
127 | }
128 | 


--------------------------------------------------------------------------------
/utils/mkdown.js:
--------------------------------------------------------------------------------
  1 | const fs = require('fs')
  2 | const { langRegion, langName, supportedLanguages, toISO2 } = require('../dist/tinyld.normal.node.js')
  3 | 
  4 | function getJSON(filepath) {
  5 |   return JSON.parse(fs.readFileSync(filepath))
  6 | }
  7 | 
  8 | function capitalizeFirstLetter(string) {
  9 |   return string.charAt(0).toUpperCase() + string.slice(1)
 10 | }
 11 | 
 12 | async function generateDocLangs() {
 13 |   let content = ''
 14 |   const regions = [...new Set(supportedLanguages.map((x) => langRegion(x)))]
 15 |   regions.sort()
 16 |   for (const reg of regions) {
 17 |     const langs = supportedLanguages.filter((x) => langRegion(x) === reg)
 18 | 
 19 |     content += `\n## ${capitalizeFirstLetter(reg)} (${langs.length})\n`
 20 |     langs.sort((a, b) => langName(a).localeCompare(langName(b)))
 21 |     langs.forEach((x) => {
 22 |       content += `- **${langName(x)}** (ISO Codes: \`${toISO2(x)}\` \`${x}\`)\n`
 23 |     })
 24 |   }
 25 | 
 26 |   fs.writeFileSync(
 27 |     './docs/langs.md',
 28 |     `# ${supportedLanguages.length} Supported Languages
 29 | This list is auto-generated from the code and up-to-date.
 30 | ${content}`
 31 |   )
 32 | }
 33 | 
 34 | async function generateDocBenchmark() {
 35 |   const data = {
 36 |     tinyld: getJSON('./data/bench/tinyld.json'),
 37 |     'tinyld-light': getJSON('./data/bench/tinyld-light.json'),
 38 |     'tinyld-heavy': getJSON('./data/bench/tinyld-heavy.json'),
 39 |     langdetect: getJSON('./data/bench/langdetect.json'),
 40 |     cld: getJSON('./data/bench/cld.json'),
 41 |     franc: getJSON('./data/bench/franc.json'),
 42 |     'franc-min': getJSON('./data/bench/franc-min.json'),
 43 |     'franc-all': getJSON('./data/bench/franc-all.json'),
 44 |     languagedetect: getJSON('./data/bench/languagedetect.json')
 45 |   }
 46 | 
 47 |   const stats = (lib) => {
 48 |     return `${data[lib].stats.success_rate}% | ${data[lib].stats.error_rate}% | ${data[lib].stats.unindentified_rate}% | ${data[lib].stats.execution_time}ms.`
 49 |   }
 50 |   fs.writeFileSync(
 51 |     './docs/benchmark.md',
 52 |     `# NodeJS Language Detection Benchmark :rocket:
 53 | - This kind of benchmark is not perfect and % can vary over time, but it gives a good idea of overall performances
 54 | - Language evaluated in this benchmark:
 55 |     - Asia: \`jpn\`, \`cmn\`, \`kor\`, \`hin\`
 56 |     - Europe: \`fra\`, \`spa\`, \`por\`, \`ita\`, \`nld\`, \`eng\`, \`deu\`, \`fin\`, \`rus\`
 57 |     - Middle east: , \`tur\`, \`heb\`, \`ara\`
 58 | - This page and graphs are auto-generated from the code
 59 | 
 60 | ---
 61 | 
 62 | ## Libraries
 63 | 
 64 | Here is the list of libraries in this benchmark
 65 | 
 66 | | Library        | Script                      | Language | Properly Identified | Improperly identified | Not identified | Avg Execution Time | Disk Size |
 67 | | -------------- | --------------------------- | -------- | ------------------- | --------------------- | -------------- | ------------------ | --------- |
 68 | | **TinyLD Heavy** | \`yarn bench:tinyld-heavy\` | 64 | ${stats('tinyld-heavy')} | 2.0MB |
 69 | | **TinyLD**     | \`yarn bench:tinyld\`       | 64 | ${stats('tinyld')} | 580KB     |
 70 | | **TinyLD Light** | \`yarn bench:tinyld-light\` | 24 | ${stats('tinyld-light')} | 68KB |
 71 | | **langdetect | \`yarn bench:langdetect\`     | 53 | ${stats('langdetect')} |  1.8MB    |
 72 | | node-cld       | \`yarn bench:cld\`            | 160 | ${stats('cld')} |  > 10MB    |
 73 | | franc          | \`yarn bench:franc\`          | 187 | ${stats('franc')} |  267KB     |
 74 | | franc-min      | \`yarn bench:franc-min\`      | 82 | ${stats('franc-min')} |  119KB |
 75 | | franc-all      | \`yarn bench:franc-all\`      | 403 | ${stats('franc-all')} |  509KB     |
 76 | | languagedetect | \`yarn bench:languagedetect\` | 52 | ${stats('languagedetect')} |  240KB |
 77 | 
 78 | ---
 79 | 
 80 | ## Global Accuracy
 81 | ![Benchmark](./overall.svg)
 82 | 
 83 | We see two group of libraries
 84 | - \`tinyld\`, \`langdetect\` and \`cld\` over 90% accuracy
 85 | - \`franc\` and \`languagedetect\` under 75% accuracy
 86 | 
 87 | ## Per Language
 88 | ![Language](./language.svg)
 89 | 
 90 | We see big differences between languages:
 91 | * **Japanese** or **Korean** are almost at 100% for every libs (lot of unique characters)
 92 | * **Spanish** and **Portuguese** are really close and cause more false-positive and an higher error-rate
 93 | 
 94 | ## Accuracy By Text length
 95 | Most libraries are using statistical analysis, so longer is the input text, better will be the detection.
 96 | So we can often see quotes like this in those library documentations.
 97 | > Make sure to pass it big documents to get reliable results.
 98 | 
 99 | Let's see if this statement is true, and how those libraries behave for different input size (from small to long)
100 | ![Size](./length.svg)
101 | 
102 | So the previous quote is right, over 512 characters all the libs become accurate enough.
103 | 
104 | But for a ~95% accuracy threshold:
105 | * \`tinyld\` (green) reaches it around 24 characters
106 | * \`langdetect\` (cyan) and \`cld\` (orange) reach it around 48 characters
107 | 
108 | ## Execution Time
109 | ![Size](./exec_time.svg)
110 | 
111 | Here we can notice few things about performance:
112 | * \`langdetect\` (cyan) and \`franc\` (pink) seems to slow down at a similar rate
113 | * \`tinyld\` (green) slow down but at a really flat rate
114 | * \`cld\` (orange) is definitely the fastest and doesn't show any apparent slow down
115 | 
116 | But we've seen previously that some of those libraries need more than 256 characters to be accurate.
117 | It means they start to slow down at the same time they start to give decent results.
118 | 
119 | ---
120 | 
121 | ## **Conclusion**
122 | 
123 | ### Recommended :thumbsup:
124 | 
125 | #### - By platform :computer:
126 | 
127 | - For **NodeJS**: \`TinyLD\`, \`langdetect\` or \`node-cld\` (fast and accurate)
128 | - For **Browser**: \`TinyLD Light\` or \`franc-min\` (small, decent accuracy, franc is less accurate but support more languages)
129 | 
130 | #### - By usage :speech_balloon:
131 | 
132 | - Short text (chatbot, keywords, database, ...): \`TinyLD\` or \`langdetect\`
133 | - Long text (documents, webpage): \`node-cld\` or \`TinyLD\`
134 | 
135 | ### Not recommended :thumbsdown:
136 | 
137 | - \`franc-all\` is the worst in terms of accuracy, not a surprise because it tries to detect 400+ languages with only 3-grams. A technical demo to put big numbers but useless for real usage, even a language like english barely reaches ~45% detection rate.
138 | - \`languagedetect\` is light but just not accurate enough
139 | 
140 | ---
141 | 
142 | ## Last word :raising_hand:
143 | 
144 | Thanks for reading this article, those metrics are really helpful for the development of \`tinyld\`.
145 | It's used in the development to see the impact of every modification and features.
146 | 
147 | If you want to contribute or see another library in this benchmark, [open an issue](https://github.com/komodojp/tinyld/issues)`
148 |   )
149 | }
150 | 
// Entry point: regenerate the language list first, then the benchmark page.
;(async () => {
  await generateDocLangs()
  await generateDocBenchmark()
})().catch((error) => {
  // Without a handler, older Node.js releases only warn about the rejection and still
  // exit with code 0; report the failure and mark the process as failed so that chained
  // scripts (`a && b`) stop instead of continuing with stale or missing docs.
  console.error(error)
  process.exitCode = 1
})
155 | 


--------------------------------------------------------------------------------
/src/benchmark/bench.ts:
--------------------------------------------------------------------------------
  1 | import fs from 'fs'
  2 | import readline from 'readline'
  3 | import { approximate, getCoef, langs, langName, toISO2 } from '../core'
  4 | 
  5 | type DetectMethod = (val: string) => Promise<string> | string
  6 | 
  7 | export type BenchmarkResult = {
  8 |   size: Record<string, BenchmarkSize>
  9 |   stats: {
 10 |     min: number
 11 |     max: number
 12 |     success_rate: number
 13 |     error_rate: number
 14 |     unindentified_rate: number
 15 |     execution_time: number
 16 |   }
 17 |   languages: Record<string, number>
 18 | }
 19 | 
 20 | type BenchmarkSize = { success_rate: number; error_rate: number; unindentified_rate: number; execution_time: number }
 21 | type CountPerSize = {
 22 |   min: number
 23 |   max: number
 24 |   buffer: string
 25 |   total: number
 26 |   success: number
 27 |   error: number
 28 |   unidentified: number
 29 |   exec: number
 30 | }
 31 | 
 32 | const benchLangs = new Set([
 33 |   'jpn',
 34 |   'cmn',
 35 |   'kor',
 36 |   'hin',
 37 |   'nld',
 38 |   'fra',
 39 |   'eng',
 40 |   'deu',
 41 |   'spa',
 42 |   'por',
 43 |   'ita',
 44 |   'fin',
 45 |   'rus',
 46 |   'tur',
 47 |   'heb',
 48 |   'ara'
 49 | ])
 50 | 
 51 | export async function benchmark(detect: DetectMethod): Promise<BenchmarkResult> {
 52 |   const total = new Map<string, number>()
 53 |   const success = new Map<string, number>()
 54 |   let detectTotal = 0
 55 |   let detectIdentified = 0
 56 |   let detectUnidentified = 0
 57 |   let detectMistake = 0
 58 |   let executionTime = 0
 59 | 
 60 |   const countCategories = [
 61 |     { min: 0, max: 10 },
 62 |     { min: 10, max: 16 },
 63 |     { min: 16, max: 24 },
 64 |     { min: 24, max: 36 },
 65 |     { min: 36, max: 48 },
 66 |     { min: 48, max: 64 },
 67 |     { min: 64, max: 128 },
 68 |     { min: 128, max: 256 },
 69 |     { min: 256, max: 512 },
 70 |     { min: 512, max: 1024 }
 71 |   ]
 72 | 
 73 |   const globalCount: Record<number, BenchmarkSize> = Object.fromEntries(
 74 |     countCategories.map((x) => [x.max, { success_rate: 0, error_rate: 0, unindentified_rate: 0, execution_time: 0 }])
 75 |   )
 76 | 
 77 |   const errorMap = new Map<string, number>()
 78 | 
 79 |   for (const country of benchLangs.values()) {
 80 |     const fileStream = fs.createReadStream(`data/tmp/${country}/sentences.txt`)
 81 |     const rl = readline.createInterface({
 82 |       input: fileStream,
 83 |       crlfDelay: Infinity
 84 |     })
 85 | 
 86 |     let line = 0
 87 | 
 88 |     const langCount: Record<string, CountPerSize> = Object.fromEntries(
 89 |       countCategories.map((x) => [
 90 |         x.max,
 91 |         { min: x.min, max: x.max, buffer: '', total: 0, success: 0, error: 0, unidentified: 0, exec: 0 }
 92 |       ])
 93 |     )
 94 | 
 95 |     for await (const text of rl) {
 96 |       if (text.length < 16) continue
 97 |       line += 1
 98 |       if (line > 10000) break
 99 | 
100 |       total.set(country, (total.get(country) || 0) + 1)
101 |       detectTotal += 1
102 | 
103 |       const start = process.hrtime()
104 |       const res = await detect(text)
105 |       const duration = process.hrtime(start)[1] / 1000000
106 |       executionTime += duration
107 | 
108 |       if (res === '') {
109 |         detectUnidentified += 1
110 |       } else if (res === toISO2(country)) {
111 |         success.set(country, (success.get(country) || 0) + 1)
112 |         detectIdentified += 1
113 |       } else {
114 |         detectMistake += 1
115 |         const errorKey = `${toISO2(country)} -> ${res}`
116 |         errorMap.set(errorKey, (errorMap.get(errorKey) || 0) + 1)
117 |       }
118 |     }
119 | 
120 |     fileStream.close()
121 | 
122 |     const fileStream2 = fs.createReadStream(`data/tmp/${country}/sentences.txt`)
123 |     const rl2 = readline.createInterface({
124 |       input: fileStream2,
125 |       crlfDelay: Infinity
126 |     })
127 | 
128 |     for await (const text of rl2) {
129 |       for (const size of countCategories.map((x) => x.max)) {
130 |         if (langCount[size].buffer.length + text.length < langCount[size].max) {
131 |           if (langCount[size].buffer) {
132 |             langCount[size].buffer += `. ${text}`
133 |           } else {
134 |             langCount[size].buffer = text
135 |           }
136 | 
137 |           continue
138 |         }
139 | 
140 |         if (
141 |           langCount[size].buffer &&
142 |           langCount[size].total < 200 &&
143 |           langCount[size].buffer.length >= langCount[size].min &&
144 |           langCount[size].buffer.length <= langCount[size].max
145 |         ) {
146 |           const start = process.hrtime()
147 |           const res = await detect(langCount[size].buffer)
148 |           const duration = process.hrtime(start)[1] / 1000000
149 |           langCount[size].exec += duration
150 |           if (res === '') {
151 |             langCount[size].unidentified += 1
152 |           } else if (res === toISO2(country)) {
153 |             langCount[size].success += 1
154 |           } else {
155 |             langCount[size].error += 1
156 |           }
157 |           langCount[size].total += 1
158 |         }
159 | 
160 |         langCount[size].buffer = ''
161 |       }
162 |     }
163 | 
164 |     fileStream2.close()
165 | 
166 |     for (const size of countCategories.map((x) => x.max)) {
167 |       globalCount[size].success_rate += langCount[size].success
168 |       globalCount[size].error_rate += langCount[size].error
169 |       globalCount[size].unindentified_rate += langCount[size].unidentified
170 |       globalCount[size].execution_time += langCount[size].exec
171 |     }
172 |   }
173 | 
174 |   for (const size of countCategories.map((x) => x.max)) {
175 |     const entry = globalCount[size]
176 |     const cpt = entry.success_rate + entry.error_rate + entry.unindentified_rate
177 | 
178 |     entry.success_rate = approximate((entry.success_rate / cpt) * 100)
179 |     entry.error_rate = approximate((entry.error_rate / cpt) * 100)
180 |     entry.unindentified_rate = approximate((entry.unindentified_rate / cpt) * 100)
181 |     entry.execution_time = approximate(entry.execution_time / cpt)
182 |   }
183 | 
184 |   console.log(`--- Per language Accuracy ---`)
185 |   const languageAccuracy: [string, number][] = []
186 |   const acc: [number, string][] = []
187 |   for (const lang of total.keys()) {
188 |     const s = success.get(lang) || 1
189 |     const t = total.get(lang) || 1
190 |     acc.push([s / t, ` - ${langName(lang)} (${lang}) - ${approximate((s / t) * 100)}% (coef: ${getCoef(lang)})`])
191 |     languageAccuracy.push([lang, approximate((s / t) * 100)])
192 |   }
193 |   acc.sort((a, b) => b[0] - a[0])
194 |   languageAccuracy.sort((a, b) => b[1] - a[1])
195 |   acc.forEach((x) => console.log(x[1]))
196 | 
197 |   const errors = [...errorMap.entries()]
198 |   errors.sort((a, b) => b[1] - a[1])
199 |   console.log(
200 |     `\n--- More common errors (${
201 |       Math.round((detectMistake / detectTotal) * 100 * 100) / 100
202 |     }% : ${detectMistake} / ${detectTotal}) ---`
203 |   )
204 |   console.log(
205 |     errors
206 |       .map((x) => ` - ${x[0]} : ${approximate((100 * x[1]) / detectMistake)}% (error: ${x[1]})`)
207 |       .slice(0, 20)
208 |       .join('\n')
209 |   )
210 | 
211 |   console.log(`\n--- Summary (${langs.size} languages) ---`)
212 |   console.log(` - Properly identified: ${approximate((detectIdentified / detectTotal) * 100)}%`)
213 |   console.log(` - Improperly identified: ${approximate((detectMistake / detectTotal) * 100)}%`)
214 |   console.log(` - Unidentified: ${approximate((detectUnidentified / detectTotal) * 100)}%`)
215 |   console.log(` - Avg exec time: ${approximate(executionTime / detectTotal)}ms.`)
216 | 
217 |   return {
218 |     size: globalCount,
219 |     stats: {
220 |       min: Math.min(...languageAccuracy.map((x) => x[1])),
221 |       max: Math.max(...languageAccuracy.map((x) => x[1])),
222 |       success_rate: approximate((detectIdentified / detectTotal) * 100),
223 |       error_rate: approximate((detectMistake / detectTotal) * 100),
224 |       unindentified_rate: approximate((detectUnidentified / detectTotal) * 100),
225 |       execution_time: approximate(executionTime / detectTotal)
226 |     },
227 |     languages: Object.fromEntries(languageAccuracy)
228 |   }
229 | }
230 | 


--------------------------------------------------------------------------------
/playground/src/App.vue:
--------------------------------------------------------------------------------
  1 | <template>
  2 |   <div>
  3 |     <div class="area" >
  4 |       <div class="absolute flex w-full h-full justify-center items-center">
  5 |         <div class="w-[600px] max-w-[80vw] font-bold text-4xl z-10">
  6 |           <h1 class="text-slate-50">TinyLD Playground</h1>
  7 |           <p class="text-sm font-normal mt-2 text-slate-300">
  8 |             Tiny Language Detector, simply detect the language of a unicode UTF-8 text
  9 |           </p>
 10 |           <div class="flex justify-between mt-4">
 11 |             <div class="flex items-center px-4 py-2 rounded border border-gray-200 dark:border-gray-700 bg-slate-200">
 12 |               <input id="bordered-radio-2" type="radio" v-model="flavor" value="tinyld-light" name="bordered-radio" class="w-4 h-4 text-blue-600 bg-gray-100 border-gray-300 focus:ring-blue-500 dark:focus:ring-blue-600 dark:ring-offset-gray-800 focus:ring-2 dark:bg-gray-700 dark:border-gray-600">
 13 |               <label for="bordered-radio-2" class="ml-2 w-full text-sm font-medium text-gray-900 dark:text-gray-800">TinyLD Light <span class="text-slate-400">~65KB</span></label>
 14 |             </div>
 15 |             <div class="flex items-center px-4 py-2 rounded border border-gray-200 dark:border-gray-700 bg-slate-200">
 16 |               <input id="bordered-radio-1" type="radio" v-model="flavor" value="tinyld" name="bordered-radio" class="w-4 h-4 text-blue-600 bg-gray-100 border-gray-300 focus:ring-blue-500 dark:focus:ring-blue-600 dark:ring-offset-gray-800 focus:ring-2 dark:bg-gray-700 dark:border-gray-600">
 17 |               <label for="bordered-radio-1" class="ml-2 w-full text-sm font-medium text-gray-900 dark:text-gray-800">TinyLD <span class="text-slate-400">~500KB</span></label>
 18 |             </div>
 19 |             <div class="flex items-center px-4 py-2 rounded border border-gray-200 dark:border-gray-700 bg-slate-200">
 20 |               <input id="bordered-radio-3" type="radio" v-model="flavor" value="tinyld-heavy" name="bordered-radio" class="w-4 h-4 text-blue-600 bg-gray-100 border-gray-300 focus:ring-blue-500 dark:focus:ring-blue-600 dark:ring-offset-gray-800 focus:ring-2 dark:bg-gray-700 dark:border-gray-600">
 21 |               <label for="bordered-radio-3" class="ml-2 w-full text-sm font-medium text-gray-900 dark:text-gray-800">TinyLD Heavy <span class="text-slate-400">~2MB</span></label>
 22 |             </div>
 23 |           </div>
 24 |           <textarea
 25 |             rows="8"
 26 |             v-model="input"
 27 |             placeholder="Enter your text here"
 28 |             class="block w-full px-3 py-2 bg-white border border-slate-300 rounded-md text-sm shadow-sm placeholder-slate-400
 29 |       focus:outline-none focus:border-sky-500 focus:ring-1 focus:ring-sky-500 font-normal mt-5"/>
 30 | 
 31 |           <div v-if="results && results.length > 0" class="font-normal bg-white border shadow-sm rounded-md mt-2 px-3 py-2 text-sm">
 32 |             <ul class="list-item">
 33 |               <li v-for="(result, index) in results" :key="result.lang" class="flex items-center pt-1">
 34 |                 <span style="width: 80px" class="px-2 font-semibold uppercase">
 35 |                   {{ result.lang }}
 36 |                 </span>
 37 |                 <div class="w-full bg-gray-100 rounded-full h-2.5 dark:bg-gray-700">
 38 |                   <div v-if="index === 0" class="bg-blue-400 h-2.5 rounded-full" :style="`width: ${Math.round(result.accuracy * 10000)/100 }%`"></div>
 39 |                   <div v-else class="bg-yellow-200 h-2.5 rounded-full" :style="`width: ${Math.round(result.accuracy * 10000)/100 }%`"></div>
 40 |                 </div>
 41 |                 <span style="width: 80px" class="px-2">
 42 |                   {{ Math.round(result.accuracy * 10000)/100 }}%
 43 |                 </span>
 44 |               </li>
 45 |             </ul>
 46 |           </div>
 47 | 
 48 |           <div class="flex gap-3 pt-4"> <!-- external links open in a new tab; "Share" copies a link to the clipboard -->
 49 |             <a href="https://github.com/komodojp/tinyld" title="Github" target="_blank" rel="noopener noreferrer" class="text-slate-200 hover:text-slate-50">
 50 |               <v-icon label="github" :icon="{ prefix: 'fab', iconName: 'github' }" />
 51 |             </a>
 52 |             <a href="https://github.com/komodojp/tinyld/blob/develop/docs/benchmark.md" title="Benchmark" target="_blank" rel="noopener noreferrer" class="text-slate-200 hover:text-slate-50">
 53 |               <v-icon label="benchmark" icon="chart-line" />
 54 |             </a>
 55 |             <a href="https://github.com/komodojp/tinyld/blob/develop/docs/faq.md" title="FAQ" target="_blank" rel="noopener noreferrer" class="text-slate-200 hover:text-slate-50">
 56 |               <v-icon label="FAQ" :icon="{ prefix: 'far', iconName: 'circle-question' }" />
 57 |             </a>
 58 |             <a href="#" title="Share" class="text-slate-200 hover:text-slate-50" @click.prevent="copy()">
 59 |               <v-icon label="Share" icon="share-from-square" />
 60 |               <span v-if="copied" class="text-slate-50 pl-2 text-lg">Copied to clipboard !</span>
 61 |             </a>
 62 |           </div>
 63 |         </div>
 64 |       </div>
 65 | 
 66 |       <ul class="circles">
 67 |         <li></li>
 68 |         <li></li>
 69 |         <li></li>
 70 |         <li></li>
 71 |         <li></li>
 72 |         <li></li>
 73 |         <li></li>
 74 |         <li></li>
 75 |         <li></li>
 76 |         <li></li>
 77 |       </ul>
 78 |     </div >
 79 |   </div>
 80 | </template>
 81 | 
 82 | <script setup>
 83 | import { ref, computed } from 'vue'
 84 | import { detectAll } from 'tinyld'
 85 | import { detectAll as detectAllLight } from 'tinyld/light'
 86 | import { detectAll as detectAllHeavy } from 'tinyld/heavy'
 87 | import { useClipboard } from '@vueuse/core'
 88 | 
 89 | // Seed the text from the `?text=` query parameter so that a shared link restores the text it was created from
 90 | const input = ref(new URLSearchParams(window.location.search).get('text') ?? '')
 91 | const flavor = ref('tinyld')
 92 | // Detection results for the current text, computed with the selected model flavor
 93 | const results = computed(() => {
 94 |   if (flavor.value === "tinyld-light") return detectAllLight(input.value)
 95 |   if (flavor.value === "tinyld-heavy") return detectAllHeavy(input.value)
 96 |   return detectAll(input.value)
 97 | })
 98 | 
 99 | // Shareable URL embedding the current text
100 | const link = computed(() => `https://komodojp.github.io/tinyld/?text=${encodeURIComponent(input.value)}`)
101 | const clipboard = useClipboard({ source: link })
102 | // Copy the share link, then display the "Copied to clipboard" hint for 2 seconds
103 | const copied = ref(false)
104 | const copy = () => {
105 |   clipboard.copy()
106 |   copied.value = true
107 |   setTimeout(() => {
108 |     copied.value = false
109 |   }, 2000)
110 | }
111 | </script>
112 | 
113 | <style scoped>
114 | .area{
115 |     background: #4e54c8; /* solid fallback when gradients are unsupported */
116 |     background: linear-gradient(to left, #8f94fb, #4e54c8); /* `to left` is only valid in the unprefixed function */
117 |     width: 100%;
118 |     height:100vh;
119 | }
120 | 
121 | .circles{
122 |     position: absolute;
123 |     top: 0;
124 |     left: 0;
125 |     width: 100%;
126 |     height: 100%;
127 |     overflow: hidden;
128 | }
129 | 
130 | .circles li{
131 |     position: absolute;
132 |     display: block;
133 |     list-style: none;
134 |     width: 20px;
135 |     height: 20px;
136 |     background: rgba(255, 255, 255, 0.2);
137 |     animation: animate 25s linear infinite;
138 |     bottom: -150px;
139 | 
140 | }
141 | 
142 | .circles li:nth-child(1){
143 |     left: 25%;
144 |     width: 80px;
145 |     height: 80px;
146 |     animation-delay: 0s;
147 | }
148 | 
149 | 
150 | .circles li:nth-child(2){
151 |     left: 10%;
152 |     width: 20px;
153 |     height: 20px;
154 |     animation-delay: 2s;
155 |     animation-duration: 12s;
156 | }
157 | 
158 | .circles li:nth-child(3){
159 |     left: 70%;
160 |     width: 20px;
161 |     height: 20px;
162 |     animation-delay: 4s;
163 | }
164 | 
165 | .circles li:nth-child(4){
166 |     left: 40%;
167 |     width: 60px;
168 |     height: 60px;
169 |     animation-delay: 0s;
170 |     animation-duration: 18s;
171 | }
172 | 
173 | .circles li:nth-child(5){
174 |     left: 65%;
175 |     width: 20px;
176 |     height: 20px;
177 |     animation-delay: 0s;
178 | }
179 | 
180 | .circles li:nth-child(6){
181 |     left: 75%;
182 |     width: 110px;
183 |     height: 110px;
184 |     animation-delay: 3s;
185 | }
186 | 
187 | .circles li:nth-child(7){
188 |     left: 35%;
189 |     width: 150px;
190 |     height: 150px;
191 |     animation-delay: 7s;
192 | }
193 | 
194 | .circles li:nth-child(8){
195 |     left: 50%;
196 |     width: 25px;
197 |     height: 25px;
198 |     animation-delay: 15s;
199 |     animation-duration: 45s;
200 | }
201 | 
202 | .circles li:nth-child(9){
203 |     left: 20%;
204 |     width: 15px;
205 |     height: 15px;
206 |     animation-delay: 2s;
207 |     animation-duration: 35s;
208 | }
209 | 
210 | .circles li:nth-child(10){
211 |     left: 85%;
212 |     width: 150px;
213 |     height: 150px;
214 |     animation-delay: 0s;
215 |     animation-duration: 11s;
216 | }
217 | 
218 | @keyframes animate {
219 |     0%{
220 |         transform: translateY(0) rotate(0deg);
221 |         opacity: 1;
222 |         border-radius: 0;
223 |     }
224 |     100%{
225 |         transform: translateY(-1000px) rotate(720deg);
226 |         opacity: 0;
227 |         border-radius: 50%;
228 |     }
229 | }
230 | </style>
231 | 


--------------------------------------------------------------------------------
/src/train.ts:
--------------------------------------------------------------------------------
  1 | import fs from 'fs'
  2 | import readline from 'readline'
  3 | import { ngramTokenizer } from './tokenizer'
  4 | import { processSentencesLineByLine } from './train/splitter'
  5 | import pLimit from 'p-limit'
  6 | import {
  7 |   configSet,
  8 |   getLangTopStatsGram,
  9 |   isSkipProba,
 10 |   langs,
 11 |   langToId,
 12 |   supportedLanguages,
 13 |   TOP_LANGUAGE_UNIQUE_GRAMS,
 14 |   TRAINING_UNIQUE_GRAMS
 15 | } from './core'
 16 | // Tokens never written to words.txt and never considered as unique grams (see processLang and processUniqueGrams)
 17 | const banWordList = new Set(['tatoeba', 'facebook', 'tom', '=', '-', '﹣'])
 18 | /** Build the cached training files of one language in data/tmp/<lang>: sentences.txt, words.txt and 1..5-gram.txt. */
 19 | async function processLang(lang: string) {
 20 |   const wordRank = new Map<string, number>()
 21 |   if (!fs.existsSync(`data/tmp`)) fs.mkdirSync(`data/tmp`)
 22 |   if (!fs.existsSync(`data/tmp/${lang}`)) fs.mkdirSync(`data/tmp/${lang}`)
 23 |   // step 1: extract the sentences of this language (skipped when the file already exists)
 24 |   if (!fs.existsSync(`data/tmp/${lang}/sentences.txt`)) {
 25 |     console.log(`Create ${lang} sentences.txt`)
 26 |     const fileStream = fs.createReadStream('data/tatoeba.csv')
 27 |     const writeStream = fs.createWriteStream(`data/tmp/${lang}/sentences.txt`, { flags: 'a' })
 28 |     const rl = readline.createInterface({
 29 |       input: fileStream,
 30 |       crlfDelay: Infinity
 31 |     })
 32 |     // tab separated rows: the 2nd column is the language, the 3rd one the sentence
 33 |     for await (const line of rl) {
 34 |       const [, country, text] = line.split('\t')
 35 |       if (country != lang) continue
 36 |       writeStream.write(`${text}\n`)
 37 |     }
 38 |     // optional extra corpus: only the lines following the `---` marker are used, short ones are dropped
 39 |     if (fs.existsSync(`data/udhr/udhr_${lang}.txt`)) {
 40 |       const udhrFileStream = fs.createReadStream(`data/udhr/udhr_${lang}.txt`)
 41 |       const rl2 = readline.createInterface({
 42 |         input: udhrFileStream,
 43 |         crlfDelay: Infinity
 44 |       })
 45 |       let useLine = false
 46 |       for await (const line of rl2) {
 47 |         if (line === '---') {
 48 |           useLine = true
 49 |           continue
 50 |         }
 51 |         if (!useLine) continue
 52 |         if (!line.trim() || line.length < 24) continue
 53 |         writeStream.write(`${line.trim()}\n`)
 54 |       }
 55 |     }
 56 |     // wait for the flush: the file is read back right below and `close()` returns before the data is written
 57 |     await new Promise<void>((resolve) => writeStream.end(() => resolve()))
 58 |     rl.close()
 59 |   }
 60 |   // step 2: word and n-gram frequency tables (skipped when words.txt already exists)
 61 |   if (!fs.existsSync(`data/tmp/${lang}/words.txt`)) {
 62 |     console.log(`Create ${lang} words.txt`)
 63 | 
 64 |     // parse sentences file
 65 |     const res = await processSentencesLineByLine(`data/tmp/${lang}/sentences.txt`)
 66 |     res.forEach((x) => wordRank.set(x.word, (wordRank.get(x.word) || 0) + x.count))
 67 | 
 68 |     // words, most frequent first, with a frequency relative to the most frequent word
 69 |     const wordOutStream = fs.createWriteStream(`data/tmp/${lang}/words.txt`, { flags: 'a' })
 70 |     const values = [...wordRank.entries()]
 71 |     values.sort((a, b) => b[1] - a[1])
 72 |     const wordMax = values.length > 0 ? values[0][1] : 1 // an empty corpus must not crash the training
 73 |     values.forEach((x) => {
 74 |       if (x[1] < 10 || x[1] / wordMax < 0.00001) return
 75 |       if (banWordList.has(x[0])) return
 76 |       wordOutStream.write(`${x[0]}\t${x[1] / wordMax}\n`)
 77 |     })
 78 |     await new Promise<void>((resolve) => wordOutStream.end(() => resolve()))
 79 |     // n-gram tables, derived from every counted word (rare and banned words included)
 80 |     for (const gram of [1, 2, 3, 4, 5]) {
 81 |       const gramRank = new Map<string, number>()
 82 | 
 83 |       for (const word of wordRank.keys()) {
 84 |         const count = wordRank.get(word) || 0
 85 | 
 86 |         ngramTokenizer(word, gram).forEach((x) => {
 87 |           gramRank.set(x, (gramRank.get(x) || 0) + count)
 88 |         })
 89 |       }
 90 | 
 91 |       const gramOutStream = fs.createWriteStream(`data/tmp/${lang}/${gram}-gram.txt`, { flags: 'a' })
 92 |       const gramValues = [...gramRank.entries()]
 93 |       gramValues.sort((a, b) => b[1] - a[1])
 94 |       const max = gramValues.length > 0 ? gramValues[0][1] : 1 // no gram of this size at all: write an empty file
 95 |       gramValues.forEach((x) => {
 96 |         if (x[1] < 10 || x[1] / max < 0.00001) return
 97 |         gramOutStream.write(`${x[0]}\t${x[1] / max}\n`)
 98 |       })
 99 |       await new Promise<void>((resolve) => gramOutStream.end(() => resolve())) // later steps read this file
100 |     }
101 |   }
102 | }
103 | /** True when `txt` contains an n-gram of a size below `gram` that is already registered in `uniques`. */
104 | const checkNgramContains = (txt: string, gram: number, uniques: Set<string>) => {
105 |   // only the strictly smaller gram sizes are inspected: 1 .. gram - 1
106 |   for (let size = 1; size < gram; size++) {
107 |     if (ngramTokenizer(txt, size).some((piece) => uniques.has(piece))) return true
108 |   }
109 |   return false
110 | }
111 | /** Map every n-gram of size `gram` that is present in a single language only to that language (capped per language). */
112 | async function processUniqueGrams(langs: string[], gram: number, uniques: Set<string>) {
113 |   type GramUsage = { count: number; index: number }
114 |   // gram -> language -> usage read from data/tmp/<lang>/<gram>-gram.txt
115 |   const usages = new Map<string, Map<string, GramUsage>>()
116 |   for (const lang of langs) {
117 |     const fileStream = fs.createReadStream(`data/tmp/${lang}/${gram}-gram.txt`)
118 |     const rl = readline.createInterface({ input: fileStream, crlfDelay: Infinity })
119 |     // position of the gram among the lines kept for this language
120 |     let index = 0
121 |     for await (const line of rl) {
122 |       const [txt, usage] = line.split('\t')
123 |       // ignore banned tokens and grams containing a smaller gram that is already unique
124 |       if (banWordList.has(txt)) continue
125 |       if (checkNgramContains(txt, gram, uniques)) continue
126 | 
127 |       let perLang = usages.get(txt)
128 |       if (!perLang) {
129 |         perLang = new Map()
130 |         usages.set(txt, perLang)
131 |       }
132 |       perLang.set(lang, { count: parseFloat(usage), index })
133 |       index += 1
134 |     }
135 |     fileStream.close()
136 |   }
137 | 
138 |   // keep the grams seen in exactly one language, grouped by that language
139 |   const uniqueMap = new Map<string, [number, string][]>()
140 |   for (const [txt, perLang] of usages) {
141 |     if (perLang.size !== 1) continue
142 |     const [country, usage] = [...perLang.entries()][0]
143 |     const found = uniqueMap.get(country) ?? []
144 |     found.push([usage.index || 0, txt])
145 |     uniqueMap.set(country, found)
146 |   }
147 | 
148 |   // gram -> language lookup table returned to the caller
149 |   const result: { [id: string]: string } = {}
150 |   for (const [country, found] of uniqueMap) {
151 |     // NOTE(review): the first TOP_LANGUAGE_UNIQUE_GRAMS + 1 grams are kept, one more than the constant suggests
152 |     for (const [, txt] of found.slice(0, TOP_LANGUAGE_UNIQUE_GRAMS + 1)) result[txt] = country
153 |   }
154 | 
155 |   return result
156 | }
157 | /** Statistical grams of size `gram`: gram -> { language: score in 2..1024 } for the most used grams of each language. */
158 | async function processLangGrams(langs: string[], gram: number, uniques: Set<string>) {
159 |   const allGrams = new Map<string, string[]>() // pass 1: gram -> languages using it
160 |   for (const lang of langs) {
161 |     if (isSkipProba(lang)) continue
162 |     const fileStream = fs.createReadStream(`data/tmp/${lang}/${gram}-gram.txt`)
163 |     const rl = readline.createInterface({
164 |       input: fileStream,
165 |       crlfDelay: Infinity
166 |     })
167 |     for await (const line of rl) {
168 |       const [txt] = line.split('\t')
169 |       if (uniques.has(txt)) continue // Set membership: `in` would test property names such as "has" or "size"
170 |       allGrams.set(txt, [...(allGrams.get(txt) || []), lang])
171 |     }
172 |     fileStream.close()
173 |   }
174 |   // pass 2: select the top grams of every language
175 |   const grams = new Set()
176 |   for (const lang of langs) {
177 |     if (isSkipProba(lang)) continue
178 |     const fileStream = fs.createReadStream(`data/tmp/${lang}/${gram}-gram.txt`)
179 |     const rl = readline.createInterface({
180 |       input: fileStream,
181 |       crlfDelay: Infinity
182 |     })
183 |     // number of grams this language may still contribute
184 |     let normal = getLangTopStatsGram(lang)
185 |     for await (const line of rl) {
186 |       const [txt] = line.split('\t')
187 |       if (uniques.has(txt)) continue
188 |       if ((allGrams.get(txt)?.length || 0) > 12) continue
189 |       // grams shared by more than 12 languages are skipped above and do not consume the quota
190 |       grams.add(txt)
191 |       normal--
192 |       if (normal <= 0) break
193 |     }
194 |     fileStream.close()
195 |   }
196 |   // pass 3: score the selected grams for every language that uses them
197 |   const langGrams = new Map<string, Record<string, number>>()
198 |   for (const lang of langs) {
199 |     if (isSkipProba(lang)) continue
200 |     const fileStream = fs.createReadStream(`data/tmp/${lang}/${gram}-gram.txt`)
201 |     const rl = readline.createInterface({
202 |       input: fileStream,
203 |       crlfDelay: Infinity
204 |     })
205 |     // `usage` is the frequency relative to the top gram of the file, mapped to 0..1024 on a concave curve
206 |     for await (const line of rl) {
207 |       const [txt, usage] = line.split('\t')
208 |       const value = parseFloat(usage)
209 |       if (!grams.has(txt)) continue
210 |       const data = langGrams.get(txt) || {}
211 |       const rnd = Math.round(1024 * (1 - (1 - value) * (1 - value)))
212 |       if (rnd <= 1) break // lines are sorted by decreasing usage: the remaining ones cannot score higher
213 |       data[lang] = rnd
214 |       langGrams.set(txt, data)
215 |     }
216 |     fileStream.close()
217 |   }
218 | 
219 |   return Object.fromEntries(langGrams.entries())
220 | }
221 | /**
222 |  * JSON.stringify replacer: plain objects are rebuilt with their keys inserted in sorted order,
223 |  * any other value (null, arrays, primitives, class instances) is returned untouched.
224 |  */
225 | // eslint-disable-next-line @typescript-eslint/no-explicit-any
226 | function sortKeys(_key: string, value: any) {
227 |   const isPlainObject = value != null && value.constructor == Object
228 |   if (!isPlainObject) return value
229 | 
230 |   // eslint-disable-next-line @typescript-eslint/no-explicit-any
231 |   const sorted: any = {}
232 |   for (const key of Object.keys(value).sort()) {
233 |     // Object.keys().sort() relies on the default (UTF-16 code unit) ordering
234 |     sorted[key] = value[key]
235 |   }
236 |   return sorted
237 | }
238 | /**
239 |  * Training entry point: prepares every language, computes the unique and the statistical grams,
240 |  * then writes the compressed profile to src/profiles/<configSet>.json.
241 |  */
242 | async function processFiles() {
243 |   const processLangs = [...langs.values()]
244 | 
245 |   // at most 4 languages are prepared at the same time
246 |   const limit = pLimit(4)
247 |   await Promise.all(
248 |     processLangs.map((lang) => limit(() => processLang(lang)))
249 |   )
250 | 
251 |   // unique grams: every gram size is given the uniques found by the previous sizes
252 |   let uniques: Record<string, string> = {}
253 |   for (const gram of TRAINING_UNIQUE_GRAMS) {
254 |     const known = new Set(Object.keys(uniques))
255 |     uniques = { ...uniques, ...(await processUniqueGrams(processLangs, gram, known)) }
256 |   }
257 | 
258 |   // statistical grams, computed against the complete set of uniques
259 |   let multiples: Record<string, Record<string, number>> = {}
260 |   for (const gram of TRAINING_UNIQUE_GRAMS) {
261 |     const known = new Set(Object.keys(uniques))
262 |     multiples = { ...multiples, ...(await processLangGrams(processLangs, gram, known)) }
263 |   }
264 | 
265 |   // uniques: gram -> base 36 language id, written as a number when the id is numeric
266 |   const compressedUniques = Object.entries(uniques).map(([gram, lang]): [string, string | number] => {
267 |     const id = langToId[lang].toString(36)
268 |     return [gram, isNaN(id as unknown as number) ? id : parseInt(id)]
269 |   })
270 | 
271 |   // multiples: gram -> concatenated upper case base 36 records (language id, then a score padded to 2 chars)
272 |   const idWidth = supportedLanguages.length > 36 ? 2 : 1
273 |   const compressedMultiples = Object.entries(multiples).map(([gram, scores]): [string, string] => {
274 |     const records = Object.entries(scores).map(([lang, score]) => {
275 |       const country = langToId[lang].toString(36).padStart(idWidth, '0')
276 |       const value = score.toString(36).padStart(2, '0')
277 |       return `${country.toUpperCase()}${value.toUpperCase()}`
278 |     })
279 |     return [gram, records.join('')]
280 |   })
281 | 
282 |   const profile = {
283 |     id: 'tinyld-dict',
284 |     uniques: Object.fromEntries(compressedUniques),
285 |     multiples: Object.fromEntries(compressedMultiples)
286 |   }
287 |   const json = JSON.stringify(profile, sortKeys, 2)
288 |   fs.writeFileSync(`src/profiles/${configSet}.json`, json)
289 | 
290 |   console.log('End')
291 | }
292 | // Script entry point: the training starts as soon as this module is executed
293 | processFiles()
294 | 


--------------------------------------------------------------------------------
/src/core.ts:
--------------------------------------------------------------------------------
  1 | export interface ILangProfiles {
  2 |   uniques: { [id: string]: string } // gram -> language (same shape as the `uniques` table built in train.ts)
  3 |   multiples: { [gram: string]: { [country: string]: number } } // gram -> language -> score
  4 | }
  5 | 
  6 | export interface ILangCompressedProfiles {
  7 |   uniques: { [id: string]: string | number } // gram -> base 36 language id (a number when the id is numeric)
  8 |   multiples: { [gram: string]: string } // gram -> concatenated base 36 records, as written by train.ts
  9 | }
 10 | 
 11 | // Training presets, selected through the TINYLD_CONFIG environment variable (see `configSet` below)
 12 | const config = {
 13 |   light: {
 14 |     TRAINING_UNIQUE_GRAMS: [1, 2, 3, 4], // gram sizes processed by the training
 15 |     TOP_LANGUAGE_UNIQUE_GRAMS: 60, // cap on the unique grams kept per language
 16 |     TOP_LANGUAGE_STATS_GRAMS: 50 // base number of statistical grams per language, scaled by `coefProb`
 17 |   },
 18 |   normal: {
 19 |     TRAINING_UNIQUE_GRAMS: [1, 2, 3, 4, 5],
 20 |     TOP_LANGUAGE_UNIQUE_GRAMS: 100,
 21 |     TOP_LANGUAGE_STATS_GRAMS: 180
 22 |   },
 23 |   heavy: {
 24 |     TRAINING_UNIQUE_GRAMS: [1, 2, 3, 4, 5],
 25 |     TOP_LANGUAGE_UNIQUE_GRAMS: 320,
 26 |     TOP_LANGUAGE_STATS_GRAMS: 1024
 27 |   }
 28 | }
 29 | 
 30 | // configuration: `normal` when TINYLD_CONFIG is unset or empty; any other value is cast without validation
 31 | export const configSet = (process.env.TINYLD_CONFIG || 'normal') as 'heavy' | 'normal' | 'light'
 32 | export const TRAINING_UNIQUE_GRAMS = config[configSet].TRAINING_UNIQUE_GRAMS
 33 | export const TOP_LANGUAGE_UNIQUE_GRAMS = config[configSet].TOP_LANGUAGE_UNIQUE_GRAMS
 34 | export const TOP_LANGUAGE_STATS_GRAMS = config[configSet].TOP_LANGUAGE_STATS_GRAMS
 35 | const PROBABILITY_ACCURACY = 10000 // `approximate` rounds to multiples of 1 / PROBABILITY_ACCURACY
 36 | /** Round `value` to the nearest multiple of 1 / PROBABILITY_ACCURACY. */
 37 | export function approximate(value: number): number {
 38 |   return Math.round(PROBABILITY_ACCURACY * value) / PROBABILITY_ACCURACY
 39 | }
 40 | /** True when the language opted out of statistical grams (`coefProb: 0`); a code missing from langMap is not skipped. */
 41 | export function isSkipProba(country: string): boolean {
 42 |   return langMap[country]?.coefProb === 0
 43 | }
 44 | /** `coefProb` of the language, 1 when it is not set or when the code is missing from langMap (instead of throwing). */
 45 | export function getCoef(country: string): number {
 46 |   return langMap[country]?.coefProb ?? 1
 47 | }
 48 | /** Number of statistical grams for the language: TOP_LANGUAGE_STATS_GRAMS scaled by its `coefProb` (1 by default). */
 49 | export function getLangTopStatsGram(country: string): number {
 50 |   return Math.round((langMap[country]?.coefProb ?? 1) * TOP_LANGUAGE_STATS_GRAMS)
 51 | }
 52 | // Description of one supported language (the values of `langMap`)
 53 | type LangOption = {
 54 |   code: string // code returned by toISO2 (ISO 639-1 per the langMap comment; a few entries reuse a 3 letter code)
 55 |   region: string // value returned by langRegion
 56 |   name: string // English name, returned by langName
 57 |   alias?: string[] // alternative codes accepted by validateISO2 / toISO3
 58 |   skipLight?: boolean // when true, the language is left out of the `light` config
 59 |   coefProb?: number // multiplier of TOP_LANGUAGE_STATS_GRAMS (1 when absent); 0 disables statistical grams
 60 | }
 61 | /** Merge the caller's options over the defaults (`only: []`, `verbose: false`) and return the resulting object. */
 62 | export const parseDetectOption = (options?: Partial<DetectOption>): DetectOption => {
 63 |   const defaults = { only: [], verbose: false } as DetectOption
 64 |   // the defaults object is created per call, so mutating it through Object.assign is safe
 65 |   return options ? Object.assign(defaults, options) : defaults
 66 | }
 67 | // Detection options; parseDetectOption fills in the defaults for the missing ones
 68 | export interface DetectOption {
 69 |   only: string[] // defaults to [] — presumably restricts detection to these languages (TODO confirm in the detectors)
 70 |   verbose: boolean // defaults to false
 71 | }
 72 | 
 73 | // Supported languages keyed by ISO 639-3 code; `code` is the ISO 639-1 equivalent (or a 3 letter code when none exists)
 74 | const langMap: { [id: string]: LangOption } = {
 75 |   // africa
 76 |   afr: { code: 'af', region: 'africa', name: 'Afrikaans', skipLight: true },
 77 |   amh: { code: 'am', region: 'africa', name: 'Amharic', skipLight: true, coefProb: 0 },
 78 |   // hau: { code: 'ha', region: 'africa', name: 'Hausa', skipLight: true },
 79 |   ber: { code: 'ber', region: 'africa', name: 'Berber', skipLight: true, coefProb: 0.25 },
 80 |   run: { code: 'rn', region: 'africa', name: 'Kirundi', skipLight: true },
 81 |   // swh: { code: 'sw', region: 'africa', name: 'Swahili', skipLight: true },
 82 |   // yor: { code: 'yo', region: 'africa', name: 'Yoruba', skipLight: true },
 83 | 
 84 |   // asia
 85 |   jpn: { code: 'ja', region: 'asia-east', name: 'Japanese', alias: ['jp'], coefProb: 0 },
 86 |   cmn: { code: 'zh', region: 'asia-east', name: 'Chinese', alias: ['cn'], coefProb: 0.25 },
 87 |   kor: { code: 'ko', region: 'asia-east', name: 'Korean', alias: ['kr'], coefProb: 0 },
 88 |   mya: { code: 'my', region: 'asia', name: 'Burmese', skipLight: true, coefProb: 0 },
 89 |   tha: { code: 'th', region: 'asia', name: 'Thai', coefProb: 0 },
 90 |   vie: { code: 'vi', region: 'asia', name: 'Vietnamese', skipLight: true, coefProb: 0 },
 91 |   ind: { code: 'id', region: 'asia', name: 'Indonesian', skipLight: true },
 92 |   khm: { code: 'km', region: 'asia', name: 'Khmer', skipLight: true, coefProb: 0 },
 93 |   // zsm: { code: 'ms', region: 'asia', name: 'Malaysian', skipLight: true },
 94 |   tgl: { code: 'tl', region: 'asia', name: 'Tagalog', skipLight: true },
 95 |   // jav: { code: 'jv', region: 'asia', name: 'Javanese', skipLight: true },
 96 |   ben: { code: 'bn', region: 'asia-south', name: 'Bengali', coefProb: 0 },
 97 |   tam: { code: 'ta', region: 'asia-south', name: 'Tamil', skipLight: true, coefProb: 0 },
 98 |   // mar: { code: 'mr', region: 'asia-south', name: 'Marathi', skipLight: true, coefProb: 0 },
 99 |   hin: { code: 'hi', region: 'asia-south', name: 'Hindi', coefProb: 0 },
100 |   urd: { code: 'ur', region: 'asia-south', name: 'Urdu', skipLight: true, coefProb: 0 },
101 |   guj: { code: 'gu', region: 'asia-south', name: 'Gujarati', skipLight: true, coefProb: 0 },
102 |   kan: { code: 'kn', region: 'asia-south', name: 'Kannada', skipLight: true, coefProb: 0 },
103 |   tel: { code: 'te', region: 'asia-south', name: 'Telugu', skipLight: true, coefProb: 0 },
104 | 
105 |   // europe
106 |   fra: { code: 'fr', region: 'europe-west', name: 'French' },
107 |   eng: { code: 'en', region: 'europe-west', name: 'English', alias: ['us', 'gb'] },
108 |   deu: { code: 'de', region: 'europe-west', name: 'German', coefProb: 0.5 },
109 |   spa: { code: 'es', region: 'europe-west', name: 'Spanish' },
110 |   // cat: { code: 'ca', region: 'europe-west', name: 'Catalan', skipLight: true },
111 |   por: { code: 'pt', region: 'europe-west', name: 'Portuguese', alias: ['po'] },
112 |   ita: { code: 'it', region: 'europe-west', name: 'Italian' },
113 |   nld: { code: 'nl', region: 'europe-west', name: 'Dutch' },
114 |   gle: { code: 'ga', region: 'europe-west', name: 'Irish', skipLight: true },
115 |   lat: { code: 'la', region: 'europe', name: 'Latin', skipLight: true },
116 |   ces: { code: 'cs', region: 'europe', name: 'Czech', skipLight: true },
117 |   // hrv: { code: 'hr', region: 'europe', name: 'Croatian', skipLight: true },
118 |   srp: { code: 'sr', region: 'europe', name: 'Serbian', skipLight: true },
119 |   ell: { code: 'el', region: 'europe', name: 'Greek', alias: ['gr'], coefProb: 0 },
120 |   mkd: { code: 'mk', region: 'europe', name: 'Macedonian', skipLight: true, coefProb: 1.5 },
121 |   slk: { code: 'sk', region: 'europe', name: 'Slovak', skipLight: true },
122 |   // slv: { code: 'sl', region: 'europe', name: 'Slovenian', skipLight: true },
123 |   dan: { code: 'da', region: 'europe-north', name: 'Danish', skipLight: true, coefProb: 1.5 },
124 |   swe: { code: 'sv', region: 'europe-north', name: 'Swedish' },
125 |   fin: { code: 'fi', region: 'europe-north', name: 'Finnish' },
126 |   nob: { code: 'no', region: 'europe-north', name: 'Norwegian', coefProb: 1.5 },
127 |   isl: { code: 'is', region: 'europe-north', name: 'Icelandic', skipLight: true, coefProb: 0.5 },
128 |   hun: { code: 'hu', region: 'europe-east', name: 'Hungarian' },
129 |   ron: { code: 'ro', region: 'europe-east', name: 'Romanian', coefProb: 0.5 },
130 |   bul: { code: 'bg', region: 'europe-east', name: 'Bulgarian', skipLight: true },
131 |   bel: { code: 'be', region: 'europe-east', name: 'Belarusian', skipLight: true },
132 |   rus: { code: 'ru', region: 'europe-east', name: 'Russian' },
133 |   ukr: { code: 'uk', region: 'europe-east', name: 'Ukrainian', skipLight: true },
134 |   pol: { code: 'pl', region: 'europe-east', name: 'Polish', coefProb: 0.5 },
135 |   lit: { code: 'lt', region: 'europe-east', name: 'Lithuanian', skipLight: true },
136 |   est: { code: 'et', region: 'europe-east', name: 'Estonian', skipLight: true },
137 |   lvs: { code: 'lv', region: 'europe-east', name: 'Latvian', skipLight: true },
138 | 
139 |   // middle east
140 |   hye: { code: 'hy', region: 'middle-east', name: 'Armenian', skipLight: true, coefProb: 0 },
141 |   tur: { code: 'tr', region: 'middle-east', name: 'Turkish' },
142 |   heb: { code: 'he', region: 'middle-east', name: 'Hebrew', coefProb: 0 },
143 |   yid: { code: 'yi', region: 'middle-east', name: 'Yiddish', skipLight: true, coefProb: 0.5 },
144 |   ara: { code: 'ar', region: 'middle-east', name: 'Arabic', coefProb: 0 },
145 |   pes: { code: 'fa', region: 'middle-east', name: 'Persian', skipLight: true, coefProb: 0 },
146 |   tat: { code: 'tt', region: 'middle-east', name: 'Tatar', skipLight: true, coefProb: 0 },
147 |   // kab: { code: 'kb', region: 'middle-east', name: 'Kabyle', skipLight: true },
148 |   kaz: { code: 'kk', region: 'middle-east', name: 'Kazakh', skipLight: true },
149 |   mon: { code: 'mn', region: 'middle-east', name: 'Mongolian', skipLight: true },
150 |   tuk: { code: 'tk', region: 'middle-east', name: 'Turkmen', skipLight: true },
151 |   // uzb: { code: 'uz', region: 'middle-east', name: 'Uzbek', skipLight: true }
152 | 
153 |   // other
154 |   epo: { code: 'eo', region: 'other', name: 'Esperanto', skipLight: true, coefProb: 0.5 },
155 |   vol: { code: 'vo', region: 'other', name: 'Volapuk', skipLight: true, coefProb: 0.5 },
156 |   // toki: { code: 'toki', region: 'other', name: 'Toki Pona', skipLight: true, coefProb: 0.1 },
157 |   tlh: { code: 'tlh', region: 'other', name: 'Klingon', skipLight: true, coefProb: 0.25 }
158 | }
159 | 
/**
 * ISO3 keys of `langMap` that are enabled for the active `configSet`.
 *
 * - `heavy` and `normal` keep every entry.
 * - `light` drops entries flagged with `skipLight`.
 * - Any other `configSet` value keeps nothing.
 */
export const langs = new Set(
  Object.keys(langMap).filter((iso3) => {
    if (configSet === 'heavy' || configSet === 'normal') return true
    return configSet === 'light' && !langMap[iso3].skipLight
  })
)

/** The enabled languages as an array, in the iteration order of `langs`. */
export const supportedLanguages = Array.from(langs)

/** Maps each supported ISO3 code to a 1-based numeric id (its position in `supportedLanguages` plus one). */
export const langToId = Object.fromEntries(supportedLanguages.map((iso3, index) => [iso3, index + 1]))

/** Inverse of `langToId`: maps each 1-based numeric id back to its ISO3 code. */
export const langFromId = Object.fromEntries(supportedLanguages.map((iso3, index) => [index + 1, iso3]))
168 | 
/**
 * Look up the region of a language.
 *
 * @param iso3 - Key into `langMap` (e.g. `rus`).
 * @returns The entry's `region`, or an empty string when `langMap` has no
 *   own entry for `iso3`.
 */
export function langRegion(iso3: string): string {
  // Own-property check: a bare `in` also matches inherited keys such as
  // `constructor` or `toString`, which made this return `undefined`.
  if (Object.prototype.hasOwnProperty.call(langMap, iso3)) return langMap[iso3].region
  return ''
}
173 | 
/**
 * Look up the display name of a language.
 *
 * @param iso3 - Key into `langMap` (e.g. `rus`).
 * @returns The entry's `name`, or an empty string when `langMap` has no
 *   own entry for `iso3`.
 */
export function langName(iso3: string): string {
  // Own-property check: a bare `in` also matches inherited keys such as
  // `constructor` or `toString`, which made this return a non-string value.
  if (Object.prototype.hasOwnProperty.call(langMap, iso3)) return langMap[iso3].name
  return ''
}
178 | 
/**
 * Resolve a short language code to the primary `code` of a `langMap` entry.
 *
 * An exact match on any entry's `code` takes priority; only when no entry
 * matches is each entry's optional `alias` list consulted.
 *
 * @param iso2 - Code to validate.
 * @returns The matching entry's `code`, or an empty string when nothing matches.
 */
export function validateISO2(iso2: string): string {
  const entries = Object.values(langMap)

  // Pass 1: primary codes.
  for (const entry of entries) {
    if (entry.code === iso2) return entry.code
  }

  // Pass 2: aliases, mapped back to the owning entry's primary code.
  for (const entry of entries) {
    if (entry.alias && entry.alias.includes(iso2)) return entry.code
  }

  return ''
}
185 | 
/**
 * Convert a `langMap` key to the entry's short `code`.
 *
 * @param iso3 - Key into `langMap` (e.g. `rus`).
 * @returns The entry's `code`, or `iso3` unchanged when `langMap` has no
 *   own entry for it.
 */
export function toISO2(iso3: string): string {
  // Own-property check: a bare `in` also matches inherited keys such as
  // `constructor`, which made this return `undefined` instead of echoing
  // the input back.
  if (Object.prototype.hasOwnProperty.call(langMap, iso3)) return langMap[iso3].code
  return iso3
}
190 | 
/**
 * Convert a short language code to the `langMap` key of the matching entry.
 *
 * An exact match on any entry's `code` takes priority; only when no entry
 * matches is each entry's optional `alias` list consulted.
 *
 * @param iso2 - Short code or alias to look up.
 * @returns The key of the matching entry, or an empty string when nothing matches.
 */
export function toISO3(iso2: string): string {
  const table = Object.entries(langMap)

  // Pass 1: primary codes.
  for (const [iso3, info] of table) {
    if (info.code === iso2) return iso3
  }

  // Pass 2: aliases.
  for (const [iso3, info] of table) {
    if (info.alias && info.alias.includes(iso2)) return iso3
  }

  return ''
}
198 | 


--------------------------------------------------------------------------------
/docs/overall.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:ct="http://gionkunz.github.com/chartist-js/ct" width="1200" height="792" class="ct-chart-bar" style="background:#FFF;background:#FFF;"><g class="ct-grids" transform="translate(0, 72)"><line x1="50" x2="50" y1="15" y2="565" class="ct-grid ct-horizontal" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line x1="176.11111111111111" x2="176.11111111111111" y1="15" y2="565" class="ct-grid ct-horizontal" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line x1="302.22222222222223" x2="302.22222222222223" y1="15" y2="565" class="ct-grid ct-horizontal" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line x1="428.33333333333337" x2="428.33333333333337" y1="15" y2="565" class="ct-grid ct-horizontal" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line x1="554.4444444444445" x2="554.4444444444445" y1="15" y2="565" class="ct-grid ct-horizontal" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line x1="680.5555555555555" x2="680.5555555555555" y1="15" y2="565" class="ct-grid ct-horizontal" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line x1="806.6666666666667" x2="806.6666666666667" y1="15" y2="565" class="ct-grid ct-horizontal" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line x1="932.7777777777778" x2="932.7777777777778" y1="15" y2="565" class="ct-grid ct-horizontal" 
style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line x1="1058.888888888889" x2="1058.888888888889" y1="15" y2="565" class="ct-grid ct-horizontal" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="565" y2="565" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="525.7142857142857" y2="525.7142857142857" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="486.42857142857144" y2="486.42857142857144" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="447.1428571428571" y2="447.1428571428571" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="407.8571428571429" y2="407.8571428571429" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="368.57142857142856" y2="368.57142857142856" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="329.2857142857143" y2="329.2857142857143" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="290" 
y2="290" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="250.71428571428572" y2="250.71428571428572" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="211.42857142857144" y2="211.42857142857144" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="172.14285714285717" y2="172.14285714285717" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="132.85714285714283" y2="132.85714285714283" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="93.57142857142856" y2="93.57142857142856" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="54.28571428571428" y2="54.28571428571428" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="15" y2="15" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line></g><g transform="translate(0, 72)"><g class="ct-series ct-series-a"><line x1="113.05555555555556" x2="113.05555555555556" y1="565" y2="20.900714285714344" class="ct-bar" ct:value="99.249" 
style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:40px !important;stroke-width:40px !important;stroke:#468966;"></line><line x1="239.16666666666669" x2="239.16666666666669" y1="565" y2="26.604214285714306" class="ct-bar" ct:value="98.5231" style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:40px !important;stroke-width:40px !important;stroke:#468966;"></line><line x1="365.27777777777777" x2="365.27777777777777" y1="565" y2="31.67442857142862" class="ct-bar" ct:value="97.8778" style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:40px !important;stroke-width:40px !important;stroke:#468966;"></line><line x1="491.3888888888889" x2="491.3888888888889" y1="565" y2="48.98214285714289" class="ct-bar" ct:value="95.675" style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:40px !important;stroke-width:40px !important;stroke:#468966;"></line><line x1="617.5" x2="617.5" y1="565" y2="74.98614285714302" class="ct-bar" ct:value="92.3654" style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:40px !important;stroke-width:40px !important;stroke:#468966;"></line><line x1="743.6111111111111" x2="743.6111111111111" y1="565" y2="217.26092857142862" class="ct-bar" ct:value="74.2577" style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:40px !important;stroke-width:40px !important;stroke:#468966;"></line><line x1="869.7222222222223" x2="869.7222222222223" y1="565" y2="247.6570714285715" class="ct-bar" ct:value="70.3891" style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:40px !important;stroke-width:40px 
!important;stroke:#468966;"></line><line x1="995.8333333333334" x2="995.8333333333334" y1="565" y2="276.5792142857142" class="ct-bar" ct:value="66.7081" style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:40px !important;stroke-width:40px !important;stroke:#468966;"></line><line x1="1121.9444444444446" x2="1121.9444444444446" y1="565" y2="287.77250000000004" class="ct-bar" ct:value="65.2835" style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:40px !important;stroke-width:40px !important;stroke:#468966;"></line></g><g class="ct-series ct-series-b"><line x1="113.05555555555556" x2="113.05555555555556" y1="20.900714285714344" y2="20.875571428571448" class="ct-bar" ct:value="0.0032" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:40px !important;stroke-width:40px !important;stroke:#FEC771;"></line><line x1="239.16666666666669" x2="239.16666666666669" y1="26.604214285714306" y2="25.773714285714277" class="ct-bar" ct:value="0.1057" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:40px !important;stroke-width:40px !important;stroke:#FEC771;"></line><line x1="365.27777777777777" x2="365.27777777777777" y1="31.67442857142862" y2="30.590142857142837" class="ct-bar" ct:value="0.138" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:40px !important;stroke-width:40px !important;stroke:#FEC771;"></line><line x1="491.3888888888889" x2="491.3888888888889" y1="48.98214285714289" y2="48.98214285714289" class="ct-bar" ct:value="0" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:40px !important;stroke-width:40px !important;stroke:#FEC771;"></line><line x1="617.5" 
x2="617.5" y1="74.98614285714302" y2="27.73878571428577" class="ct-bar" ct:value="6.0133" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:40px !important;stroke-width:40px !important;stroke:#FEC771;"></line><line x1="743.6111111111111" x2="743.6111111111111" y1="217.26092857142862" y2="217.26092857142862" class="ct-bar" ct:value="0" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:40px !important;stroke-width:40px !important;stroke:#FEC771;"></line><line x1="869.7222222222223" x2="869.7222222222223" y1="247.6570714285715" y2="197.19849999999997" class="ct-bar" ct:value="6.422" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:40px !important;stroke-width:40px !important;stroke:#FEC771;"></line><line x1="995.8333333333334" x2="995.8333333333334" y1="276.5792142857142" y2="276.5792142857142" class="ct-bar" ct:value="0" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:40px !important;stroke-width:40px !important;stroke:#FEC771;"></line><line x1="1121.9444444444446" x2="1121.9444444444446" y1="287.77250000000004" y2="103.63485714285707" class="ct-bar" ct:value="23.4357" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:40px !important;stroke-width:40px !important;stroke:#FEC771;"></line></g><g class="ct-series ct-series-c"><line x1="113.05555555555556" x2="113.05555555555556" y1="20.875571428571448" y2="15" class="ct-bar" ct:value="0.7478" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#EB7070;stroke-width:40px !important;stroke-width:40px !important;stroke:#EB7070;"></line><line x1="239.16666666666669" x2="239.16666666666669" y1="25.773714285714277" y2="15" 
class="ct-bar" ct:value="1.3712" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#EB7070;stroke-width:40px !important;stroke-width:40px !important;stroke:#EB7070;"></line><line x1="365.27777777777777" x2="365.27777777777777" y1="30.590142857142837" y2="15" class="ct-bar" ct:value="1.9842" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#EB7070;stroke-width:40px !important;stroke-width:40px !important;stroke:#EB7070;"></line><line x1="491.3888888888889" x2="491.3888888888889" y1="48.98214285714289" y2="15" class="ct-bar" ct:value="4.325" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#EB7070;stroke-width:40px !important;stroke-width:40px !important;stroke:#EB7070;"></line><line x1="617.5" x2="617.5" y1="27.73878571428577" y2="15" class="ct-bar" ct:value="1.6213" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#EB7070;stroke-width:40px !important;stroke-width:40px !important;stroke:#EB7070;"></line><line x1="743.6111111111111" x2="743.6111111111111" y1="217.26092857142862" y2="15" class="ct-bar" ct:value="25.7423" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#EB7070;stroke-width:40px !important;stroke-width:40px !important;stroke:#EB7070;"></line><line x1="869.7222222222223" x2="869.7222222222223" y1="197.19849999999997" y2="15.000785714285598" class="ct-bar" ct:value="23.1888" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#EB7070;stroke-width:40px !important;stroke-width:40px !important;stroke:#EB7070;"></line><line x1="995.8333333333334" x2="995.8333333333334" y1="276.5792142857142" y2="15" class="ct-bar" ct:value="33.2919" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#EB7070;stroke-width:40px 
!important;stroke-width:40px !important;stroke:#EB7070;"></line><line x1="1121.9444444444446" x2="1121.9444444444446" y1="103.63485714285707" y2="15" class="ct-bar" ct:value="11.2808" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#EB7070;stroke-width:40px !important;stroke-width:40px !important;stroke:#EB7070;"></line></g></g><g class="ct-labels" transform="translate(0, 72)"><text x="50" y="585" width="126.11111111111111" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 50, 585) translate(-10, 0)" style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">tinyld-heavy</text><text x="176.11111111111111" y="585" width="126.11111111111111" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 176.11111111111111, 585) translate(-10, 0)" style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">tinyld</text><text x="302.22222222222223" y="585" width="126.11111111111114" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 302.22222222222223, 585) translate(-10, 0)" 
style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">tinyld-light</text><text x="428.33333333333337" y="585" width="126.11111111111109" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 428.33333333333337, 585) translate(-10, 0)" style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">langdetect</text><text x="554.4444444444445" y="585" width="126.11111111111109" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 554.4444444444445, 585) translate(-10, 0)" style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">cld</text><text x="680.5555555555555" y="585" width="126.1111111111112" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 680.5555555555555, 585) translate(-10, 0)" 
style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">franc</text><text x="806.6666666666667" y="585" width="126.11111111111109" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 806.6666666666667, 585) translate(-10, 0)" style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">franc-min</text><text x="932.7777777777778" y="585" width="126.11111111111109" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 932.7777777777778, 585) translate(-10, 0)" style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">franc-all</text><text x="1058.888888888889" y="585" width="126.11111111111109" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 1058.888888888889, 585) translate(-10, 0)" 
style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">languagedetect</text><text y="565" x="40" height="39.285714285714285" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">30</text><text y="525.7142857142857" x="40" height="39.285714285714285" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">35</text><text y="486.42857142857144" x="40" height="39.28571428571429" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">40</text><text y="447.1428571428571" x="40" height="39.28571428571428" width="30" class="ct-label ct-vertical ct-start" 
style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">45</text><text y="407.8571428571429" x="40" height="39.28571428571428" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">50</text><text y="368.57142857142856" x="40" height="39.285714285714306" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">55</text><text y="329.2857142857143" x="40" height="39.28571428571428" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">60</text><text y="290" x="40" height="39.28571428571428" width="30" class="ct-label ct-vertical ct-start" 
style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">65</text><text y="250.71428571428572" x="40" height="39.28571428571428" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">70</text><text y="211.42857142857144" x="40" height="39.28571428571428" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">75</text><text y="172.14285714285717" x="40" height="39.285714285714334" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">80</text><text y="132.85714285714283" x="40" height="39.28571428571428" width="30" class="ct-label ct-vertical ct-start" 
style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">85</text><text y="93.57142857142856" x="40" height="39.28571428571428" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">90</text><text y="54.28571428571428" x="40" height="39.28571428571428" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">95</text><text y="15" x="40" height="30" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">100</text></g><text x="600" y="28.799999999999997" height="48" font-size="18px" font-family="Verdana" font-weight="bold" fill="crimson" text-anchor="middle" role="caption">NodeJS Language Detection - Overall Accuracy</text><text x="600" y="62.4" height="24" font-size="12px" font-family="Verdana" font-weight="bold" fill="indianred" 
text-anchor="middle"> (green: Success, orange: Unidentified, red: Error)</text></svg>


--------------------------------------------------------------------------------
/docs/language.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:ct="http://gionkunz.github.com/chartist-js/ct" width="1200" height="792" class="ct-chart-bar" style="background:#FFF;background:#FFF;"><g class="ct-grids" transform="translate(0, 72)"><line x1="50" x2="50" y1="15" y2="565" class="ct-grid ct-horizontal" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line x1="163.5" x2="163.5" y1="15" y2="565" class="ct-grid ct-horizontal" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line x1="277" x2="277" y1="15" y2="565" class="ct-grid ct-horizontal" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line x1="390.5" x2="390.5" y1="15" y2="565" class="ct-grid ct-horizontal" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line x1="504" x2="504" y1="15" y2="565" class="ct-grid ct-horizontal" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line x1="617.5" x2="617.5" y1="15" y2="565" class="ct-grid ct-horizontal" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line x1="731" x2="731" y1="15" y2="565" class="ct-grid ct-horizontal" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line x1="844.5" x2="844.5" y1="15" y2="565" class="ct-grid ct-horizontal" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line x1="958" x2="958" y1="15" y2="565" class="ct-grid ct-horizontal" 
style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line x1="1071.5" x2="1071.5" y1="15" y2="565" class="ct-grid ct-horizontal" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="565" y2="565" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="525.7142857142857" y2="525.7142857142857" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="486.42857142857144" y2="486.42857142857144" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="447.1428571428571" y2="447.1428571428571" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="407.8571428571429" y2="407.8571428571429" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="368.57142857142856" y2="368.57142857142856" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="329.2857142857143" y2="329.2857142857143" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="290" y2="290" x1="50" 
x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="250.71428571428572" y2="250.71428571428572" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="211.42857142857144" y2="211.42857142857144" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="172.14285714285717" y2="172.14285714285717" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="132.85714285714283" y2="132.85714285714283" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="93.57142857142856" y2="93.57142857142856" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="54.28571428571428" y2="54.28571428571428" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line><line y1="15" y2="15" x1="50" x2="1185" class="ct-grid ct-vertical" style="stroke:rgba(0,0,0,.2);stroke-width:1px;stroke-dasharray:2px;stroke-dasharray:2px;stroke-width:1px;stroke:rgba(0,0,0,.2);"></line></g><g transform="translate(0, 72)"><g class="ct-series ct-series-a"><line x1="82.75" x2="82.75" y1="565" y2="15.078571428571422" class="ct-bar" ct:value="99.99" 
style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:16px !important;stroke-width:16px !important;stroke:#468966;"></line><line x1="196.25" x2="196.25" y1="565" y2="15" class="ct-bar" ct:value="100" style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:16px !important;stroke-width:16px !important;stroke:#468966;"></line><line x1="309.75" x2="309.75" y1="565" y2="18.37857142857149" class="ct-bar" ct:value="99.57" style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:16px !important;stroke-width:16px !important;stroke:#468966;"></line><line x1="423.25" x2="423.25" y1="565" y2="18.221428571428532" class="ct-bar" ct:value="99.59" style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:16px !important;stroke-width:16px !important;stroke:#468966;"></line><line x1="536.75" x2="536.75" y1="565" y2="25.292857142857088" class="ct-bar" ct:value="98.69" style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:16px !important;stroke-width:16px !important;stroke:#468966;"></line><line x1="650.25" x2="650.25" y1="565" y2="20.735714285714266" class="ct-bar" ct:value="99.27" style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:16px !important;stroke-width:16px !important;stroke:#468966;"></line><line x1="763.75" x2="763.75" y1="565" y2="22.85714285714289" class="ct-bar" ct:value="99" style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:16px !important;stroke-width:16px !important;stroke:#468966;"></line><line x1="877.25" x2="877.25" y1="565" y2="45.407142857142844" class="ct-bar" ct:value="96.13" 
style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:16px !important;stroke-width:16px !important;stroke:#468966;"></line><line x1="990.75" x2="990.75" y1="565" y2="45.17142857142858" class="ct-bar" ct:value="96.16" style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:16px !important;stroke-width:16px !important;stroke:#468966;"></line><line x1="1104.25" x2="1104.25" y1="565" y2="29.850000000000023" class="ct-bar" ct:value="98.11" style="stroke:#d70206;stroke:#d70206;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#468966;stroke-width:16px !important;stroke-width:16px !important;stroke:#468966;"></line></g><g class="ct-series ct-series-b"><line x1="98.75" x2="98.75" y1="565" y2="15.078571428571422" class="ct-bar" ct:value="99.99" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#98BAE7;stroke-width:16px !important;stroke-width:16px !important;stroke:#98BAE7;"></line><line x1="212.25" x2="212.25" y1="565" y2="17.14735714285723" class="ct-bar" ct:value="99.7267" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#98BAE7;stroke-width:16px !important;stroke-width:16px !important;stroke:#98BAE7;"></line><line x1="325.75" x2="325.75" y1="565" y2="29.14285714285711" class="ct-bar" ct:value="98.2" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#98BAE7;stroke-width:16px !important;stroke-width:16px !important;stroke:#98BAE7;"></line><line x1="439.25" x2="439.25" y1="565" y2="17.907142857142844" class="ct-bar" ct:value="99.63" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#98BAE7;stroke-width:16px !important;stroke-width:16px !important;stroke:#98BAE7;"></line><line x1="552.75" x2="552.75" y1="565" y2="30.792857142857088" 
class="ct-bar" ct:value="97.99" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#98BAE7;stroke-width:16px !important;stroke-width:16px !important;stroke:#98BAE7;"></line><line x1="666.25" x2="666.25" y1="565" y2="50.200000000000045" class="ct-bar" ct:value="95.52" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#98BAE7;stroke-width:16px !important;stroke-width:16px !important;stroke:#98BAE7;"></line><line x1="779.75" x2="779.75" y1="565" y2="45.17142857142858" class="ct-bar" ct:value="96.16" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#98BAE7;stroke-width:16px !important;stroke-width:16px !important;stroke:#98BAE7;"></line><line x1="893.25" x2="893.25" y1="565" y2="95.85000000000002" class="ct-bar" ct:value="89.71" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#98BAE7;stroke-width:16px !important;stroke-width:16px !important;stroke:#98BAE7;"></line><line x1="1006.75" x2="1006.75" y1="565" y2="84.92857142857144" class="ct-bar" ct:value="91.1" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#98BAE7;stroke-width:16px !important;stroke-width:16px !important;stroke:#98BAE7;"></line><line x1="1120.25" x2="1120.25" y1="565" y2="51.535714285714334" class="ct-bar" ct:value="95.35" style="stroke:#f05b4f;stroke:#f05b4f;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#98BAE7;stroke-width:16px !important;stroke-width:16px !important;stroke:#98BAE7;"></line></g><g class="ct-series ct-series-c"><line x1="114.75" x2="114.75" y1="565" y2="15" class="ct-bar" ct:value="100" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:16px !important;stroke-width:16px !important;stroke:#FEC771;"></line><line x1="228.25" x2="228.25" y1="565" y2="15" 
class="ct-bar" ct:value="100" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:16px !important;stroke-width:16px !important;stroke:#FEC771;"></line><line x1="341.75" x2="341.75" y1="565" y2="58.05714285714288" class="ct-bar" ct:value="94.52" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:16px !important;stroke-width:16px !important;stroke:#FEC771;"></line><line x1="455.25" x2="455.25" y1="565" y2="184.71428571428567" class="ct-bar" ct:value="78.4" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:16px !important;stroke-width:16px !important;stroke:#FEC771;"></line><line x1="568.75" x2="568.75" y1="565" y2="44.071428571428555" class="ct-bar" ct:value="96.3" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:16px !important;stroke-width:16px !important;stroke:#FEC771;"></line><line x1="682.25" x2="682.25" y1="565" y2="96.00714285714287" class="ct-bar" ct:value="89.69" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:16px !important;stroke-width:16px !important;stroke:#FEC771;"></line><line x1="795.75" x2="795.75" y1="565" y2="61.75" class="ct-bar" ct:value="94.05" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:16px !important;stroke-width:16px !important;stroke:#FEC771;"></line><line x1="909.25" x2="909.25" y1="565" y2="110.93571428571425" class="ct-bar" ct:value="87.79" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:16px !important;stroke-width:16px !important;stroke:#FEC771;"></line><line x1="1022.75" x2="1022.75" y1="565" y2="78.72142857142859" class="ct-bar" 
ct:value="91.89" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:16px !important;stroke-width:16px !important;stroke:#FEC771;"></line><line x1="1136.25" x2="1136.25" y1="565" y2="21.442857142857065" class="ct-bar" ct:value="99.18" style="stroke:#f4c63d;stroke:#f4c63d;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#FEC771;stroke-width:16px !important;stroke-width:16px !important;stroke:#FEC771;"></line></g><g class="ct-series ct-series-d"><line x1="130.75" x2="130.75" y1="565" y2="15.54999999999984" class="ct-bar" ct:value="99.93" style="stroke:#d17905;stroke:#d17905;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#F38181;stroke-width:16px !important;stroke-width:16px !important;stroke:#F38181;"></line><line x1="244.25" x2="244.25" y1="565" y2="16.07407142857153" class="ct-bar" ct:value="99.8633" style="stroke:#d17905;stroke:#d17905;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#F38181;stroke-width:16px !important;stroke-width:16px !important;stroke:#F38181;"></line><line x1="357.75" x2="357.75" y1="565" y2="20.10714285714289" class="ct-bar" ct:value="99.35" style="stroke:#d17905;stroke:#d17905;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#F38181;stroke-width:16px !important;stroke-width:16px !important;stroke:#F38181;"></line><line x1="471.25" x2="471.25" y1="565" y2="79.1142857142857" class="ct-bar" ct:value="91.84" style="stroke:#d17905;stroke:#d17905;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#F38181;stroke-width:16px !important;stroke-width:16px !important;stroke:#F38181;"></line><line x1="584.75" x2="584.75" y1="565" y2="183.7714285714286" class="ct-bar" ct:value="78.52" style="stroke:#d17905;stroke:#d17905;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#F38181;stroke-width:16px !important;stroke-width:16px !important;stroke:#F38181;"></line><line x1="698.25" x2="698.25" y1="565" 
y2="389.62857142857143" class="ct-bar" ct:value="52.32" style="stroke:#d17905;stroke:#d17905;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#F38181;stroke-width:16px !important;stroke-width:16px !important;stroke:#F38181;"></line><line x1="811.75" x2="811.75" y1="565" y2="177.17142857142858" class="ct-bar" ct:value="79.36" style="stroke:#d17905;stroke:#d17905;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#F38181;stroke-width:16px !important;stroke-width:16px !important;stroke:#F38181;"></line><line x1="925.25" x2="925.25" y1="565" y2="398.1142857142857" class="ct-bar" ct:value="51.24" style="stroke:#d17905;stroke:#d17905;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#F38181;stroke-width:16px !important;stroke-width:16px !important;stroke:#F38181;"></line><line x1="1038.75" x2="1038.75" y1="565" y2="311.29285714285714" class="ct-bar" ct:value="62.29" style="stroke:#d17905;stroke:#d17905;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#F38181;stroke-width:16px !important;stroke-width:16px !important;stroke:#F38181;"></line><line x1="1152.25" x2="1152.25" y1="565" y2="326.22142857142853" class="ct-bar" ct:value="60.39" style="stroke:#d17905;stroke:#d17905;fill:none;stroke-width:10px;stroke-width:10px;fill:none;stroke:#F38181;stroke-width:16px !important;stroke-width:16px !important;stroke:#F38181;"></line></g></g><g class="ct-labels" transform="translate(0, 72)"><text x="50" y="585" width="113.5" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 50, 585) translate(-10, 0)" 
style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">JPN</text><text x="163.5" y="585" width="113.5" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 163.5, 585) translate(-10, 0)" style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">KOR</text><text x="277" y="585" width="113.5" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 277, 585) translate(-10, 0)" style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">CMN</text><text x="390.5" y="585" width="113.5" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 390.5, 585) translate(-10, 0)" 
style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">ARA</text><text x="504" y="585" width="113.5" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 504, 585) translate(-10, 0)" style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">FIN</text><text x="617.5" y="585" width="113.5" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 617.5, 585) translate(-10, 0)" style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">RUS</text><text x="731" y="585" width="113.5" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 731, 585) translate(-10, 0)" 
style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">FRA</text><text x="844.5" y="585" width="113.5" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 844.5, 585) translate(-10, 0)" style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">SPA</text><text x="958" y="585" width="113.5" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 958, 585) translate(-10, 0)" style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">POR</text><text x="1071.5" y="585" width="113.5" height="20" class="ct-label ct-horizontal ct-end" transform="rotate(20, 1071.5, 585) translate(-10, 0)" 
style="text-anchor:start;text-align:center;justify-content:center;text-anchor:start;text-align:left;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;fill:crimson;text-anchor:start;text-anchor:start;fill:crimson;font-size:14px;font-weight:bold;font-family:Courier;">ENG</text><text y="565" x="40" height="39.285714285714285" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">30</text><text y="525.7142857142857" x="40" height="39.285714285714285" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">35</text><text y="486.42857142857144" x="40" height="39.28571428571429" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">40</text><text y="447.1428571428571" x="40" height="39.28571428571428" width="30" class="ct-label ct-vertical ct-start" 
style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">45</text><text y="407.8571428571429" x="40" height="39.28571428571428" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">50</text><text y="368.57142857142856" x="40" height="39.285714285714306" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">55</text><text y="329.2857142857143" x="40" height="39.28571428571428" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">60</text><text y="290" x="40" height="39.28571428571428" width="30" class="ct-label ct-vertical ct-start" 
style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">65</text><text y="250.71428571428572" x="40" height="39.28571428571428" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">70</text><text y="211.42857142857144" x="40" height="39.28571428571428" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">75</text><text y="172.14285714285717" x="40" height="39.285714285714334" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">80</text><text y="132.85714285714283" x="40" height="39.28571428571428" width="30" class="ct-label ct-vertical ct-start" 
style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">85</text><text y="93.57142857142856" x="40" height="39.28571428571428" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">90</text><text y="54.28571428571428" x="40" height="39.28571428571428" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">95</text><text y="15" x="40" height="30" width="30" class="ct-label ct-vertical ct-start" style="text-anchor:end;text-align:right;display:block;fill:rgba(0,0,0,.4);color:rgba(0,0,0,.4);font-size:12px;line-height:1;line-height:1;font-size:12px;color:rgba(0,0,0,.4);fill:rgba(0,0,0,.4);font-family:Courier;font-weight:bold;font-size:14px;text-anchor:end;text-anchor:end;font-size:14px;font-weight:bold;font-family:Courier;">100</text></g><text x="600" y="28.799999999999997" height="48" font-size="18px" font-family="Verdana" font-weight="bold" fill="crimson" text-anchor="middle" role="caption">NodeJS Language Detection - Per Language</text><text x="600" y="62.4" height="24" font-size="12px" font-family="Verdana" font-weight="bold" fill="indianred" 
text-anchor="middle">Tinyld vs Langdetect vs Cld vs Franc</text></svg>


--------------------------------------------------------------------------------