├── .npmrc ├── funding.yml ├── .prettierignore ├── packages ├── franc-all │ ├── .npmrc │ ├── license │ ├── package.json │ ├── expressions.js │ └── readme.md ├── franc-cli │ ├── .npmrc │ ├── readme.md │ ├── license │ ├── package.json │ └── index.js ├── franc-min │ ├── .npmrc │ ├── license │ ├── package.json │ ├── expressions.js │ └── readme.md └── franc │ ├── .npmrc │ ├── license │ ├── package.json │ ├── expressions.js │ ├── index.js │ └── readme.md ├── changelog.md ├── .gitignore ├── test ├── index.js ├── cli.js └── api.js ├── .editorconfig ├── tsconfig.json ├── .github └── workflows │ └── main.yml ├── license ├── logo.svg ├── script ├── custom-fixtures.js └── build.js ├── package.json └── readme.md /.npmrc: -------------------------------------------------------------------------------- 1 | package-lock=false 2 | -------------------------------------------------------------------------------- /funding.yml: -------------------------------------------------------------------------------- 1 | github: wooorm 2 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | coverage/ 2 | *.md 3 | -------------------------------------------------------------------------------- /packages/franc-all/.npmrc: -------------------------------------------------------------------------------- 1 | package-lock=false 2 | -------------------------------------------------------------------------------- /packages/franc-cli/.npmrc: -------------------------------------------------------------------------------- 1 | package-lock=false 2 | -------------------------------------------------------------------------------- /packages/franc-min/.npmrc: -------------------------------------------------------------------------------- 1 | package-lock=false 2 | -------------------------------------------------------------------------------- /packages/franc/.npmrc: 
-------------------------------------------------------------------------------- 1 | package-lock=false 2 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | See [GitHub Releases][releases] for the changelog. 4 | 5 | [releases]: https://github.com/wooorm/franc/releases 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | coverage/ 2 | node_modules/ 3 | packages/franc-all/index.js 4 | packages/franc-min/index.js 5 | .DS_Store 6 | *.d.ts 7 | *.log 8 | yarn.lock 9 | -------------------------------------------------------------------------------- /test/index.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable import/no-unassigned-import */ 2 | import './api.js' 3 | import './cli.js' 4 | /* eslint-enable import/no-unassigned-import */ 5 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 2 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | -------------------------------------------------------------------------------- /packages/franc-cli/readme.md: -------------------------------------------------------------------------------- 1 | # franc-cli 2 | 3 | > CLI to detect the language of text. 4 | 5 | View the [monorepo](https://github.com/wooorm/franc) for more packages 6 | and usage information. 
7 | 8 | ## Install 9 | 10 | ```sh 11 | npm install franc-cli --global 12 | ``` 13 | 14 | ## License 15 | 16 | [MIT](https://github.com/wooorm/franc/blob/franc/license) © [Titus Wormer](http://wooorm.com) 17 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": ["**/**.js"], 3 | "exclude": ["coverage", "node_modules"], 4 | "compilerOptions": { 5 | "checkJs": true, 6 | "declaration": true, 7 | "emitDeclarationOnly": true, 8 | "exactOptionalPropertyTypes": true, 9 | "forceConsistentCasingInFileNames": true, 10 | "lib": ["es2020"], 11 | "module": "node16", 12 | "newLine": "lf", 13 | "skipLibCheck": true, 14 | "strict": true, 15 | "target": "es2020" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: main 2 | on: 3 | - pull_request 4 | - push 5 | jobs: 6 | main: 7 | name: ${{matrix.node}} 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v3 11 | - uses: actions/setup-node@v3 12 | with: 13 | node-version: ${{matrix.node}} 14 | - run: npm install 15 | - run: npm test 16 | - uses: codecov/codecov-action@v1 17 | strategy: 18 | matrix: 19 | node: 20 | - lts/hydrogen 21 | - node 22 | -------------------------------------------------------------------------------- /license: -------------------------------------------------------------------------------- 1 | (The MIT License) 2 | 3 | Copyright (c) 2014 Titus Wormer 4 | Copyright (c) 2008 Kent S Johnson 5 | Copyright (c) 2006 Jacob R Rideout 6 | Copyright (c) 2004 Maciej Ceglowski 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining 9 | a copy of this software and associated documentation files (the 10 | 'Software'), to deal in the Software without restriction, including 11 | 
without limitation the rights to use, copy, modify, merge, publish, 12 | distribute, sublicense, and/or sell copies of the Software, and to 13 | permit persons to whom the Software is furnished to do so, subject to 14 | the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be 17 | included in all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, 20 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 23 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /packages/franc/license: -------------------------------------------------------------------------------- 1 | (The MIT License) 2 | 3 | Copyright (c) 2014 Titus Wormer 4 | Copyright (c) 2008 Kent S Johnson 5 | Copyright (c) 2006 Jacob R Rideout 6 | Copyright (c) 2004 Maciej Ceglowski 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining 9 | a copy of this software and associated documentation files (the 10 | 'Software'), to deal in the Software without restriction, including 11 | without limitation the rights to use, copy, modify, merge, publish, 12 | distribute, sublicense, and/or sell copies of the Software, and to 13 | permit persons to whom the Software is furnished to do so, subject to 14 | the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be 17 | included in all copies or substantial portions of the Software. 
18 | 19 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, 20 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 23 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /packages/franc-all/license: -------------------------------------------------------------------------------- 1 | (The MIT License) 2 | 3 | Copyright (c) 2014 Titus Wormer 4 | Copyright (c) 2008 Kent S Johnson 5 | Copyright (c) 2006 Jacob R Rideout 6 | Copyright (c) 2004 Maciej Ceglowski 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining 9 | a copy of this software and associated documentation files (the 10 | 'Software'), to deal in the Software without restriction, including 11 | without limitation the rights to use, copy, modify, merge, publish, 12 | distribute, sublicense, and/or sell copies of the Software, and to 13 | permit persons to whom the Software is furnished to do so, subject to 14 | the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be 17 | included in all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, 20 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 23 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
26 | -------------------------------------------------------------------------------- /packages/franc-cli/license: -------------------------------------------------------------------------------- 1 | (The MIT License) 2 | 3 | Copyright (c) 2014 Titus Wormer 4 | Copyright (c) 2008 Kent S Johnson 5 | Copyright (c) 2006 Jacob R Rideout 6 | Copyright (c) 2004 Maciej Ceglowski 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining 9 | a copy of this software and associated documentation files (the 10 | 'Software'), to deal in the Software without restriction, including 11 | without limitation the rights to use, copy, modify, merge, publish, 12 | distribute, sublicense, and/or sell copies of the Software, and to 13 | permit persons to whom the Software is furnished to do so, subject to 14 | the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be 17 | included in all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, 20 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 23 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
26 | -------------------------------------------------------------------------------- /packages/franc-min/license: -------------------------------------------------------------------------------- 1 | (The MIT License) 2 | 3 | Copyright (c) 2014 Titus Wormer 4 | Copyright (c) 2008 Kent S Johnson 5 | Copyright (c) 2006 Jacob R Rideout 6 | Copyright (c) 2004 Maciej Ceglowski 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining 9 | a copy of this software and associated documentation files (the 10 | 'Software'), to deal in the Software without restriction, including 11 | without limitation the rights to use, copy, modify, merge, publish, 12 | distribute, sublicense, and/or sell copies of the Software, and to 13 | permit persons to whom the Software is furnished to do so, subject to 14 | the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be 17 | included in all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, 20 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 23 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
26 | -------------------------------------------------------------------------------- /packages/franc-cli/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "franc-cli", 3 | "version": "8.0.0", 4 | "description": "CLI to detect the language of text", 5 | "license": "MIT", 6 | "keywords": [ 7 | "natural", 8 | "language", 9 | "writing", 10 | "system", 11 | "detect", 12 | "guess", 13 | "cli", 14 | "bin" 15 | ], 16 | "repository": "https://github.com/wooorm/franc/tree/main/packages/franc-cli", 17 | "bugs": "https://github.com/wooorm/franc/issues", 18 | "funding": { 19 | "type": "github", 20 | "url": "https://github.com/sponsors/wooorm" 21 | }, 22 | "author": "Titus Wormer (http://wooorm.com)", 23 | "contributors": [ 24 | "Titus Wormer (http://wooorm.com)", 25 | "Kamil Bielawski ", 26 | "Andrew Pantyukhin ", 27 | "Dmitriy Sobolev ", 28 | "Jeff Huijsmans " 29 | ], 30 | "type": "module", 31 | "main": "index.js", 32 | "bin": { 33 | "franc": "index.js" 34 | }, 35 | "files": [ 36 | "index.js" 37 | ], 38 | "dependencies": { 39 | "franc": "^6.0.0", 40 | "meow": "^10.0.0" 41 | }, 42 | "scripts": {}, 43 | "xo": { 44 | "prettier": true, 45 | "rules": { 46 | "camelcase": "off", 47 | "max-depth": "off", 48 | "no-misleading-character-class": "off" 49 | } 50 | }, 51 | "typeCoverage": { 52 | "atLeast": 100, 53 | "detail": true, 54 | "strict": true, 55 | "ignoreCatch": true 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /packages/franc/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "franc", 3 | "threshold": 1000000, 4 | "version": "6.2.0", 5 | "description": "Detect the language of text", 6 | "license": "MIT", 7 | "keywords": [ 8 | "natural", 9 | "language", 10 | "writing", 11 | "system", 12 | "detect", 13 | "guess" 14 | ], 15 | "repository": "https://github.com/wooorm/franc/tree/main/packages/franc", 16 | 
"bugs": "https://github.com/wooorm/franc/issues", 17 | "funding": { 18 | "type": "github", 19 | "url": "https://github.com/sponsors/wooorm" 20 | }, 21 | "author": "Titus Wormer (http://wooorm.com)", 22 | "contributors": [ 23 | "Titus Wormer (http://wooorm.com)", 24 | "Kamil Bielawski ", 25 | "Andrew Pantyukhin ", 26 | "Dmitriy Sobolev ", 27 | "Jeff Huijsmans " 28 | ], 29 | "sideEffects": false, 30 | "type": "module", 31 | "main": "index.js", 32 | "types": "index.d.ts", 33 | "files": [ 34 | "data.d.ts", 35 | "data.js", 36 | "expressions.d.ts", 37 | "expressions.js", 38 | "index.d.ts", 39 | "index.js" 40 | ], 41 | "dependencies": { 42 | "trigram-utils": "^2.0.0" 43 | }, 44 | "scripts": {}, 45 | "xo": { 46 | "prettier": true, 47 | "rules": { 48 | "camelcase": "off", 49 | "max-depth": "off", 50 | "no-misleading-character-class": "off" 51 | } 52 | }, 53 | "typeCoverage": { 54 | "atLeast": 100, 55 | "detail": true, 56 | "strict": true, 57 | "ignoreCatch": true 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /packages/franc-all/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "franc-all", 3 | "threshold": -1, 4 | "version": "7.2.0", 5 | "description": "Detect the language of text", 6 | "license": "MIT", 7 | "keywords": [ 8 | "natural", 9 | "language", 10 | "writing", 11 | "system", 12 | "detect", 13 | "guess" 14 | ], 15 | "repository": "https://github.com/wooorm/franc/tree/main/packages/franc-all", 16 | "bugs": "https://github.com/wooorm/franc/issues", 17 | "funding": { 18 | "type": "github", 19 | "url": "https://github.com/sponsors/wooorm" 20 | }, 21 | "author": "Titus Wormer (http://wooorm.com)", 22 | "contributors": [ 23 | "Titus Wormer (http://wooorm.com)", 24 | "Kamil Bielawski ", 25 | "Andrew Pantyukhin ", 26 | "Dmitriy Sobolev ", 27 | "Jeff Huijsmans " 28 | ], 29 | "sideEffects": false, 30 | "type": "module", 31 | "main": "index.js", 32 | "types": 
"index.d.ts", 33 | "files": [ 34 | "data.d.ts", 35 | "data.js", 36 | "expressions.d.ts", 37 | "expressions.js", 38 | "index.d.ts", 39 | "index.js" 40 | ], 41 | "dependencies": { 42 | "trigram-utils": "^2.0.0" 43 | }, 44 | "scripts": {}, 45 | "xo": { 46 | "prettier": true, 47 | "rules": { 48 | "camelcase": "off", 49 | "max-depth": "off", 50 | "no-misleading-character-class": "off" 51 | } 52 | }, 53 | "typeCoverage": { 54 | "atLeast": 100, 55 | "detail": true, 56 | "strict": true, 57 | "ignoreCatch": true 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /packages/franc-min/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "franc-min", 3 | "threshold": 8000000, 4 | "version": "6.2.0", 5 | "description": "Detect the language of text", 6 | "license": "MIT", 7 | "keywords": [ 8 | "natural", 9 | "language", 10 | "writing", 11 | "system", 12 | "detect", 13 | "guess" 14 | ], 15 | "repository": "https://github.com/wooorm/franc/tree/main/packages/franc-min", 16 | "bugs": "https://github.com/wooorm/franc/issues", 17 | "funding": { 18 | "type": "github", 19 | "url": "https://github.com/sponsors/wooorm" 20 | }, 21 | "author": "Titus Wormer (http://wooorm.com)", 22 | "contributors": [ 23 | "Titus Wormer (http://wooorm.com)", 24 | "Kamil Bielawski ", 25 | "Andrew Pantyukhin ", 26 | "Dmitriy Sobolev ", 27 | "Jeff Huijsmans " 28 | ], 29 | "sideEffects": false, 30 | "type": "module", 31 | "main": "index.js", 32 | "types": "index.d.ts", 33 | "files": [ 34 | "data.d.ts", 35 | "data.js", 36 | "expressions.d.ts", 37 | "expressions.js", 38 | "index.d.ts", 39 | "index.js" 40 | ], 41 | "dependencies": { 42 | "trigram-utils": "^2.0.0" 43 | }, 44 | "scripts": {}, 45 | "xo": { 46 | "prettier": true, 47 | "rules": { 48 | "camelcase": "off", 49 | "max-depth": "off", 50 | "no-misleading-character-class": "off" 51 | } 52 | }, 53 | "typeCoverage": { 54 | "atLeast": 100, 55 | "detail": 
true, 56 | "strict": true, 57 | "ignoreCatch": true 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /script/custom-fixtures.js: -------------------------------------------------------------------------------- 1 | /** @type {Record} */ 2 | export const customFixtures = { 3 | ori: 4 | 'ସବୁ ମନୁଷ୍ୟ ଜନ୍ମୁକାଳରୁ ସ୍ଵଧୀନ, ଷେମାନଙ୍କର ମର୍ସ୍ୟାଡା ଓ ' + 5 | 'ଅଧିକାର ସମାନ, ସେମାନଙଠାରେ ପ୍ରବଁ ଓ ବିବେକ ନିହ ଟଛି, ' + 6 | 'ସେମାନେ ପରସ୍ପର ପବ ବ୍ରାଦହବ ପୋଷଷ କରି ଠାର୍ପ୍ୟ ଜକିରା ଡରକାର.', 7 | 8 | tel: 9 | 'ప్రతిపత్తిస్వత్వముల విషయమున మానవులెల్లరును జన్మతః ' + 10 | 'స్వతంత్రులును సమానులును నగుదురు. వారు వివేదనాంతఃకరణ ' + 11 | 'సంపన్నులగుటచే పరస్పరము భ్రాతృభావముతో వర్తింపవలయును.', 12 | 13 | sin: 14 | 'සියලූ මනුෂ්‍යයෝ නිදහස්ව උපත ලබා ඇත. ' + 15 | 'ගරුත්වයෙන් හා අයිතිවාසිකම් සමාන වෙති. යුක්ති ' + 16 | 'අයුක්ති පිළිබඳ හැඟීමෙන් හා හෘදය සාක්ෂියෙන් යුත් ' + 17 | 'ඔවුනොවුන්වුන්ට සැළකිය යුත්තේ සහෝදරත්වය පිළිබඳ ' + 18 | 'හැඟීමෙනි.', 19 | 20 | /* 'water snatch pluck' 21 | * 22 | * hand encoded from: 23 | * - http://wesanthals.tripod.com/sitebuildercontent/sitebuilderfiles/ 24 | * uni_olchiki.pdf; 25 | * - http://scriptsource.org/cms/scripts/ 26 | * page.php?item_id=script_detail_sym&key=Olck. */ 27 | sat: 'ᱫᱟ᱕ ᱨᱮᱡ ᱜᱚᱫ', 28 | 29 | /* A prayer from: 30 | * 31 | * http://www.christusrex.com/www1/pater/JPN-guarani.html */ 32 | gug: 33 | 'Ore Ru, yvágape reiméva, toñembojeroviákena nde réra; ' + 34 | 'taoreañuamba ne mborayhu; tojejapo ne rembipota ko yvy ári, ' + 35 | "yvagapeguáicha. Eme'ê oréve ko árape ore rembi'urâ, opa " + 36 | 'ára roikotevêva; ehejareíkena oréve ore rembiapo vaikue, ' + 37 | 'rohejareiháicha ore rapichápe hembiapo vaikue orendive. ' + 38 | "Aníkena reheja roike rojepy'ara'â vai haguáme ha orepe'a " + 39 | "opa mba'e vaigui. 
Amen.", 40 | 41 | /** 42 | * From PDF to text by hand: the first few sentences of the page linked below. 43 | * 44 | * The non-existent LATIN SMALL LETTER I WITH CIRCUMFLEX AND STROKE 45 | * is encoded as LATIN SMALL LETTER I WITH STROKE. 46 | * 47 | * http://www.language-museum.com/encyclopedia/g/guarayu.php 48 | */ 49 | 50 | gyr: 51 | 'Aracahe tũpa oyapo ɨva ɨvɨ avei no. Yɨpɨndar rumo ' + 52 | 'pɨtuño tẽi. Ndipo vɨtei eté arɨ, yasɨ, yasɨtata avei. ' + 53 | "Evocoiyase tũpa aipo ehi: 'toime icatu vahe', ehi. " + 54 | "Aheseve voi oime icatu vahe. Ipare aipo ehi: 'Icatu " + 55 | "vahe rer-ra raɨ, ndicatui vahe rer-ra evocoiyase pɨtu', " + 56 | 'ehi.', 57 | 58 | /* A prayer, converted from PDF to text by hand. I do not know 59 | * Cyrillic, so I probably made some errors: 60 | * 61 | * http://www.christusrex.com/www1/pater/JPN-even.html 62 | * 63 | * `eve` looks a lot like `evn` and `tgk`. I stopped here, and it's a 64 | * bit of a fake, but `franc` seems to detect the sample below correctly */ 65 | eve: 66 | 'Мут аманти, нбаанбандудэ биси! һии гэрбэс святибдэн; ' + 67 | 'һии Царствас эмдэн: һии энис теер-дэ ойдун; нбаанбандула-да ' + 68 | 'бидэн; Эрэк инэнит клебэнгэн мутту беели; Мут беенэвут ' + 69 | 'мутту простили, мутту бееннэлкэһэлбу мут-тэ простирап. ' + 70 | 'Искушениела муту эдби иивкэп (муту эдби эйэчукэн), һавка ' + 71 | 'елэкчим муттук һерукэли; һии Царствас энис эскэнси эрэгэр ' + 72 | 'бидэн. Аминб.' 
73 | } 74 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "private": true, 3 | "name": "franc", 4 | "description": "Monorepo for franc packages", 5 | "license": "MIT", 6 | "repository": "https://github.com/wooorm/franc", 7 | "bugs": "https://github.com/wooorm/franc/issues", 8 | "author": "Titus Wormer (http://wooorm.com)", 9 | "type": "module", 10 | "workspaces": [ 11 | "packages/franc", 12 | "packages/franc-all", 13 | "packages/franc-cli", 14 | "packages/franc-min" 15 | ], 16 | "devDependencies": { 17 | "@types/mdast": "^4.0.0", 18 | "@types/node": "^20.0.0", 19 | "@types/parse-author": "^2.0.0", 20 | "@unicode/unicode-15.0.0": "^1.0.0", 21 | "alpha-sort": "^5.0.0", 22 | "c8": "^9.0.0", 23 | "hast-util-from-html": "^2.0.0", 24 | "hast-util-select": "^6.0.0", 25 | "hast-util-to-string": "^3.0.0", 26 | "import-meta-resolve": "^4.0.0", 27 | "is-hidden": "^2.0.0", 28 | "iso-639-3": "^3.0.0", 29 | "parse-author": "^2.0.0", 30 | "prettier": "^3.0.0", 31 | "remark-cli": "^12.0.0", 32 | "remark-gfm": "^4.0.0", 33 | "remark-lint-no-heading-punctuation": "^3.0.0", 34 | "remark-lint-no-html": "^3.0.0", 35 | "remark-lint-table-pipe-alignment": "^3.0.0", 36 | "remark-preset-wooorm": "^9.0.0", 37 | "remark-stringify": "^11.0.0", 38 | "speakers": "^2.0.0", 39 | "trigrams": "^5.0.0", 40 | "type-coverage": "^2.0.0", 41 | "type-fest": "^4.0.0", 42 | "typescript": "^5.0.0", 43 | "udhr": "^5.0.0", 44 | "unified": "^11.0.0", 45 | "xo": "^0.56.0" 46 | }, 47 | "scripts": { 48 | "generate": "node --conditions development script/build.js", 49 | "build": "tsc --build --clean && tsc --build && type-coverage", 50 | "format": "remark . -qfo && prettier . 
-w --log-level warn && xo --fix", 51 | "test-api": "node --conditions development test/index.js", 52 | "test-coverage": "c8 --check-coverage --100 --reporter lcov npm run test-api", 53 | "test": "npm run generate && npm run build && npm run format && npm run test-coverage" 54 | }, 55 | "prettier": { 56 | "tabWidth": 2, 57 | "useTabs": false, 58 | "singleQuote": true, 59 | "bracketSpacing": false, 60 | "semi": false, 61 | "trailingComma": "none" 62 | }, 63 | "xo": { 64 | "prettier": true, 65 | "rules": { 66 | "camelcase": "off", 67 | "max-depth": "off", 68 | "no-misleading-character-class": "off", 69 | "unicorn/prefer-string-replace-all": "off" 70 | } 71 | }, 72 | "remarkConfig": { 73 | "plugins": [ 74 | "remark-preset-wooorm", 75 | [ 76 | "remark-preset-wooorm/node_modules/remark-gfm/index.js", 77 | false 78 | ], 79 | [ 80 | "remark-gfm", 81 | { 82 | "tablePipeAlign": false 83 | } 84 | ], 85 | [ 86 | "remark-lint-list-item-indent", 87 | "space" 88 | ], 89 | [ 90 | "remark-lint-table-pipe-alignment", 91 | false 92 | ], 93 | [ 94 | "remark-lint-no-html", 95 | false 96 | ], 97 | [ 98 | "remark-lint-no-heading-punctuation", 99 | false 100 | ] 101 | ] 102 | }, 103 | "typeCoverage": { 104 | "atLeast": 100, 105 | "detail": true, 106 | "strict": true, 107 | "ignoreCatch": true 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /packages/franc-cli/index.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /** 3 | * @typedef {import('franc').Options} Options 4 | */ 5 | 6 | import process from 'node:process' 7 | import fs from 'node:fs/promises' 8 | import meow from 'meow' 9 | import {franc, francAll} from 'franc' 10 | 11 | /** @type {Record & {bin: Record}} */ 12 | const pack = JSON.parse( 13 | String(await fs.readFile(new URL('package.json', import.meta.url))) 14 | ) 15 | 16 | const command = Object.keys(pack.bin)[0] 17 | 18 | const cli = meow(help(), { 19 | 
importMeta: import.meta, 20 | flags: { 21 | all: { 22 | type: 'boolean', 23 | alias: 'a' 24 | }, 25 | whitelist: { 26 | type: 'string', 27 | alias: 'w' 28 | }, 29 | only: { 30 | type: 'string', 31 | alias: 'o' 32 | }, 33 | blacklist: { 34 | type: 'string', 35 | alias: 'b' 36 | }, 37 | ignore: { 38 | type: 'string', 39 | alias: 'i' 40 | }, 41 | minLength: { 42 | type: 'string', 43 | alias: 'm' 44 | }, 45 | version: { 46 | type: 'boolean', 47 | alias: 'v' 48 | }, 49 | help: { 50 | type: 'boolean', 51 | alias: 'h' 52 | } 53 | } 54 | }) 55 | 56 | const value = cli.input.join(' ').trim() 57 | const flags = cli.flags 58 | 59 | /** @type {Options} */ 60 | const options = { 61 | minLength: Number(flags.minLength) || undefined, 62 | // @ts-expect-error: legacy. 63 | whitelist: list(flags.whitelist), 64 | blacklist: list(flags.blacklist), 65 | only: list(flags.only), 66 | ignore: list(flags.ignore) 67 | } 68 | 69 | if (cli.input.length === 0) { 70 | process.stdin.resume() 71 | process.stdin.setEncoding('utf8') 72 | process.stdin.on('data', (data) => { 73 | detect(String(data).trim()) 74 | }) 75 | } else { 76 | detect(value) 77 | } 78 | 79 | /** 80 | * @param {string} value 81 | */ 82 | function detect(value) { 83 | if (flags.all) { 84 | const results = francAll(value, options) 85 | let index = -1 86 | while (++index < results.length) { 87 | console.log(results[index][0] + ' ' + results[index][1]) 88 | } 89 | } else { 90 | console.log(franc(value, options)) 91 | } 92 | } 93 | 94 | function help() { 95 | return [ 96 | 'Usage: ' + command + ' [options] ', 97 | '', 98 | 'Options:', 99 | '', 100 | ' -h, --help output usage information', 101 | ' -v, --version output version number', 102 | ' -m, --min-length minimum length to accept', 103 | ' -o, --only allow languages', 104 | ' -i, --ignore disallow languages', 105 | ' -a, --all display all guesses', 106 | '', 107 | 'Usage:', 108 | '', 109 | '# output language', 110 | '$ ' + command + ' "Alle menslike wesens word vry"', 111 | '# ' 
+ franc('Alle menslike wesens word vry'), 112 | '', 113 | '# output language from stdin (expects utf8)', 114 | '$ echo "এটি একটি ভাষা একক IBM স্ক্রিপ্ট" | ' + command, 115 | '# ' + franc('এটি একটি ভাষা একক IBM স্ক্রিপ্ট'), 116 | '', 117 | '# ignore certain languages', 118 | '$ ' + command + ' --ignore por,glg "O Brasil caiu 26 posições"', 119 | '# ' + franc('O Brasil caiu 26 posições', {ignore: ['por', 'glg']}), 120 | '', 121 | '# output language from stdin with only', 122 | '$ echo "Alle mennesker er født frie og" | ' + command + ' --only nob,dan', 123 | '# ' + franc('Alle mennesker er født frie og', {only: ['nob', 'dan']}) 124 | ].join('\n') 125 | } 126 | 127 | /** 128 | * @param {string|undefined} value 129 | * @returns {Array} 130 | */ 131 | function list(value) { 132 | return value ? String(value).split(',') : [] 133 | } 134 | -------------------------------------------------------------------------------- /test/cli.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @typedef {import('type-fest').PackageJson} PackageJson 3 | */ 4 | 5 | import assert from 'node:assert/strict' 6 | import fs from 'node:fs/promises' 7 | import util from 'node:util' 8 | import {fileURLToPath} from 'node:url' 9 | import childProcess from 'node:child_process' 10 | import {PassThrough} from 'node:stream' 11 | import test from 'node:test' 12 | 13 | const exec = util.promisify(childProcess.exec) 14 | 15 | const root = new URL('../packages/franc-cli/', import.meta.url) 16 | 17 | /** @type {PackageJson} */ 18 | const pkg = JSON.parse(String(await fs.readFile(new URL('package.json', root)))) 19 | const cli = fileURLToPath(new URL('index.js', root)) 20 | 21 | test('cli', async () => { 22 | const af = 'Alle menslike wesens word vry' 23 | const no = 'Alle mennesker er født frie og' 24 | const ptBr = 'O Brasil caiu 26 posições' 25 | 26 | // Version. 
27 | assert.deepEqual( 28 | await exec(cli + ' -v'), 29 | {stderr: '', stdout: pkg.version + '\n'}, 30 | '-v' 31 | ) 32 | assert.deepEqual( 33 | await exec(cli + ' --version'), 34 | {stderr: '', stdout: pkg.version + '\n'}, 35 | '--version' 36 | ) 37 | 38 | // Help. 39 | const h = await exec(cli + ' -h') 40 | assert.match(h.stdout, /^\s+CLI to detect the language of text/, '-h') 41 | const help = await exec(cli + ' --help') 42 | assert.match(help.stdout, /^\s+CLI to detect the language of text/, '--help') 43 | 44 | // Main. 45 | assert.deepEqual( 46 | await exec(cli + ' "' + af + '"'), 47 | {stderr: '', stdout: 'afr\n'}, 48 | 'argument' 49 | ) 50 | 51 | assert.deepEqual( 52 | await exec(cli + ' ' + af), 53 | {stderr: '', stdout: 'afr\n'}, 54 | 'multiple arguments' 55 | ) 56 | 57 | await new Promise(function (resolve) { 58 | const input = new PassThrough() 59 | const subprocess = childProcess.exec(cli, function (error, stdout, stderr) { 60 | assert.deepEqual([error, stdout, stderr], [null, 'afr\n', ''], 'stdin') 61 | resolve(undefined) 62 | }) 63 | assert(subprocess.stdin, 'expected stdin on `subprocess`') 64 | input.pipe(subprocess.stdin) 65 | input.write(af.slice(0, af.length / 2)) 66 | setImmediate(function () { 67 | input.end(af.slice(af.length / 2)) 68 | }) 69 | }) 70 | 71 | // Only. 72 | assert.deepEqual( 73 | await exec(cli + ' -o nob,dan "' + no + '"'), 74 | {stderr: '', stdout: 'nob\n'}, 75 | '-o' 76 | ) 77 | assert.deepEqual( 78 | await exec(cli + ' --only nob,dan "' + no + '"'), 79 | {stderr: '', stdout: 'nob\n'}, 80 | '--only' 81 | ) 82 | assert.deepEqual( 83 | await exec(cli + ' -w nob,dan "' + no + '"'), 84 | {stderr: '', stdout: 'nob\n'}, 85 | '-w' 86 | ) 87 | assert.deepEqual( 88 | await exec(cli + ' --whitelist nob,dan "' + no + '"'), 89 | {stderr: '', stdout: 'nob\n'}, 90 | '--whitelist' 91 | ) 92 | 93 | // Ignore. 
94 | assert.deepEqual( 95 | await exec(cli + ' -i por,glg "' + ptBr + '"'), 96 | {stderr: '', stdout: 'vec\n'}, 97 | '-i' 98 | ) 99 | assert.deepEqual( 100 | await exec(cli + ' --ignore por,glg "' + ptBr + '"'), 101 | {stderr: '', stdout: 'vec\n'}, 102 | '--ignore' 103 | ) 104 | assert.deepEqual( 105 | await exec(cli + ' -b por,glg "' + ptBr + '"'), 106 | {stderr: '', stdout: 'vec\n'}, 107 | '-b' 108 | ) 109 | assert.deepEqual( 110 | await exec(cli + ' --blacklist por,glg "' + ptBr + '"'), 111 | {stderr: '', stdout: 'vec\n'}, 112 | '--blacklist' 113 | ) 114 | 115 | // Min. 116 | assert.deepEqual( 117 | await exec(cli + ' -m 3 "the"'), 118 | {stderr: '', stdout: 'sco\n'}, 119 | '-m' 120 | ) 121 | assert.deepEqual( 122 | await exec(cli + ' -m 4 "the"'), 123 | {stderr: '', stdout: 'und\n'}, 124 | '-m (unsatisfied)' 125 | ) 126 | assert.deepEqual( 127 | await exec(cli + ' --min-length 3 "the"'), 128 | {stderr: '', stdout: 'sco\n'}, 129 | '--min-length' 130 | ) 131 | assert.deepEqual( 132 | await exec(cli + ' --min-length 4 "the"'), 133 | {stderr: '', stdout: 'und\n'}, 134 | '--min-length (unsatisfied)' 135 | ) 136 | 137 | // All. 138 | const a = await exec(cli + ' -a "' + af + '"') 139 | assert.deepEqual( 140 | a.stdout.split('\n').slice(0, 3), 141 | ['afr 1', 'nld 0.7419425564569173', 'nob 0.5446174084630564'], 142 | '-a' 143 | ) 144 | const all = await exec(cli + ' --all "' + af + '"') 145 | assert.deepEqual( 146 | all.stdout.split('\n').slice(0, 3), 147 | ['afr 1', 'nld 0.7419425564569173', 'nob 0.5446174084630564'], 148 | '--all' 149 | ) 150 | }) 151 | -------------------------------------------------------------------------------- /packages/franc-min/expressions.js: -------------------------------------------------------------------------------- 1 | // This file is generated by `build.js`. 
2 | /** @type {Record} */ 3 | export const expressions = { 4 | cmn: /[\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u3005\u3007\u3021-\u3029\u3038-\u303B\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFA6D\uFA70-\uFAD9]|\uD81B[\uDFE2\uDFE3\uDFF0\uDFF1]|[\uD840-\uD868\uD86A-\uD86C\uD86F-\uD872\uD874-\uD879\uD880-\uD883\uD885-\uD887][\uDC00-\uDFFF]|\uD869[\uDC00-\uDEDF\uDF00-\uDFFF]|\uD86D[\uDC00-\uDF39\uDF40-\uDFFF]|\uD86E[\uDC00-\uDC1D\uDC20-\uDFFF]|\uD873[\uDC00-\uDEA1\uDEB0-\uDFFF]|\uD87A[\uDC00-\uDFE0]|\uD87E[\uDC00-\uDE1D]|\uD884[\uDC00-\uDF4A\uDF50-\uDFFF]|\uD888[\uDC00-\uDFAF]/g, 5 | Latin: 6 | /[A-Za-z\u00AA\u00BA\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02B8\u02E0-\u02E4\u1D00-\u1D25\u1D2C-\u1D5C\u1D62-\u1D65\u1D6B-\u1D77\u1D79-\u1DBE\u1E00-\u1EFF\u2071\u207F\u2090-\u209C\u212A\u212B\u2132\u214E\u2160-\u2188\u2C60-\u2C7F\uA722-\uA787\uA78B-\uA7CA\uA7D0\uA7D1\uA7D3\uA7D5-\uA7D9\uA7F2-\uA7FF\uAB30-\uAB5A\uAB5C-\uAB64\uAB66-\uAB69\uFB00-\uFB06\uFF21-\uFF3A\uFF41-\uFF5A]|\uD801[\uDF80-\uDF85\uDF87-\uDFB0\uDFB2-\uDFBA]|\uD837[\uDF00-\uDF1E\uDF25-\uDF2A]/g, 7 | Cyrillic: 8 | /[\u0400-\u0484\u0487-\u052F\u1C80-\u1C88\u1D2B\u1D78\u2DE0-\u2DFF\uA640-\uA69F\uFE2E\uFE2F]|\uD838[\uDC30-\uDC6D\uDC8F]/g, 9 | Arabic: 10 | /[\u0600-\u0604\u0606-\u060B\u060D-\u061A\u061C-\u061E\u0620-\u063F\u0641-\u064A\u0656-\u066F\u0671-\u06DC\u06DE-\u06FF\u0750-\u077F\u0870-\u088E\u0890\u0891\u0898-\u08E1\u08E3-\u08FF\uFB50-\uFBC2\uFBD3-\uFD3D\uFD40-\uFD8F\uFD92-\uFDC7\uFDCF\uFDF0-\uFDFF\uFE70-\uFE74\uFE76-\uFEFC]|\uD803[\uDE60-\uDE7E\uDEFD-\uDEFF]|\uD83B[\uDE00-\uDE03\uDE05-\uDE1F\uDE21\uDE22\uDE24\uDE27\uDE29-\uDE32\uDE34-\uDE37\uDE39\uDE3B\uDE42\uDE47\uDE49\uDE4B\uDE4D-\uDE4F\uDE51\uDE52\uDE54\uDE57\uDE59\uDE5B\uDE5D\uDE5F\uDE61\uDE62\uDE64\uDE67-\uDE6A\uDE6C-\uDE72\uDE74-\uDE77\uDE79-\uDE7C\uDE7E\uDE80-\uDE89\uDE8B-\uDE9B\uDEA1-\uDEA3\uDEA5-\uDEA9\uDEAB-\uDEBB\uDEF0\uDEF1]/g, 11 | ben: 
/[\u0980-\u0983\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7\u09C8\u09CB-\u09CE\u09D7\u09DC\u09DD\u09DF-\u09E3\u09E6-\u09FE]/g, 12 | Devanagari: 13 | /[\u0900-\u0950\u0955-\u0963\u0966-\u097F\uA8E0-\uA8FF]|\uD806[\uDF00-\uDF09]/g, 14 | jpn: /[\u3041-\u3096\u309D-\u309F]|\uD82C[\uDC01-\uDD1F\uDD32\uDD50-\uDD52]|\uD83C\uDE00|[\u30A1-\u30FA\u30FD-\u30FF\u31F0-\u31FF\u32D0-\u32FE\u3300-\u3357\uFF66-\uFF6F\uFF71-\uFF9D]|\uD82B[\uDFF0-\uDFF3\uDFF5-\uDFFB\uDFFD\uDFFE]|\uD82C[\uDC00\uDD20-\uDD22\uDD55\uDD64-\uDD67]|[\u3400-\u4DB5\u4E00-\u9FAF]/g, 15 | jav: /[\uA980-\uA9CD\uA9D0-\uA9D9\uA9DE\uA9DF]/g, 16 | kor: /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/g, 17 | tel: /[\u0C00-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3C-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C58-\u0C5A\u0C5D\u0C60-\u0C63\u0C66-\u0C6F\u0C77-\u0C7F]/g, 18 | tam: /[\u0B82\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD0\u0BD7\u0BE6-\u0BFA]|\uD807[\uDFC0-\uDFF1\uDFFF]/g, 19 | guj: /[\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0-\u0AE3\u0AE6-\u0AF1\u0AF9-\u0AFF]/g, 20 | kan: /[\u0C80-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0CDD\u0CDE\u0CE0-\u0CE3\u0CE6-\u0CEF\u0CF1-\u0CF3]/g, 21 | mal: /[\u0D00-\u0D0C\u0D0E-\u0D10\u0D12-\u0D44\u0D46-\u0D48\u0D4A-\u0D4F\u0D54-\u0D63\u0D66-\u0D7F]/g, 22 | mya: /[\u1000-\u109F\uA9E0-\uA9FE\uAA60-\uAA7F]/g, 23 | pan: /[\u0A01-\u0A03\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A51\u0A59-\u0A5C\u0A5E\u0A66-\u0A76]/g, 24 | amh: 
/[\u1200-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u135D-\u137C\u1380-\u1399\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E]|\uD839[\uDFE0-\uDFE6\uDFE8-\uDFEB\uDFED\uDFEE\uDFF0-\uDFFE]/g, 25 | tha: /[\u0E01-\u0E3A\u0E40-\u0E5B]/g, 26 | sin: /[\u0D81-\u0D83\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0DCA\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DE6-\u0DEF\u0DF2-\u0DF4]|\uD804[\uDDE1-\uDDF4]/g, 27 | ell: /[\u0370-\u0373\u0375-\u0377\u037A-\u037D\u037F\u0384\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03E1\u03F0-\u03FF\u1D26-\u1D2A\u1D5D-\u1D61\u1D66-\u1D6A\u1DBF\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6-\u1FFE\u2126\uAB65]|\uD800[\uDD40-\uDD8E\uDDA0]|\uD834[\uDE00-\uDE45]/g 28 | } 29 | -------------------------------------------------------------------------------- /packages/franc/expressions.js: -------------------------------------------------------------------------------- 1 | // This file is generated by `build.js`. 
2 | /** @type {Record} */ 3 | export const expressions = { 4 | cmn: /[\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u3005\u3007\u3021-\u3029\u3038-\u303B\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFA6D\uFA70-\uFAD9]|\uD81B[\uDFE2\uDFE3\uDFF0\uDFF1]|[\uD840-\uD868\uD86A-\uD86C\uD86F-\uD872\uD874-\uD879\uD880-\uD883\uD885-\uD887][\uDC00-\uDFFF]|\uD869[\uDC00-\uDEDF\uDF00-\uDFFF]|\uD86D[\uDC00-\uDF39\uDF40-\uDFFF]|\uD86E[\uDC00-\uDC1D\uDC20-\uDFFF]|\uD873[\uDC00-\uDEA1\uDEB0-\uDFFF]|\uD87A[\uDC00-\uDFE0]|\uD87E[\uDC00-\uDE1D]|\uD884[\uDC00-\uDF4A\uDF50-\uDFFF]|\uD888[\uDC00-\uDFAF]/g, 5 | Latin: 6 | /[A-Za-z\u00AA\u00BA\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02B8\u02E0-\u02E4\u1D00-\u1D25\u1D2C-\u1D5C\u1D62-\u1D65\u1D6B-\u1D77\u1D79-\u1DBE\u1E00-\u1EFF\u2071\u207F\u2090-\u209C\u212A\u212B\u2132\u214E\u2160-\u2188\u2C60-\u2C7F\uA722-\uA787\uA78B-\uA7CA\uA7D0\uA7D1\uA7D3\uA7D5-\uA7D9\uA7F2-\uA7FF\uAB30-\uAB5A\uAB5C-\uAB64\uAB66-\uAB69\uFB00-\uFB06\uFF21-\uFF3A\uFF41-\uFF5A]|\uD801[\uDF80-\uDF85\uDF87-\uDFB0\uDFB2-\uDFBA]|\uD837[\uDF00-\uDF1E\uDF25-\uDF2A]/g, 7 | Cyrillic: 8 | /[\u0400-\u0484\u0487-\u052F\u1C80-\u1C88\u1D2B\u1D78\u2DE0-\u2DFF\uA640-\uA69F\uFE2E\uFE2F]|\uD838[\uDC30-\uDC6D\uDC8F]/g, 9 | Arabic: 10 | /[\u0600-\u0604\u0606-\u060B\u060D-\u061A\u061C-\u061E\u0620-\u063F\u0641-\u064A\u0656-\u066F\u0671-\u06DC\u06DE-\u06FF\u0750-\u077F\u0870-\u088E\u0890\u0891\u0898-\u08E1\u08E3-\u08FF\uFB50-\uFBC2\uFBD3-\uFD3D\uFD40-\uFD8F\uFD92-\uFDC7\uFDCF\uFDF0-\uFDFF\uFE70-\uFE74\uFE76-\uFEFC]|\uD803[\uDE60-\uDE7E\uDEFD-\uDEFF]|\uD83B[\uDE00-\uDE03\uDE05-\uDE1F\uDE21\uDE22\uDE24\uDE27\uDE29-\uDE32\uDE34-\uDE37\uDE39\uDE3B\uDE42\uDE47\uDE49\uDE4B\uDE4D-\uDE4F\uDE51\uDE52\uDE54\uDE57\uDE59\uDE5B\uDE5D\uDE5F\uDE61\uDE62\uDE64\uDE67-\uDE6A\uDE6C-\uDE72\uDE74-\uDE77\uDE79-\uDE7C\uDE7E\uDE80-\uDE89\uDE8B-\uDE9B\uDEA1-\uDEA3\uDEA5-\uDEA9\uDEAB-\uDEBB\uDEF0\uDEF1]/g, 11 | ben: 
/[\u0980-\u0983\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7\u09C8\u09CB-\u09CE\u09D7\u09DC\u09DD\u09DF-\u09E3\u09E6-\u09FE]/g, 12 | Devanagari: 13 | /[\u0900-\u0950\u0955-\u0963\u0966-\u097F\uA8E0-\uA8FF]|\uD806[\uDF00-\uDF09]/g, 14 | jpn: /[\u3041-\u3096\u309D-\u309F]|\uD82C[\uDC01-\uDD1F\uDD32\uDD50-\uDD52]|\uD83C\uDE00|[\u30A1-\u30FA\u30FD-\u30FF\u31F0-\u31FF\u32D0-\u32FE\u3300-\u3357\uFF66-\uFF6F\uFF71-\uFF9D]|\uD82B[\uDFF0-\uDFF3\uDFF5-\uDFFB\uDFFD\uDFFE]|\uD82C[\uDC00\uDD20-\uDD22\uDD55\uDD64-\uDD67]|[\u3400-\u4DB5\u4E00-\u9FAF]/g, 15 | jav: /[\uA980-\uA9CD\uA9D0-\uA9D9\uA9DE\uA9DF]/g, 16 | kor: /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/g, 17 | tel: /[\u0C00-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3C-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C58-\u0C5A\u0C5D\u0C60-\u0C63\u0C66-\u0C6F\u0C77-\u0C7F]/g, 18 | tam: /[\u0B82\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD0\u0BD7\u0BE6-\u0BFA]|\uD807[\uDFC0-\uDFF1\uDFFF]/g, 19 | guj: /[\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0-\u0AE3\u0AE6-\u0AF1\u0AF9-\u0AFF]/g, 20 | kan: /[\u0C80-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0CDD\u0CDE\u0CE0-\u0CE3\u0CE6-\u0CEF\u0CF1-\u0CF3]/g, 21 | mal: /[\u0D00-\u0D0C\u0D0E-\u0D10\u0D12-\u0D44\u0D46-\u0D48\u0D4A-\u0D4F\u0D54-\u0D63\u0D66-\u0D7F]/g, 22 | Myanmar: /[\u1000-\u109F\uA9E0-\uA9FE\uAA60-\uAA7F]/g, 23 | pan: /[\u0A01-\u0A03\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A51\u0A59-\u0A5C\u0A5E\u0A66-\u0A76]/g, 24 | 
Ethiopic: 25 | /[\u1200-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u135D-\u137C\u1380-\u1399\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E]|\uD839[\uDFE0-\uDFE6\uDFE8-\uDFEB\uDFED\uDFEE\uDFF0-\uDFFE]/g, 26 | tha: /[\u0E01-\u0E3A\u0E40-\u0E5B]/g, 27 | sin: /[\u0D81-\u0D83\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0DCA\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DE6-\u0DEF\u0DF2-\u0DF4]|\uD804[\uDDE1-\uDDF4]/g, 28 | ell: /[\u0370-\u0373\u0375-\u0377\u037A-\u037D\u037F\u0384\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03E1\u03F0-\u03FF\u1D26-\u1D2A\u1D5D-\u1D61\u1D66-\u1D6A\u1DBF\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6-\u1FFE\u2126\uAB65]|\uD800[\uDD40-\uDD8E\uDDA0]|\uD834[\uDE00-\uDE45]/g, 29 | khm: /[\u1780-\u17DD\u17E0-\u17E9\u17F0-\u17F9\u19E0-\u19FF]/g, 30 | hye: /[\u0531-\u0556\u0559-\u058A\u058D-\u058F\uFB13-\uFB17]/g, 31 | sat: /[\u1C50-\u1C7F]/g, 32 | bod: /[\u0F00-\u0F47\u0F49-\u0F6C\u0F71-\u0F97\u0F99-\u0FBC\u0FBE-\u0FCC\u0FCE-\u0FD4\u0FD9\u0FDA]/g, 33 | Hebrew: 34 | /[\u0591-\u05C7\u05D0-\u05EA\u05EF-\u05F4\uFB1D-\uFB36\uFB38-\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46-\uFB4F]/g, 35 | kat: /[\u10A0-\u10C5\u10C7\u10CD\u10D0-\u10FA\u10FC-\u10FF\u1C90-\u1CBA\u1CBD-\u1CBF\u2D00-\u2D25\u2D27\u2D2D]/g, 36 | lao: /[\u0E81\u0E82\u0E84\u0E86-\u0E8A\u0E8C-\u0EA3\u0EA5\u0EA7-\u0EBD\u0EC0-\u0EC4\u0EC6\u0EC8-\u0ECE\u0ED0-\u0ED9\u0EDC-\u0EDF]/g, 37 | zgh: /[\u2D30-\u2D67\u2D6F\u2D70\u2D7F]/g, 38 | iii: /[\uA000-\uA48C\uA490-\uA4C6]/g, 39 | aii: /[\u0700-\u070D\u070F-\u074A\u074D-\u074F\u0860-\u086A]/g 40 | } 41 | 
-------------------------------------------------------------------------------- /packages/franc-all/expressions.js: -------------------------------------------------------------------------------- 1 | // This file is generated by `build.js`. 2 | /** @type {Record} */ 3 | export const expressions = { 4 | cmn: /[\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u3005\u3007\u3021-\u3029\u3038-\u303B\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFA6D\uFA70-\uFAD9]|\uD81B[\uDFE2\uDFE3\uDFF0\uDFF1]|[\uD840-\uD868\uD86A-\uD86C\uD86F-\uD872\uD874-\uD879\uD880-\uD883\uD885-\uD887][\uDC00-\uDFFF]|\uD869[\uDC00-\uDEDF\uDF00-\uDFFF]|\uD86D[\uDC00-\uDF39\uDF40-\uDFFF]|\uD86E[\uDC00-\uDC1D\uDC20-\uDFFF]|\uD873[\uDC00-\uDEA1\uDEB0-\uDFFF]|\uD87A[\uDC00-\uDFE0]|\uD87E[\uDC00-\uDE1D]|\uD884[\uDC00-\uDF4A\uDF50-\uDFFF]|\uD888[\uDC00-\uDFAF]/g, 5 | Latin: 6 | /[A-Za-z\u00AA\u00BA\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02B8\u02E0-\u02E4\u1D00-\u1D25\u1D2C-\u1D5C\u1D62-\u1D65\u1D6B-\u1D77\u1D79-\u1DBE\u1E00-\u1EFF\u2071\u207F\u2090-\u209C\u212A\u212B\u2132\u214E\u2160-\u2188\u2C60-\u2C7F\uA722-\uA787\uA78B-\uA7CA\uA7D0\uA7D1\uA7D3\uA7D5-\uA7D9\uA7F2-\uA7FF\uAB30-\uAB5A\uAB5C-\uAB64\uAB66-\uAB69\uFB00-\uFB06\uFF21-\uFF3A\uFF41-\uFF5A]|\uD801[\uDF80-\uDF85\uDF87-\uDFB0\uDFB2-\uDFBA]|\uD837[\uDF00-\uDF1E\uDF25-\uDF2A]/g, 7 | Cyrillic: 8 | /[\u0400-\u0484\u0487-\u052F\u1C80-\u1C88\u1D2B\u1D78\u2DE0-\u2DFF\uA640-\uA69F\uFE2E\uFE2F]|\uD838[\uDC30-\uDC6D\uDC8F]/g, 9 | Arabic: 10 | 
/[\u0600-\u0604\u0606-\u060B\u060D-\u061A\u061C-\u061E\u0620-\u063F\u0641-\u064A\u0656-\u066F\u0671-\u06DC\u06DE-\u06FF\u0750-\u077F\u0870-\u088E\u0890\u0891\u0898-\u08E1\u08E3-\u08FF\uFB50-\uFBC2\uFBD3-\uFD3D\uFD40-\uFD8F\uFD92-\uFDC7\uFDCF\uFDF0-\uFDFF\uFE70-\uFE74\uFE76-\uFEFC]|\uD803[\uDE60-\uDE7E\uDEFD-\uDEFF]|\uD83B[\uDE00-\uDE03\uDE05-\uDE1F\uDE21\uDE22\uDE24\uDE27\uDE29-\uDE32\uDE34-\uDE37\uDE39\uDE3B\uDE42\uDE47\uDE49\uDE4B\uDE4D-\uDE4F\uDE51\uDE52\uDE54\uDE57\uDE59\uDE5B\uDE5D\uDE5F\uDE61\uDE62\uDE64\uDE67-\uDE6A\uDE6C-\uDE72\uDE74-\uDE77\uDE79-\uDE7C\uDE7E\uDE80-\uDE89\uDE8B-\uDE9B\uDEA1-\uDEA3\uDEA5-\uDEA9\uDEAB-\uDEBB\uDEF0\uDEF1]/g, 11 | ben: /[\u0980-\u0983\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7\u09C8\u09CB-\u09CE\u09D7\u09DC\u09DD\u09DF-\u09E3\u09E6-\u09FE]/g, 12 | Devanagari: 13 | /[\u0900-\u0950\u0955-\u0963\u0966-\u097F\uA8E0-\uA8FF]|\uD806[\uDF00-\uDF09]/g, 14 | jpn: /[\u3041-\u3096\u309D-\u309F]|\uD82C[\uDC01-\uDD1F\uDD32\uDD50-\uDD52]|\uD83C\uDE00|[\u30A1-\u30FA\u30FD-\u30FF\u31F0-\u31FF\u32D0-\u32FE\u3300-\u3357\uFF66-\uFF6F\uFF71-\uFF9D]|\uD82B[\uDFF0-\uDFF3\uDFF5-\uDFFB\uDFFD\uDFFE]|\uD82C[\uDC00\uDD20-\uDD22\uDD55\uDD64-\uDD67]|[\u3400-\u4DB5\u4E00-\u9FAF]/g, 15 | jav: /[\uA980-\uA9CD\uA9D0-\uA9D9\uA9DE\uA9DF]/g, 16 | kor: /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/g, 17 | tel: /[\u0C00-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3C-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C58-\u0C5A\u0C5D\u0C60-\u0C63\u0C66-\u0C6F\u0C77-\u0C7F]/g, 18 | tam: /[\u0B82\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD0\u0BD7\u0BE6-\u0BFA]|\uD807[\uDFC0-\uDFF1\uDFFF]/g, 19 | guj: 
/[\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0-\u0AE3\u0AE6-\u0AF1\u0AF9-\u0AFF]/g, 20 | kan: /[\u0C80-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0CDD\u0CDE\u0CE0-\u0CE3\u0CE6-\u0CEF\u0CF1-\u0CF3]/g, 21 | mal: /[\u0D00-\u0D0C\u0D0E-\u0D10\u0D12-\u0D44\u0D46-\u0D48\u0D4A-\u0D4F\u0D54-\u0D63\u0D66-\u0D7F]/g, 22 | Myanmar: /[\u1000-\u109F\uA9E0-\uA9FE\uAA60-\uAA7F]/g, 23 | pan: /[\u0A01-\u0A03\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A51\u0A59-\u0A5C\u0A5E\u0A66-\u0A76]/g, 24 | Ethiopic: 25 | /[\u1200-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u135D-\u137C\u1380-\u1399\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E]|\uD839[\uDFE0-\uDFE6\uDFE8-\uDFEB\uDFED\uDFEE\uDFF0-\uDFFE]/g, 26 | tha: /[\u0E01-\u0E3A\u0E40-\u0E5B]/g, 27 | sin: /[\u0D81-\u0D83\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0DCA\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DE6-\u0DEF\u0DF2-\u0DF4]|\uD804[\uDDE1-\uDDF4]/g, 28 | ell: /[\u0370-\u0373\u0375-\u0377\u037A-\u037D\u037F\u0384\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03E1\u03F0-\u03FF\u1D26-\u1D2A\u1D5D-\u1D61\u1D66-\u1D6A\u1DBF\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6-\u1FFE\u2126\uAB65]|\uD800[\uDD40-\uDD8E\uDDA0]|\uD834[\uDE00-\uDE45]/g, 29 | khm: /[\u1780-\u17DD\u17E0-\u17E9\u17F0-\u17F9\u19E0-\u19FF]/g, 30 | hye: 
/[\u0531-\u0556\u0559-\u058A\u058D-\u058F\uFB13-\uFB17]/g, 31 | sat: /[\u1C50-\u1C7F]/g, 32 | Tibetan: 33 | /[\u0F00-\u0F47\u0F49-\u0F6C\u0F71-\u0F97\u0F99-\u0FBC\u0FBE-\u0FCC\u0FCE-\u0FD4\u0FD9\u0FDA]/g, 34 | Hebrew: 35 | /[\u0591-\u05C7\u05D0-\u05EA\u05EF-\u05F4\uFB1D-\uFB36\uFB38-\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46-\uFB4F]/g, 36 | kat: /[\u10A0-\u10C5\u10C7\u10CD\u10D0-\u10FA\u10FC-\u10FF\u1C90-\u1CBA\u1CBD-\u1CBF\u2D00-\u2D25\u2D27\u2D2D]/g, 37 | lao: /[\u0E81\u0E82\u0E84\u0E86-\u0E8A\u0E8C-\u0EA3\u0EA5\u0EA7-\u0EBD\u0EC0-\u0EC4\u0EC6\u0EC8-\u0ECE\u0ED0-\u0ED9\u0EDC-\u0EDF]/g, 38 | zgh: /[\u2D30-\u2D67\u2D6F\u2D70\u2D7F]/g, 39 | iii: /[\uA000-\uA48C\uA490-\uA4C6]/g, 40 | aii: /[\u0700-\u070D\u070F-\u074A\u074D-\u074F\u0860-\u086A]/g, 41 | div: /[\u0780-\u07B1]/g, 42 | vai: /[\uA500-\uA62B]/g, 43 | Canadian_Aboriginal: /[\u1400-\u167F\u18B0-\u18F5]|\uD806[\uDEB0-\uDEBF]/g, 44 | chr: /[\u13A0-\u13F5\u13F8-\u13FD\uAB70-\uABBF]/g, 45 | kkh: /[\u1A20-\u1A5E\u1A60-\u1A7C\u1A7F-\u1A89\u1A90-\u1A99\u1AA0-\u1AAD]/g, 46 | blt: /[\uAA80-\uAAC2\uAADB-\uAADF]/g 47 | } 48 | -------------------------------------------------------------------------------- /test/api.js: -------------------------------------------------------------------------------- 1 | import assert from 'node:assert/strict' 2 | import test from 'node:test' 3 | import {franc, francAll} from '../packages/franc/index.js' 4 | import {fixtures} from './fixtures.js' 5 | 6 | const languageA = 'pol' 7 | const languageB = 'eng' 8 | const fixtureB = fixtures[languageB].fixture 9 | const hebrew = 'הפיתוח הראשוני בשנות ה־80 התמקד בגנו ובמערכת הגרפית' 10 | 11 | if (languageA === franc(fixtureB)) { 12 | throw new Error('a and b should not be equal...') 13 | } 14 | 15 | test('franc()', () => { 16 | assert.equal(typeof franc, 'function', 'should be of type `function`') 17 | assert.equal(typeof franc('XYZ'), 'string', 'should return a string') 18 | assert.equal( 19 | franc('XYZ'), 20 | 'und', 21 | 'should return "und" 
on an undetermined value' 22 | ) 23 | assert.equal(franc(), 'und', 'should return "und" on a missing value') 24 | assert.equal( 25 | franc('the the the the the '), 26 | 'sco', 27 | 'should work on weird values' 28 | ) 29 | 30 | /* Inspired by lifthrasiir on hackernews: 31 | * https://news.ycombinator.com/item?id=8405672 */ 32 | assert.equal( 33 | franc( 34 | [ 35 | '한국어 문서가 전 세계 웹에서 차지하는 비중은 2004년에 4.1%로, 이는 영어(35.8%), ', 36 | '중국어(14.1%), 일본어(9.6%), 스페인어(9%), 독일어(7%)에 이어 전 세계 6위이다. ', 37 | '한글 문서와 한국어 문서를 같은것으로 볼 때, 웹상에서의 한국어 사용 인구는 전 세계 ', 38 | '69억여 명의 인구 중 약 1%에 해당한다.' 39 | ].join('') 40 | ), 41 | 'kor', 42 | 'should work on unique-scripts with many latin characters (1)' 43 | ) 44 | 45 | assert.equal( 46 | franc( 47 | [ 48 | '現行の学校文法では、英語にあるような「目的語」「補語」などの成分はないとする。', 49 | '英語文法では "I read a book." の "a book" はSVO文型の一部をなす目的語であり、', 50 | 'また、"I go to the library." の "the library" ', 51 | 'は前置詞とともに付け加えられた修飾語と考えられる。' 52 | ].join('\n') 53 | ), 54 | 'jpn', 55 | 'should work on unique-scripts with many latin characters (2)' 56 | ) 57 | 58 | assert.equal( 59 | franc('すべての人は、生命、自由及び身体の安全に対する権利を有する。'), 60 | 'jpn', 61 | 'should detect Japanese even when Han ratio > 0.5 (udhr_jpn art 3) (1)' 62 | ) 63 | 64 | assert.equal( 65 | franc( 66 | [ 67 | 'すべての人は、憲法又は法律によって与えられた基本的権利を侵害する行為に対し、', 68 | '権限を有する国内裁判所による効果的な救済を受ける権利を有する。' 69 | ].join('') 70 | ), 71 | 'jpn', 72 | 'should detect Japanese even when Han ratio > 0.5 (udhr_jpn art 8) (2)' 73 | ) 74 | 75 | assert.equal( 76 | franc( 77 | [ 78 | '成年の男女は、人種、国籍又は宗教によるいかなる制限をも受けることなく、婚姻し、', 79 | 'かつ家庭をつくる権利を有する。成年の男女は、婚姻中及びその解消に際し、', 80 | '婚姻に関し平等の権利を有する。婚姻は、婚姻の意思を有する両当事者の自由かつ完全な合意によってのみ成立する。', 81 | '家庭は、社会の自然かつ基礎的な集団単位であって、社会及び国の保護を受ける権利を有する。' 82 | ].join('') 83 | ), 84 | 'jpn', 85 | 'should detect Japanese even when Han ratio > 0.5 (udhr_jpn art 16) (3)' 86 | ) 87 | 88 | assert.notEqual( 89 | franc(fixtureB, {ignore: [franc(fixtureB)]}), 90 | franc(fixtureB), 91 | 'should accept `ignore`' 92 | ) 93 | 94 | assert.deepEqual( 95 | 
franc(fixtures.aii.fixture, {ignore: ['aii']}), 96 | 'und', 97 | 'should support `ignore` if the script can only be in that language' 98 | ) 99 | 100 | assert.equal( 101 | franc(fixtureB, {only: [languageA]}), 102 | languageA, 103 | 'should accept `only`' 104 | ) 105 | 106 | assert.equal( 107 | franc(hebrew, {only: ['eng']}), 108 | 'und', 109 | 'should accept `only` for different scripts' 110 | ) 111 | 112 | assert.equal( 113 | franc('the', {minLength: 3}), 114 | 'sco', 115 | 'should accept `minLength` (1)' 116 | ) 117 | assert.equal( 118 | franc('the', {minLength: 4}), 119 | 'und', 120 | 'should accept `minLength` (2)' 121 | ) 122 | 123 | assert.equal( 124 | franc('987 654 321'), 125 | 'und', 126 | 'should return `und` for generic characters' 127 | ) 128 | }) 129 | 130 | test('francAll()', () => { 131 | assert.equal(typeof francAll, 'function', 'should be of type `function`') 132 | 133 | assert.deepEqual( 134 | francAll('XYZ'), 135 | [['und', 1]], 136 | 'should return an array containing language--probability tuples' 137 | ) 138 | 139 | assert.deepEqual( 140 | francAll('פאר טסי'), 141 | [['und', 1]], 142 | 'should return `[["und", 1]]` without matches (1)' 143 | ) 144 | 145 | assert.deepEqual( 146 | francAll('פאר טסי', {minLength: 3}), 147 | [ 148 | ['heb', 0], 149 | ['ydd', 0] 150 | ], 151 | 'should return `[["und", 1]]` without matches (2)' 152 | ) 153 | 154 | assert.deepEqual( 155 | francAll('xyz'), 156 | [['und', 1]], 157 | 'should return `[["und", 1]]` without matches (3)' 158 | ) 159 | 160 | assert.deepEqual( 161 | francAll(), 162 | [['und', 1]], 163 | 'should return `[["und", 1]]` for a missing value' 164 | ) 165 | 166 | assert.deepEqual( 167 | francAll('987 654 321'), 168 | [['und', 1]], 169 | 'should return `[["und", 1]]` for generic characters' 170 | ) 171 | 172 | assert.deepEqual( 173 | francAll('the the the the the ').slice(0, 2), 174 | [ 175 | ['sco', 1], 176 | ['eng', 0.988_900_100_908_173_6] 177 | ], 178 | 'should work on weird values' 179 | ) 180 | 
181 | assert.deepEqual( 182 | francAll(fixtureB, {ignore: [franc(fixtureB)]}) 183 | .map((tuple) => { 184 | return tuple[0] 185 | }) 186 | .indexOf(franc(fixtureB)), 187 | -1, 188 | 'should accept `ignore`' 189 | ) 190 | 191 | assert.deepEqual( 192 | francAll(fixtureB, {only: [languageA]}), 193 | [[languageA, 1]], 194 | 'should accept `only`' 195 | ) 196 | 197 | assert.deepEqual( 198 | francAll(hebrew, {only: ['eng']}), 199 | [['und', 1]], 200 | 'should accept `only` for different scripts' 201 | ) 202 | 203 | assert.deepEqual( 204 | francAll('the', {minLength: 3}).slice(0, 2), 205 | [ 206 | ['sco', 1], 207 | ['eng', 0.998_885_172_798_216_3] 208 | ], 209 | 'should accept `minLength` (1)' 210 | ) 211 | 212 | assert.deepEqual( 213 | francAll('the', {minLength: 4}), 214 | [['und', 1]], 215 | 'should accept `minLength` (2)' 216 | ) 217 | }) 218 | 219 | test('algorithm', () => { 220 | const keys = Object.keys(fixtures) 221 | 222 | // Failing for some reason. 223 | // Trigrams generated incorrectly? 224 | const ignore = new Set(['bos', 'prs']) 225 | 226 | for (const code of keys) { 227 | const info = fixtures[code] 228 | 229 | if (ignore.has(info.iso6393)) continue 230 | 231 | assert.equal( 232 | francAll(info.fixture)[0][0], 233 | info.iso6393, 234 | info.fixture.replace(/\n/g, '\\n').slice(0, 20) + 235 | '... (' + 236 | info.iso6393 + 237 | ')' 238 | ) 239 | } 240 | }) 241 | -------------------------------------------------------------------------------- /packages/franc-min/readme.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # franc-min 4 | 5 | Detect the language of text. 6 | 7 | Built with support for 82 languages (8M or more speakers). 8 | 9 | View the [monorepo](https://github.com/wooorm/franc) for more packages and 10 | info on using them. 11 | 12 | ## Install 13 | 14 | This package is [ESM only](https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c). 
15 | In Node.js (version 14.14+, 16.0+), install with 16 | [npm](https://docs.npmjs.com/cli/install): 17 | 18 | npm: 19 | 20 | ```sh 21 | npm install franc-min 22 | ``` 23 | 24 | ## Data 25 | 26 | This build supports the following languages: 27 | 28 | | Code | Name | Speakers | 29 | | - | - | - | 30 | | [`cmn`](http://www-01.sil.org/iso639-3/documentation.asp?id=cmn) | Mandarin Chinese | 885M | 31 | | [`spa`](http://www-01.sil.org/iso639-3/documentation.asp?id=spa) | Spanish | 332M | 32 | | [`eng`](http://www-01.sil.org/iso639-3/documentation.asp?id=eng) | English | 322M | 33 | | [`rus`](http://www-01.sil.org/iso639-3/documentation.asp?id=rus) | Russian | 288M | 34 | | [`arb`](http://www-01.sil.org/iso639-3/documentation.asp?id=arb) | Standard Arabic | 280M | 35 | | [`ben`](http://www-01.sil.org/iso639-3/documentation.asp?id=ben) | Bengali | 196M | 36 | | [`hin`](http://www-01.sil.org/iso639-3/documentation.asp?id=hin) | Hindi | 182M | 37 | | [`por`](http://www-01.sil.org/iso639-3/documentation.asp?id=por) | Portuguese | 182M | 38 | | [`ind`](http://www-01.sil.org/iso639-3/documentation.asp?id=ind) | Indonesian | 140M | 39 | | [`jpn`](http://www-01.sil.org/iso639-3/documentation.asp?id=jpn) | Japanese | 125M | 40 | | [`fra`](http://www-01.sil.org/iso639-3/documentation.asp?id=fra) | French | 124M | 41 | | [`deu`](http://www-01.sil.org/iso639-3/documentation.asp?id=deu) | German | 121M | 42 | | [`jav`](http://www-01.sil.org/iso639-3/documentation.asp?id=jav) | Javanese (Javanese) | 76M | 43 | | [`jav`](http://www-01.sil.org/iso639-3/documentation.asp?id=jav) | Javanese (Latin) | 76M | 44 | | [`kor`](http://www-01.sil.org/iso639-3/documentation.asp?id=kor) | Korean | 75M | 45 | | [`tel`](http://www-01.sil.org/iso639-3/documentation.asp?id=tel) | Telugu | 73M | 46 | | [`vie`](http://www-01.sil.org/iso639-3/documentation.asp?id=vie) | Vietnamese | 67M | 47 | | [`mar`](http://www-01.sil.org/iso639-3/documentation.asp?id=mar) | Marathi | 65M | 48 | | 
[`ita`](http://www-01.sil.org/iso639-3/documentation.asp?id=ita) | Italian | 63M | 49 | | [`tam`](http://www-01.sil.org/iso639-3/documentation.asp?id=tam) | Tamil | 62M | 50 | | [`tur`](http://www-01.sil.org/iso639-3/documentation.asp?id=tur) | Turkish | 59M | 51 | | [`urd`](http://www-01.sil.org/iso639-3/documentation.asp?id=urd) | Urdu | 54M | 52 | | [`guj`](http://www-01.sil.org/iso639-3/documentation.asp?id=guj) | Gujarati | 44M | 53 | | [`pol`](http://www-01.sil.org/iso639-3/documentation.asp?id=pol) | Polish | 44M | 54 | | [`ukr`](http://www-01.sil.org/iso639-3/documentation.asp?id=ukr) | Ukrainian | 41M | 55 | | [`kan`](http://www-01.sil.org/iso639-3/documentation.asp?id=kan) | Kannada | 38M | 56 | | [`mai`](http://www-01.sil.org/iso639-3/documentation.asp?id=mai) | Maithili | 35M | 57 | | [`mal`](http://www-01.sil.org/iso639-3/documentation.asp?id=mal) | Malayalam | 34M | 58 | | [`pes`](http://www-01.sil.org/iso639-3/documentation.asp?id=pes) | Iranian Persian | 33M | 59 | | [`mya`](http://www-01.sil.org/iso639-3/documentation.asp?id=mya) | Burmese | 31M | 60 | | [`swh`](http://www-01.sil.org/iso639-3/documentation.asp?id=swh) | Swahili (individual language) | 30M | 61 | | [`sun`](http://www-01.sil.org/iso639-3/documentation.asp?id=sun) | Sundanese | 27M | 62 | | [`ron`](http://www-01.sil.org/iso639-3/documentation.asp?id=ron) | Romanian | 26M | 63 | | [`pan`](http://www-01.sil.org/iso639-3/documentation.asp?id=pan) | Panjabi | 26M | 64 | | [`bho`](http://www-01.sil.org/iso639-3/documentation.asp?id=bho) | Bhojpuri | 25M | 65 | | [`amh`](http://www-01.sil.org/iso639-3/documentation.asp?id=amh) | Amharic | 23M | 66 | | [`hau`](http://www-01.sil.org/iso639-3/documentation.asp?id=hau) | Hausa | 22M | 67 | | [`fuv`](http://www-01.sil.org/iso639-3/documentation.asp?id=fuv) | Nigerian Fulfulde | 22M | 68 | | [`bos`](http://www-01.sil.org/iso639-3/documentation.asp?id=bos) | Bosnian (Cyrillic) | 21M | 69 | | 
[`bos`](http://www-01.sil.org/iso639-3/documentation.asp?id=bos) | Bosnian (Latin) | 21M | 70 | | [`hrv`](http://www-01.sil.org/iso639-3/documentation.asp?id=hrv) | Croatian | 21M | 71 | | [`nld`](http://www-01.sil.org/iso639-3/documentation.asp?id=nld) | Dutch | 21M | 72 | | [`srp`](http://www-01.sil.org/iso639-3/documentation.asp?id=srp) | Serbian (Cyrillic) | 21M | 73 | | [`srp`](http://www-01.sil.org/iso639-3/documentation.asp?id=srp) | Serbian (Latin) | 21M | 74 | | [`tha`](http://www-01.sil.org/iso639-3/documentation.asp?id=tha) | Thai | 21M | 75 | | [`ckb`](http://www-01.sil.org/iso639-3/documentation.asp?id=ckb) | Central Kurdish | 20M | 76 | | [`yor`](http://www-01.sil.org/iso639-3/documentation.asp?id=yor) | Yoruba | 20M | 77 | | [`uzn`](http://www-01.sil.org/iso639-3/documentation.asp?id=uzn) | Northern Uzbek (Cyrillic) | 18M | 78 | | [`uzn`](http://www-01.sil.org/iso639-3/documentation.asp?id=uzn) | Northern Uzbek (Latin) | 18M | 79 | | [`zlm`](http://www-01.sil.org/iso639-3/documentation.asp?id=zlm) | Malay (individual language) (Arabic) | 18M | 80 | | [`zlm`](http://www-01.sil.org/iso639-3/documentation.asp?id=zlm) | Malay (individual language) (Latin) | 18M | 81 | | [`ibo`](http://www-01.sil.org/iso639-3/documentation.asp?id=ibo) | Igbo | 17M | 82 | | [`npi`](http://www-01.sil.org/iso639-3/documentation.asp?id=npi) | Nepali (individual language) | 16M | 83 | | [`ceb`](http://www-01.sil.org/iso639-3/documentation.asp?id=ceb) | Cebuano | 15M | 84 | | [`skr`](http://www-01.sil.org/iso639-3/documentation.asp?id=skr) | Saraiki | 15M | 85 | | [`tgl`](http://www-01.sil.org/iso639-3/documentation.asp?id=tgl) | Tagalog | 15M | 86 | | [`hun`](http://www-01.sil.org/iso639-3/documentation.asp?id=hun) | Hungarian | 15M | 87 | | [`azj`](http://www-01.sil.org/iso639-3/documentation.asp?id=azj) | North Azerbaijani (Cyrillic) | 14M | 88 | | [`azj`](http://www-01.sil.org/iso639-3/documentation.asp?id=azj) | North Azerbaijani (Latin) | 14M | 89 | | 
[`sin`](http://www-01.sil.org/iso639-3/documentation.asp?id=sin) | Sinhala | 13M | 90 | | [`koi`](http://www-01.sil.org/iso639-3/documentation.asp?id=koi) | Komi-Permyak | 13M | 91 | | [`ell`](http://www-01.sil.org/iso639-3/documentation.asp?id=ell) | Modern Greek (1453-) | 12M | 92 | | [`ces`](http://www-01.sil.org/iso639-3/documentation.asp?id=ces) | Czech | 12M | 93 | | [`mag`](http://www-01.sil.org/iso639-3/documentation.asp?id=mag) | Magahi | 11M | 94 | | [`run`](http://www-01.sil.org/iso639-3/documentation.asp?id=run) | Rundi | 11M | 95 | | [`bel`](http://www-01.sil.org/iso639-3/documentation.asp?id=bel) | Belarusian | 10M | 96 | | [`plt`](http://www-01.sil.org/iso639-3/documentation.asp?id=plt) | Plateau Malagasy | 10M | 97 | | [`qug`](http://www-01.sil.org/iso639-3/documentation.asp?id=qug) | Chimborazo Highland Quichua | 10M | 98 | | [`mad`](http://www-01.sil.org/iso639-3/documentation.asp?id=mad) | Madurese | 10M | 99 | | [`nya`](http://www-01.sil.org/iso639-3/documentation.asp?id=nya) | Nyanja | 10M | 100 | | [`zyb`](http://www-01.sil.org/iso639-3/documentation.asp?id=zyb) | Yongbei Zhuang | 10M | 101 | | [`pbu`](http://www-01.sil.org/iso639-3/documentation.asp?id=pbu) | Northern Pashto | 10M | 102 | | [`kin`](http://www-01.sil.org/iso639-3/documentation.asp?id=kin) | Kinyarwanda | 9M | 103 | | [`zul`](http://www-01.sil.org/iso639-3/documentation.asp?id=zul) | Zulu | 9M | 104 | | [`bul`](http://www-01.sil.org/iso639-3/documentation.asp?id=bul) | Bulgarian | 9M | 105 | | [`swe`](http://www-01.sil.org/iso639-3/documentation.asp?id=swe) | Swedish | 9M | 106 | | [`lin`](http://www-01.sil.org/iso639-3/documentation.asp?id=lin) | Lingala | 8M | 107 | | [`som`](http://www-01.sil.org/iso639-3/documentation.asp?id=som) | Somali | 8M | 108 | | [`hms`](http://www-01.sil.org/iso639-3/documentation.asp?id=hms) | Southern Qiandong Miao | 8M | 109 | | [`hnj`](http://www-01.sil.org/iso639-3/documentation.asp?id=hnj) | Hmong Njua | 8M | 110 | | 
[`ilo`](http://www-01.sil.org/iso639-3/documentation.asp?id=ilo) | Iloko | 8M | 111 | | [`kaz`](http://www-01.sil.org/iso639-3/documentation.asp?id=kaz) | Kazakh | 8M | 112 | 113 | ## License 114 | 115 | [MIT](https://github.com/wooorm/franc/blob/main/license) © [Titus Wormer](http://wooorm.com) 116 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # ![franc][logo] 2 | 3 | [![Build Status][build-badge]][build] 4 | [![Coverage Status][coverage-badge]][coverage] 5 | 6 | Detect the language of text. 7 | 8 | ## What’s so cool about franc? 9 | 10 | 1. **franc** can support more languages(†) than any other 11 | library 12 | 2. **franc** is packaged with support for [82][s], [186][m], or [419][l] 13 | languages 14 | 3. **franc** has a CLI 15 | 16 | † - Based on the [UDHR][], the most translated copyright-free document in the 17 | world. 18 | 19 | ## What’s not so cool about franc? 20 | 21 | **franc** supports many languages, which means it’s easily confused on small 22 | samples. 23 | Make sure to pass it big documents to get reliable results. 24 | 25 | ## Install 26 | 27 | > 👉 **Note**: this installs the [`franc`][m] package, with support for 187 28 | > languages (languages which have 1 million or more speakers). 29 | > [`franc-min`][s] (82 languages, 8m or more speakers) and [`franc-all`][l] 30 | > (all 414 possible languages) are also available. 31 | > Finally, use `franc-cli` to install the [CLI][]. 32 | 33 | This package is [ESM only][esm]. 
34 | In Node.js (version 14.14+, 16.0+), install with [npm][]: 35 | 36 | ```sh 37 | npm install franc 38 | ``` 39 | 40 | In Deno with [`esm.sh`][esmsh]: 41 | 42 | ```js 43 | import {franc, francAll} from 'https://esm.sh/franc@6' 44 | ``` 45 | 46 | In browsers with [`esm.sh`][esmsh]: 47 | 48 | ```html 49 | <script type="module"> 50 | import {franc, francAll} from 'https://esm.sh/franc@6?bundle' 51 | </script> 52 | ``` 53 | 54 | ## Use 55 | 56 | ```js 57 | import {franc, francAll} from 'franc' 58 | 59 | franc('Alle menslike wesens word vry') //=> 'afr' 60 | franc('এটি একটি ভাষা একক IBM স্ক্রিপ্ট') //=> 'ben' 61 | franc('Alle menneske er fødde til fridom') //=> 'nno' 62 | 63 | franc('') //=> 'und' (language code that stands for undetermined) 64 | 65 | // You can change what’s too short (default: 10): 66 | franc('the') //=> 'und' 67 | franc('the', {minLength: 3}) //=> 'sco' 68 | 69 | console.log(francAll('Considerando ser essencial que os direitos humanos')) 70 | //=> [['por', 1], ['glg', 0.771284519307895], ['spa', 0.6034146900423971], …123 more items] 71 | 72 | console.log(francAll('Considerando ser essencial que os direitos humanos', {only: ['por', 'spa']})) 73 | //=> [['por', 1 ], ['spa', 0.6034146900423971]] 74 | 75 | console.log(francAll('Considerando ser essencial que os direitos humanos', {ignore: ['spa', 'glg']})) 76 | //=> [['por', 1], ['cat', 0.5367251059928957], ['src', 0.47461899851037015], …121 more items] 77 | ``` 78 | 79 | ## API 80 | 81 | This package exports the identifiers `franc`, `francAll`. 82 | There is no default export. 83 | 84 | ### `franc(value[, options])` 85 | 86 | Get the most probable language for the given value. 87 | 88 | ###### Parameters 89 | 90 | * `value` (`string`) — value to test 91 | * `options` (`Options`, optional) — configuration 92 | 93 | ###### Returns 94 | 95 | The most probable language (`string`). 96 | 97 | ### `francAll(value[, options])` 98 | 99 | Get a list of probable languages the given value is written in.
100 | 101 | ###### Parameters 102 | 103 | * `value` (`string`) — value to test 104 | * `options` (`Options`, optional) — configuration 105 | 106 | ###### Returns 107 | 108 | Array containing language—distance tuples (`Array<[string, number]>`). 109 | 110 | ### `Options` 111 | 112 | Configuration (`Object`, optional) with the following fields: 113 | 114 | ###### `options.only` 115 | 116 | Languages to allow (`Array<string>`, optional). 117 | 118 | ###### `options.ignore` 119 | 120 | Languages to ignore (`Array<string>`, optional). 121 | 122 | ###### `options.minLength` 123 | 124 | Minimum length to accept (`number`, default: `10`). 125 | 126 | ## CLI 127 | 128 | Install: 129 | 130 | ```sh 131 | npm install franc-cli --global 132 | ``` 133 | 134 | Use: 135 | 136 | ```text 137 | CLI to detect the language of text 138 | 139 | Usage: franc [options] <string> 140 | 141 | Options: 142 | 143 | -h, --help output usage information 144 | -v, --version output version number 145 | -m, --min-length <number> minimum length to accept 146 | -o, --only <string> allow languages 147 | -i, --ignore <string> disallow languages 148 | -a, --all display all guesses 149 | 150 | Usage: 151 | 152 | # output language 153 | $ franc "Alle menslike wesens word vry" 154 | # afr 155 | 156 | # output language from stdin (expects utf8) 157 | $ echo "এটি একটি ভাষা একক IBM স্ক্রিপ্ট" | franc 158 | # ben 159 | 160 | # ignore certain languages 161 | $ franc --ignore por,glg "O Brasil caiu 26 posições" 162 | # src 163 | 164 | # output language from stdin with only 165 | $ echo "Alle mennesker er født frie og" | franc --only nob,dan 166 | # nob 167 | ``` 168 | 169 | ## Data 170 | 171 | ###### Supported languages 172 | 173 | | Package | Languages | Speakers | 174 | | - | - | - | 175 | | [`franc-min`][s] | 82 | 8M or more | 176 | | [`franc`][m] | 187 | 1M or more | 177 | | [`franc-all`][l] | 414 | - | 178 | 179 | ###### Language code 180 | 181 | > 👉 **Note**: franc returns [ISO 639-3][iso6393] codes (three letter codes).
182 | > **Not** ISO 639-1 or ISO 639-2. 183 | > See also [GH-10][] and [GH-30][]. 184 | 185 | To get more info about the languages represented by ISO 639-3, use 186 | [`iso-639-3`][iso-639-3]. 187 | There is also an index available to map ISO 639-3 to ISO 639-1 codes, 188 | [`iso-639-3/to-1.json`][iso-639-3-to-1], but note that not all 639-3 codes can 189 | be represented in 639-1. 190 | 191 | ## Types 192 | 193 | These packages are fully typed with [TypeScript][]. 194 | They export the additional types `TrigramTuple` and `Options`. 195 | 196 | ## Compatibility 197 | 198 | These packages are at least compatible with all maintained versions of Node.js. 199 | As of now, that is Node.js 14.14+ and 16.0+. 200 | They also work in Deno and modern browsers. 201 | 202 | ## Ports 203 | 204 | Franc has been ported to several other programming languages. 205 | 206 | * Elixir — [`paasaa`](https://github.com/minibikini/paasaa) 207 | * Erlang — [`efranc`](https://github.com/G-Corp/efranc) 208 | * Go — [`franco`](https://github.com/kapsteur/franco), 209 | [`whatlanggo`](https://github.com/abadojack/whatlanggo) 210 | * R — [`franc`](https://github.com/MangoTheCat/franc) 211 | * Rust — [`whatlang-rs`](https://github.com/greyblake/whatlang-rs) 212 | * Dart — [`francd`](https://github.com/svonidze/francd) 213 | * Python — [`pyfranc`](https://github.com/cyb3rk0tik/pyfranc) 214 | 215 | The works franc is derived from have themselves also been ported to other 216 | languages. 217 | 218 | ## Derivation 219 | 220 | Franc is a derivative work from [guess-language][] (Python, LGPL), 221 | [guesslanguage][] (C++, LGPL), and [Language::Guess][language-guess] 222 | (Perl, GPL). 223 | Their creators granted me the rights to distribute franc under the MIT license: 224 | respectively, [Kent S. Johnson][grant-3], [Jacob R. Rideout][grant-2], and 225 | [Maciej Ceglowski][grant-1]. 226 | 227 | ## Contribute 228 | 229 | Yes please! 230 | See [How to Contribute to Open Source][contribute].
231 | 232 | ## Security 233 | 234 | This package is safe. 235 | 236 | ## License 237 | 238 | [MIT][] © [Titus Wormer][home] 239 | 240 | 241 | 242 | [logo]: https://raw.githubusercontent.com/wooorm/franc/a162cc0/logo.svg?sanitize=true 243 | 244 | [build-badge]: https://github.com/wooorm/franc/workflows/main/badge.svg 245 | 246 | [build]: https://github.com/wooorm/franc/actions 247 | 248 | [coverage-badge]: https://img.shields.io/codecov/c/github/wooorm/franc.svg 249 | 250 | [coverage]: https://codecov.io/github/wooorm/franc 251 | 252 | [npm]: https://docs.npmjs.com/cli/install 253 | 254 | [guess-language]: https://github.com/kent37/guess-language 255 | 256 | [guesslanguage]: http://websvn.kde.org/branches/work/sonnet-refactoring/common/nlp/guesslanguage.cpp?view=markup 257 | 258 | [language-guess]: http://web.archive.org/web/20090228163219/http://languid.cantbedone.org/ 259 | 260 | [grant-1]: https://github.com/wooorm/franc/issues/6#issuecomment-59669191 261 | 262 | [grant-2]: https://github.com/wooorm/franc/issues/6#issuecomment-60196819 263 | 264 | [grant-3]: https://github.com/wooorm/franc/issues/6#issuecomment-59936827 265 | 266 | [esm]: https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c 267 | 268 | [esmsh]: https://esm.sh 269 | 270 | [typescript]: https://www.typescriptlang.org 271 | 272 | [contribute]: https://opensource.guide/how-to-contribute/ 273 | 274 | [mit]: license 275 | 276 | [home]: http://wooorm.com 277 | 278 | [cli]: #cli 279 | 280 | [udhr]: http://unicode.org/udhr/ 281 | 282 | [s]: https://github.com/wooorm/franc/tree/main/packages/franc-min 283 | 284 | [m]: https://github.com/wooorm/franc/tree/main/packages/franc 285 | 286 | [l]: https://github.com/wooorm/franc/tree/main/packages/franc-all 287 | 288 | [iso6393]: https://iso639-3.sil.org/code_tables/639/data 289 | 290 | [gh-10]: https://github.com/wooorm/franc/issues/10 291 | 292 | [gh-30]: https://github.com/wooorm/franc/issues/30 293 | 294 | [iso-639-3]: 
https://github.com/wooorm/iso-639-3 295 | 296 | [iso-639-3-to-1]: https://github.com/wooorm/iso-639-3/blob/main/iso6393-to-1.js 297 | -------------------------------------------------------------------------------- /packages/franc/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @typedef {import('trigram-utils').TrigramTuple} TrigramTuple 3 | * 4 | * @typedef Options 5 | * @property {Array<string>} [only] 6 | * Languages to allow. 7 | * @property {Array<string>} [ignore] 8 | * Languages to ignore. 9 | * @property {number} [minLength=10] 10 | * Minimum length to accept. 11 | */ 12 | 13 | /* Load `trigram-utils`. */ 14 | import {asTuples} from 'trigram-utils' 15 | /* Load `expressions` (regular expressions matching 16 | * scripts). */ 17 | import {expressions} from './expressions.js' 18 | /* Load `data` (trigram information per language, 19 | * per script). */ 20 | import {data} from './data.js' 21 | 22 | /* Maximum sample length; longer values are truncated to this length. */ 23 | const MAX_LENGTH = 2048 24 | 25 | /* Default minimum sample length; shorter values are reported as `und`. */ 26 | const MIN_LENGTH = 10 27 | 28 | /* The maximum distance to add when a given trigram does 29 | * not exist in a trigram dictionary. */ 30 | const MAX_DIFFERENCE = 300 31 | 32 | const own = {}.hasOwnProperty 33 | 34 | /* Construct trigram dictionaries: per script and language, map each trigram of the `|`-separated model to its index in that model.
*/ 35 | 36 | /** @type {string} */ 37 | let script 38 | 39 | /** @type {Record<string, Record<string, Record<string, number>>>} */ 40 | const numericData = {} 41 | 42 | for (script in data) { 43 | if (own.call(data, script)) { 44 | const languages = data[script] 45 | /** @type {string} */ 46 | let name 47 | 48 | numericData[script] = {} 49 | 50 | for (name in languages) { 51 | if (own.call(languages, name)) { 52 | const model = languages[name].split('|') 53 | /** @type {Record<string, number>} */ 54 | const trigrams = {} 55 | let weight = model.length 56 | 57 | while (weight--) { 58 | trigrams[model[weight]] = weight 59 | } 60 | 61 | numericData[script][name] = trigrams 62 | } 63 | } 64 | } 65 | } 66 | 67 | /** 68 | * Get the most probable language for the given value. 69 | * 70 | * @param {string} [value] 71 | * The value to test. 72 | * @param {Options} [options] 73 | * Configuration. 74 | * @return {string} 75 | * The most probable language, or `und` when it cannot be determined. 76 | */ 77 | export function franc(value, options) { 78 | return francAll(value, options)[0][0] 79 | } 80 | 81 | /** 82 | * Get a list of probable languages the given value is 83 | * written in. 84 | * 85 | * @param {string} [value] 86 | * The value to test. 87 | * @param {Options} [options] 88 | * Configuration. 89 | * @return {Array<TrigramTuple>} 90 | * An array containing language—distance tuples, most probable first; `[['und', 1]]` when undetermined. 91 | */ 92 | export function francAll(value, options = {}) { 93 | /** @type {Array<string>} */ 94 | // @ts-expect-error: `whitelist` is from long ago. 95 | const only = [...(options.whitelist || []), ...(options.only || [])] 96 | /** @type {Array<string>} */ 97 | // @ts-expect-error: `blacklist` is from long ago. 98 | const ignore = [...(options.blacklist || []), ...(options.ignore || [])] 99 | const minLength = 100 | options.minLength !== null && options.minLength !== undefined 101 | ?
options.minLength 102 | : MIN_LENGTH 103 | 104 | if (!value || value.length < minLength) { 105 | return und() 106 | } 107 | 108 | value = value.slice(0, MAX_LENGTH) 109 | 110 | /* Get the script whose characters occur the most 111 | * in `value`. */ 112 | const script = getTopScript(value, expressions) 113 | 114 | /* Only one language exists for the most-used script (it has no trigram data). */ 115 | if (!script[0] || !(script[0] in numericData)) { 116 | /* If no matches occurred, such as a digit-only string, 117 | * or because the language is ignored, exit with `und`. */ 118 | if (!script[0] || script[1] === 0 || !allow(script[0], only, ignore)) { 119 | return und() 120 | } 121 | 122 | return singleLanguageTuples(script[0]) 123 | } 124 | 125 | /* Get all distances for a given script, and 126 | * normalize the distance values. */ 127 | return normalize( 128 | value, 129 | getDistances(asTuples(value), numericData[script[0]], only, ignore) 130 | ) 131 | } 132 | 133 | /** 134 | * Normalize the difference for each tuple in 135 | * `distances`, in place, relative to the first tuple's distance (used as the minimum). 136 | * 137 | * @param {string} value 138 | * Value that was tested; its length bounds the maximum distance. 139 | * @param {Array<TrigramTuple>} distances 140 | * List of distances. 141 | * @return {Array<TrigramTuple>} 142 | * Normalized distances (the same array as `distances`). 143 | */ 144 | function normalize(value, distances) { 145 | const min = distances[0][1] 146 | const max = value.length * MAX_DIFFERENCE - min 147 | let index = -1 148 | 149 | while (++index < distances.length) { 150 | distances[index][1] = 1 - (distances[index][1] - min) / max || 0 151 | } 152 | 153 | return distances 154 | } 155 | 156 | /** 157 | * From `scripts`, get the most occurring expression for 158 | * `value`. 159 | * 160 | * @param {string} value 161 | * Value to check. 162 | * @param {Record<string, RegExp>} scripts 163 | * Script names mapped to the expressions to count in `value`. 164 | * @return {[string|undefined, number]} 165 | * Top script and its occurrence ratio.
166 | */ 167 | function getTopScript(value, scripts) { 168 | let topCount = -1 169 | /** @type {string|undefined} */ 170 | let topScript 171 | /** @type {string} */ 172 | let script 173 | 174 | for (script in scripts) { 175 | if (own.call(scripts, script)) { 176 | const count = getOccurrence(value, scripts[script]) 177 | 178 | if (count > topCount) { 179 | topCount = count 180 | topScript = script 181 | } 182 | } 183 | } 184 | 185 | return [topScript, topCount] 186 | } 187 | 188 | /** 189 | * Get the occurrence ratio of `expression` for `value`. 190 | * 191 | * @param {string} value 192 | * Value to check. 193 | * @param {RegExp} expression 194 | * Code-point expression. 195 | * @return {number} 196 | * Float between 0 and 1. 197 | */ 198 | function getOccurrence(value, expression) { 199 | const count = value.match(expression) 200 | 201 | return (count ? count.length : 0) / value.length || 0 202 | } 203 | 204 | /** 205 | * Get the distance between an array of trigram—count 206 | * tuples, and multiple trigram dictionaries. 207 | * 208 | * @param {Array<TrigramTuple>} trigrams 209 | * An array containing trigram—count tuples. 210 | * @param {Record<string, Record<string, number>>} languages 211 | * Multiple trigrams to test against. 212 | * @param {Array<string>} only 213 | * Allowed languages; if non-empty, only included languages are kept. 214 | * @param {Array<string>} ignore 215 | * Disallowed languages; included languages are ignored. 216 | * @return {Array<TrigramTuple>} 217 | * An array containing language—distance tuples, sorted by ascending distance; a single `und` tuple when no language remains. 218 | */ 219 | function getDistances(trigrams, languages, only, ignore) { 220 | languages = filterLanguages(languages, only, ignore) 221 | 222 | /** @type {Array<TrigramTuple>} */ 223 | const distances = [] 224 | /** @type {string} */ 225 | let language 226 | 227 | if (languages) { 228 | for (language in languages) { 229 | if (own.call(languages, language)) { 230 | distances.push([language, getDistance(trigrams, languages[language])]) 231 | } 232 | } 233 | } 234 | 235 | return distances.length === 0 ?
und() : distances.sort(sort) 236 | } 237 | 238 | /** 239 | * Get the distance between an array of trigram—count 240 | * tuples, and a language dictionary. 241 | * 242 | * @param {Array<TrigramTuple>} trigrams 243 | * An array containing trigram—count tuples. 244 | * @param {Record<string, number>} model 245 | * Object containing weighted trigrams. 246 | * @return {number} 247 | * The distance between the two: per tuple, `|tuple[1] - model[trigram] - 1|`, or `MAX_DIFFERENCE` when the trigram is not in `model`, summed. 248 | */ 249 | function getDistance(trigrams, model) { 250 | let distance = 0 251 | let index = -1 252 | 253 | while (++index < trigrams.length) { 254 | const trigram = trigrams[index] 255 | let difference = MAX_DIFFERENCE 256 | 257 | if (trigram[0] in model) { 258 | difference = trigram[1] - model[trigram[0]] - 1 259 | 260 | if (difference < 0) { 261 | difference = -difference 262 | } 263 | } 264 | 265 | distance += difference 266 | } 267 | 268 | return distance 269 | } 270 | 271 | /** 272 | * Filter `languages` by removing languages in 273 | * `ignore`, or including languages in `only`. 274 | * 275 | * @param {Record<string, Record<string, number>>} languages 276 | * Languages to filter. 277 | * @param {Array<string>} only 278 | * Allowed languages; if non-empty, only included languages are kept. 279 | * @param {Array<string>} ignore 280 | * Disallowed languages; included languages are ignored. 281 | * @return {Record<string, Record<string, number>>} 282 | * Filtered languages; `languages` itself when both `only` and `ignore` are empty. 283 | */ 284 | function filterLanguages(languages, only, ignore) { 285 | if (only.length === 0 && ignore.length === 0) { 286 | return languages 287 | } 288 | 289 | /** @type {Record<string, Record<string, number>>} */ 290 | const filteredLanguages = {} 291 | /** @type {string} */ 292 | let language 293 | 294 | for (language in languages) { 295 | if (allow(language, only, ignore)) { 296 | filteredLanguages[language] = languages[language] 297 | } 298 | } 299 | 300 | return filteredLanguages 301 | } 302 | 303 | /** 304 | * Check if `language` can match according to settings.
305 | * 306 | * @param {string} language 307 | * Language to check. 308 | * @param {Array<string>} only 309 | * Allowed languages; if non-empty, only included languages are kept. 310 | * @param {Array<string>} ignore 311 | * Disallowed languages; included languages are ignored. 312 | * @return {boolean} 313 | * Whether `language` can match. 314 | */ 315 | function allow(language, only, ignore) { 316 | if (only.length === 0 && ignore.length === 0) { 317 | return true 318 | } 319 | 320 | return ( 321 | (only.length === 0 || only.includes(language)) && !ignore.includes(language) 322 | ) 323 | } 324 | 325 | /** 326 | * Create a list holding a single `und` (undetermined) tuple. 327 | */ 328 | function und() { 329 | return singleLanguageTuples('und') 330 | } 331 | 332 | /** 333 | * Create a single tuple as a list of tuples from a given language code. 334 | * 335 | * @param {string} language 336 | * @returns {Array<TrigramTuple>} 337 | */ 338 | function singleLanguageTuples(language) { 339 | return [[language, 1]] 340 | } 341 | 342 | /** 343 | * Compare two tuples by the number at index `1`, for an ascending sort. 344 | * 345 | * @param {TrigramTuple} a 346 | * @param {TrigramTuple} b 347 | */ 348 | function sort(a, b) { 349 | return a[1] - b[1] 350 | } 351 | -------------------------------------------------------------------------------- /packages/franc/readme.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # franc 4 | 5 | Detect the language of text. 6 | 7 | Built with support for 186 languages (1M or more speakers). 8 | 9 | View the [monorepo](https://github.com/wooorm/franc) for more packages and 10 | info on using them. 11 | 12 | ## Install 13 | 14 | This package is [ESM only](https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c).
15 | In Node.js (version 14.14+, 16.0+), install with 16 | [npm](https://docs.npmjs.com/cli/install): 17 | 18 | npm: 19 | 20 | ```sh 21 | npm install franc 22 | ``` 23 | 24 | ## Data 25 | 26 | This build supports the following languages: 27 | 28 | | Code | Name | Speakers | 29 | | - | - | - | 30 | | [`cmn`](http://www-01.sil.org/iso639-3/documentation.asp?id=cmn) | Mandarin Chinese | 885M | 31 | | [`spa`](http://www-01.sil.org/iso639-3/documentation.asp?id=spa) | Spanish | 332M | 32 | | [`eng`](http://www-01.sil.org/iso639-3/documentation.asp?id=eng) | English | 322M | 33 | | [`rus`](http://www-01.sil.org/iso639-3/documentation.asp?id=rus) | Russian | 288M | 34 | | [`arb`](http://www-01.sil.org/iso639-3/documentation.asp?id=arb) | Standard Arabic | 280M | 35 | | [`ben`](http://www-01.sil.org/iso639-3/documentation.asp?id=ben) | Bengali | 196M | 36 | | [`hin`](http://www-01.sil.org/iso639-3/documentation.asp?id=hin) | Hindi | 182M | 37 | | [`por`](http://www-01.sil.org/iso639-3/documentation.asp?id=por) | Portuguese | 182M | 38 | | [`ind`](http://www-01.sil.org/iso639-3/documentation.asp?id=ind) | Indonesian | 140M | 39 | | [`jpn`](http://www-01.sil.org/iso639-3/documentation.asp?id=jpn) | Japanese | 125M | 40 | | [`fra`](http://www-01.sil.org/iso639-3/documentation.asp?id=fra) | French | 124M | 41 | | [`deu`](http://www-01.sil.org/iso639-3/documentation.asp?id=deu) | German | 121M | 42 | | [`jav`](http://www-01.sil.org/iso639-3/documentation.asp?id=jav) | Javanese (Javanese) | 76M | 43 | | [`jav`](http://www-01.sil.org/iso639-3/documentation.asp?id=jav) | Javanese (Latin) | 76M | 44 | | [`kor`](http://www-01.sil.org/iso639-3/documentation.asp?id=kor) | Korean | 75M | 45 | | [`tel`](http://www-01.sil.org/iso639-3/documentation.asp?id=tel) | Telugu | 73M | 46 | | [`vie`](http://www-01.sil.org/iso639-3/documentation.asp?id=vie) | Vietnamese | 67M | 47 | | [`mar`](http://www-01.sil.org/iso639-3/documentation.asp?id=mar) | Marathi | 65M | 48 | | 
[`ita`](http://www-01.sil.org/iso639-3/documentation.asp?id=ita) | Italian | 63M | 49 | | [`tam`](http://www-01.sil.org/iso639-3/documentation.asp?id=tam) | Tamil | 62M | 50 | | [`tur`](http://www-01.sil.org/iso639-3/documentation.asp?id=tur) | Turkish | 59M | 51 | | [`urd`](http://www-01.sil.org/iso639-3/documentation.asp?id=urd) | Urdu | 54M | 52 | | [`guj`](http://www-01.sil.org/iso639-3/documentation.asp?id=guj) | Gujarati | 44M | 53 | | [`pol`](http://www-01.sil.org/iso639-3/documentation.asp?id=pol) | Polish | 44M | 54 | | [`ukr`](http://www-01.sil.org/iso639-3/documentation.asp?id=ukr) | Ukrainian | 41M | 55 | | [`kan`](http://www-01.sil.org/iso639-3/documentation.asp?id=kan) | Kannada | 38M | 56 | | [`mai`](http://www-01.sil.org/iso639-3/documentation.asp?id=mai) | Maithili | 35M | 57 | | [`mal`](http://www-01.sil.org/iso639-3/documentation.asp?id=mal) | Malayalam | 34M | 58 | | [`pes`](http://www-01.sil.org/iso639-3/documentation.asp?id=pes) | Iranian Persian | 33M | 59 | | [`mya`](http://www-01.sil.org/iso639-3/documentation.asp?id=mya) | Burmese | 31M | 60 | | [`swh`](http://www-01.sil.org/iso639-3/documentation.asp?id=swh) | Swahili (individual language) | 30M | 61 | | [`sun`](http://www-01.sil.org/iso639-3/documentation.asp?id=sun) | Sundanese | 27M | 62 | | [`ron`](http://www-01.sil.org/iso639-3/documentation.asp?id=ron) | Romanian | 26M | 63 | | [`pan`](http://www-01.sil.org/iso639-3/documentation.asp?id=pan) | Panjabi | 26M | 64 | | [`bho`](http://www-01.sil.org/iso639-3/documentation.asp?id=bho) | Bhojpuri | 25M | 65 | | [`amh`](http://www-01.sil.org/iso639-3/documentation.asp?id=amh) | Amharic | 23M | 66 | | [`hau`](http://www-01.sil.org/iso639-3/documentation.asp?id=hau) | Hausa | 22M | 67 | | [`fuv`](http://www-01.sil.org/iso639-3/documentation.asp?id=fuv) | Nigerian Fulfulde | 22M | 68 | | [`bos`](http://www-01.sil.org/iso639-3/documentation.asp?id=bos) | Bosnian (Cyrillic) | 21M | 69 | | 
[`bos`](http://www-01.sil.org/iso639-3/documentation.asp?id=bos) | Bosnian (Latin) | 21M | 70 | | [`hrv`](http://www-01.sil.org/iso639-3/documentation.asp?id=hrv) | Croatian | 21M | 71 | | [`nld`](http://www-01.sil.org/iso639-3/documentation.asp?id=nld) | Dutch | 21M | 72 | | [`srp`](http://www-01.sil.org/iso639-3/documentation.asp?id=srp) | Serbian (Cyrillic) | 21M | 73 | | [`srp`](http://www-01.sil.org/iso639-3/documentation.asp?id=srp) | Serbian (Latin) | 21M | 74 | | [`tha`](http://www-01.sil.org/iso639-3/documentation.asp?id=tha) | Thai | 21M | 75 | | [`ckb`](http://www-01.sil.org/iso639-3/documentation.asp?id=ckb) | Central Kurdish | 20M | 76 | | [`yor`](http://www-01.sil.org/iso639-3/documentation.asp?id=yor) | Yoruba | 20M | 77 | | [`uzn`](http://www-01.sil.org/iso639-3/documentation.asp?id=uzn) | Northern Uzbek (Cyrillic) | 18M | 78 | | [`uzn`](http://www-01.sil.org/iso639-3/documentation.asp?id=uzn) | Northern Uzbek (Latin) | 18M | 79 | | [`zlm`](http://www-01.sil.org/iso639-3/documentation.asp?id=zlm) | Malay (individual language) (Arabic) | 18M | 80 | | [`zlm`](http://www-01.sil.org/iso639-3/documentation.asp?id=zlm) | Malay (individual language) (Latin) | 18M | 81 | | [`ibo`](http://www-01.sil.org/iso639-3/documentation.asp?id=ibo) | Igbo | 17M | 82 | | [`npi`](http://www-01.sil.org/iso639-3/documentation.asp?id=npi) | Nepali (individual language) | 16M | 83 | | [`ceb`](http://www-01.sil.org/iso639-3/documentation.asp?id=ceb) | Cebuano | 15M | 84 | | [`skr`](http://www-01.sil.org/iso639-3/documentation.asp?id=skr) | Saraiki | 15M | 85 | | [`tgl`](http://www-01.sil.org/iso639-3/documentation.asp?id=tgl) | Tagalog | 15M | 86 | | [`hun`](http://www-01.sil.org/iso639-3/documentation.asp?id=hun) | Hungarian | 15M | 87 | | [`azj`](http://www-01.sil.org/iso639-3/documentation.asp?id=azj) | North Azerbaijani (Cyrillic) | 14M | 88 | | [`azj`](http://www-01.sil.org/iso639-3/documentation.asp?id=azj) | North Azerbaijani (Latin) | 14M | 89 | | 
[`sin`](http://www-01.sil.org/iso639-3/documentation.asp?id=sin) | Sinhala | 13M | 90 | | [`koi`](http://www-01.sil.org/iso639-3/documentation.asp?id=koi) | Komi-Permyak | 13M | 91 | | [`ell`](http://www-01.sil.org/iso639-3/documentation.asp?id=ell) | Modern Greek (1453-) | 12M | 92 | | [`ces`](http://www-01.sil.org/iso639-3/documentation.asp?id=ces) | Czech | 12M | 93 | | [`mag`](http://www-01.sil.org/iso639-3/documentation.asp?id=mag) | Magahi | 11M | 94 | | [`run`](http://www-01.sil.org/iso639-3/documentation.asp?id=run) | Rundi | 11M | 95 | | [`bel`](http://www-01.sil.org/iso639-3/documentation.asp?id=bel) | Belarusian | 10M | 96 | | [`plt`](http://www-01.sil.org/iso639-3/documentation.asp?id=plt) | Plateau Malagasy | 10M | 97 | | [`qug`](http://www-01.sil.org/iso639-3/documentation.asp?id=qug) | Chimborazo Highland Quichua | 10M | 98 | | [`mad`](http://www-01.sil.org/iso639-3/documentation.asp?id=mad) | Madurese | 10M | 99 | | [`nya`](http://www-01.sil.org/iso639-3/documentation.asp?id=nya) | Nyanja | 10M | 100 | | [`zyb`](http://www-01.sil.org/iso639-3/documentation.asp?id=zyb) | Yongbei Zhuang | 10M | 101 | | [`pbu`](http://www-01.sil.org/iso639-3/documentation.asp?id=pbu) | Northern Pashto | 10M | 102 | | [`kin`](http://www-01.sil.org/iso639-3/documentation.asp?id=kin) | Kinyarwanda | 9M | 103 | | [`zul`](http://www-01.sil.org/iso639-3/documentation.asp?id=zul) | Zulu | 9M | 104 | | [`bul`](http://www-01.sil.org/iso639-3/documentation.asp?id=bul) | Bulgarian | 9M | 105 | | [`swe`](http://www-01.sil.org/iso639-3/documentation.asp?id=swe) | Swedish | 9M | 106 | | [`lin`](http://www-01.sil.org/iso639-3/documentation.asp?id=lin) | Lingala | 8M | 107 | | [`som`](http://www-01.sil.org/iso639-3/documentation.asp?id=som) | Somali | 8M | 108 | | [`hms`](http://www-01.sil.org/iso639-3/documentation.asp?id=hms) | Southern Qiandong Miao | 8M | 109 | | [`hnj`](http://www-01.sil.org/iso639-3/documentation.asp?id=hnj) | Hmong Njua | 8M | 110 | | 
[`ilo`](http://www-01.sil.org/iso639-3/documentation.asp?id=ilo) | Iloko | 8M | 111 | | [`kaz`](http://www-01.sil.org/iso639-3/documentation.asp?id=kaz) | Kazakh | 8M | 112 | | [`uig`](http://www-01.sil.org/iso639-3/documentation.asp?id=uig) | Uighur (Arabic) | 7M | 113 | | [`uig`](http://www-01.sil.org/iso639-3/documentation.asp?id=uig) | Uighur (Latin) | 7M | 114 | | [`hat`](http://www-01.sil.org/iso639-3/documentation.asp?id=hat) | Haitian | 7M | 115 | | [`khm`](http://www-01.sil.org/iso639-3/documentation.asp?id=khm) | Khmer | 7M | 116 | | [`prs`](http://www-01.sil.org/iso639-3/documentation.asp?id=prs) | Dari | 7M | 117 | | [`hil`](http://www-01.sil.org/iso639-3/documentation.asp?id=hil) | Hiligaynon | 7M | 118 | | [`sna`](http://www-01.sil.org/iso639-3/documentation.asp?id=sna) | Shona | 7M | 119 | | [`tat`](http://www-01.sil.org/iso639-3/documentation.asp?id=tat) | Tatar | 7M | 120 | | [`xho`](http://www-01.sil.org/iso639-3/documentation.asp?id=xho) | Xhosa | 7M | 121 | | [`hye`](http://www-01.sil.org/iso639-3/documentation.asp?id=hye) | Armenian | 7M | 122 | | [`min`](http://www-01.sil.org/iso639-3/documentation.asp?id=min) | Minangkabau | 7M | 123 | | [`afr`](http://www-01.sil.org/iso639-3/documentation.asp?id=afr) | Afrikaans | 6M | 124 | | [`lua`](http://www-01.sil.org/iso639-3/documentation.asp?id=lua) | Luba-Lulua | 6M | 125 | | [`sat`](http://www-01.sil.org/iso639-3/documentation.asp?id=sat) | Santali | 6M | 126 | | [`bod`](http://www-01.sil.org/iso639-3/documentation.asp?id=bod) | Tibetan | 6M | 127 | | [`tir`](http://www-01.sil.org/iso639-3/documentation.asp?id=tir) | Tigrinya | 6M | 128 | | [`fin`](http://www-01.sil.org/iso639-3/documentation.asp?id=fin) | Finnish | 6M | 129 | | [`slk`](http://www-01.sil.org/iso639-3/documentation.asp?id=slk) | Slovak | 6M | 130 | | [`tuk`](http://www-01.sil.org/iso639-3/documentation.asp?id=tuk) | Turkmen (Cyrillic) | 5M | 131 | | [`tuk`](http://www-01.sil.org/iso639-3/documentation.asp?id=tuk) | Turkmen (Latin) | 
5M | 132 | | [`dan`](http://www-01.sil.org/iso639-3/documentation.asp?id=dan) | Danish | 5M | 133 | | [`nob`](http://www-01.sil.org/iso639-3/documentation.asp?id=nob) | Norwegian Bokmål | 5M | 134 | | [`suk`](http://www-01.sil.org/iso639-3/documentation.asp?id=suk) | Sukuma | 5M | 135 | | [`als`](http://www-01.sil.org/iso639-3/documentation.asp?id=als) | Tosk Albanian | 5M | 136 | | [`sag`](http://www-01.sil.org/iso639-3/documentation.asp?id=sag) | Sango | 5M | 137 | | [`nno`](http://www-01.sil.org/iso639-3/documentation.asp?id=nno) | Norwegian Nynorsk | 5M | 138 | | [`heb`](http://www-01.sil.org/iso639-3/documentation.asp?id=heb) | Hebrew | 5M | 139 | | [`mos`](http://www-01.sil.org/iso639-3/documentation.asp?id=mos) | Mossi | 5M | 140 | | [`tgk`](http://www-01.sil.org/iso639-3/documentation.asp?id=tgk) | Tajik | 4M | 141 | | [`cat`](http://www-01.sil.org/iso639-3/documentation.asp?id=cat) | Catalan | 4M | 142 | | [`sot`](http://www-01.sil.org/iso639-3/documentation.asp?id=sot) | Southern Sotho | 4M | 143 | | [`kat`](http://www-01.sil.org/iso639-3/documentation.asp?id=kat) | Georgian | 4M | 144 | | [`bcl`](http://www-01.sil.org/iso639-3/documentation.asp?id=bcl) | Central Bikol | 4M | 145 | | [`glg`](http://www-01.sil.org/iso639-3/documentation.asp?id=glg) | Galician | 4M | 146 | | [`lao`](http://www-01.sil.org/iso639-3/documentation.asp?id=lao) | Lao | 4M | 147 | | [`lit`](http://www-01.sil.org/iso639-3/documentation.asp?id=lit) | Lithuanian | 4M | 148 | | [`umb`](http://www-01.sil.org/iso639-3/documentation.asp?id=umb) | Umbundu | 4M | 149 | | [`tsn`](http://www-01.sil.org/iso639-3/documentation.asp?id=tsn) | Tswana | 4M | 150 | | [`vec`](http://www-01.sil.org/iso639-3/documentation.asp?id=vec) | Venetian | 4M | 151 | | [`nso`](http://www-01.sil.org/iso639-3/documentation.asp?id=nso) | Pedi | 4M | 152 | | [`ban`](http://www-01.sil.org/iso639-3/documentation.asp?id=ban) | Balinese | 4M | 153 | | [`bug`](http://www-01.sil.org/iso639-3/documentation.asp?id=bug) | 
Buginese | 4M | 154 | | [`knc`](http://www-01.sil.org/iso639-3/documentation.asp?id=knc) | Central Kanuri | 4M | 155 | | [`kng`](http://www-01.sil.org/iso639-3/documentation.asp?id=kng) | Koongo | 3M | 156 | | [`ibb`](http://www-01.sil.org/iso639-3/documentation.asp?id=ibb) | Ibibio | 3M | 157 | | [`lug`](http://www-01.sil.org/iso639-3/documentation.asp?id=lug) | Ganda | 3M | 158 | | [`ace`](http://www-01.sil.org/iso639-3/documentation.asp?id=ace) | Achinese | 3M | 159 | | [`bam`](http://www-01.sil.org/iso639-3/documentation.asp?id=bam) | Bambara | 3M | 160 | | [`tzm`](http://www-01.sil.org/iso639-3/documentation.asp?id=tzm) | Central Atlas Tamazight | 3M | 161 | | [`ydd`](http://www-01.sil.org/iso639-3/documentation.asp?id=ydd) | Eastern Yiddish | 3M | 162 | | [`kmb`](http://www-01.sil.org/iso639-3/documentation.asp?id=kmb) | Kimbundu | 3M | 163 | | [`lun`](http://www-01.sil.org/iso639-3/documentation.asp?id=lun) | Lunda | 3M | 164 | | [`shn`](http://www-01.sil.org/iso639-3/documentation.asp?id=shn) | Shan | 3M | 165 | | [`war`](http://www-01.sil.org/iso639-3/documentation.asp?id=war) | Waray (Philippines) | 3M | 166 | | [`dyu`](http://www-01.sil.org/iso639-3/documentation.asp?id=dyu) | Dyula | 3M | 167 | | [`wol`](http://www-01.sil.org/iso639-3/documentation.asp?id=wol) | Wolof | 3M | 168 | | [`kir`](http://www-01.sil.org/iso639-3/documentation.asp?id=kir) | Kirghiz | 3M | 169 | | [`nds`](http://www-01.sil.org/iso639-3/documentation.asp?id=nds) | Low German | 3M | 170 | | [`fuf`](http://www-01.sil.org/iso639-3/documentation.asp?id=fuf) | Pular | 3M | 171 | | [`mkd`](http://www-01.sil.org/iso639-3/documentation.asp?id=mkd) | Macedonian | 3M | 172 | | [`vmw`](http://www-01.sil.org/iso639-3/documentation.asp?id=vmw) | Makhuwa | 3M | 173 | | [`zgh`](http://www-01.sil.org/iso639-3/documentation.asp?id=zgh) | Standard Moroccan Tamazight | 2M | 174 | | [`ewe`](http://www-01.sil.org/iso639-3/documentation.asp?id=ewe) | Ewe | 2M | 175 | | 
[`khk`](http://www-01.sil.org/iso639-3/documentation.asp?id=khk) | Halh Mongolian | 2M | 176 | | [`slv`](http://www-01.sil.org/iso639-3/documentation.asp?id=slv) | Slovenian | 2M | 177 | | [`ayr`](http://www-01.sil.org/iso639-3/documentation.asp?id=ayr) | Central Aymara | 2M | 178 | | [`bem`](http://www-01.sil.org/iso639-3/documentation.asp?id=bem) | Bemba (Zambia) | 2M | 179 | | [`emk`](http://www-01.sil.org/iso639-3/documentation.asp?id=emk) | Eastern Maninkakan | 2M | 180 | | [`bci`](http://www-01.sil.org/iso639-3/documentation.asp?id=bci) | Baoulé | 2M | 181 | | [`bum`](http://www-01.sil.org/iso639-3/documentation.asp?id=bum) | Bulu (Cameroon) | 2M | 182 | | [`epo`](http://www-01.sil.org/iso639-3/documentation.asp?id=epo) | Esperanto | 2M | 183 | | [`pam`](http://www-01.sil.org/iso639-3/documentation.asp?id=pam) | Pampanga | 2M | 184 | | [`tiv`](http://www-01.sil.org/iso639-3/documentation.asp?id=tiv) | Tiv | 2M | 185 | | [`tpi`](http://www-01.sil.org/iso639-3/documentation.asp?id=tpi) | Tok Pisin | 2M | 186 | | [`ven`](http://www-01.sil.org/iso639-3/documentation.asp?id=ven) | Venda | 2M | 187 | | [`ssw`](http://www-01.sil.org/iso639-3/documentation.asp?id=ssw) | Swati | 2M | 188 | | [`nyn`](http://www-01.sil.org/iso639-3/documentation.asp?id=nyn) | Nyankole | 2M | 189 | | [`kbd`](http://www-01.sil.org/iso639-3/documentation.asp?id=kbd) | Kabardian | 2M | 190 | | [`iii`](http://www-01.sil.org/iso639-3/documentation.asp?id=iii) | Sichuan Yi | 2M | 191 | | [`yao`](http://www-01.sil.org/iso639-3/documentation.asp?id=yao) | Yao | 2M | 192 | | [`lvs`](http://www-01.sil.org/iso639-3/documentation.asp?id=lvs) | Standard Latvian | 2M | 193 | | [`quz`](http://www-01.sil.org/iso639-3/documentation.asp?id=quz) | Cusco Quechua | 2M | 194 | | [`src`](http://www-01.sil.org/iso639-3/documentation.asp?id=src) | Logudorese Sardinian | 2M | 195 | | [`rup`](http://www-01.sil.org/iso639-3/documentation.asp?id=rup) | Macedo-Romanian | 2M | 196 | | 
[`sco`](http://www-01.sil.org/iso639-3/documentation.asp?id=sco) | Scots | 2M | 197 | | [`tso`](http://www-01.sil.org/iso639-3/documentation.asp?id=tso) | Tsonga | 2M | 198 | | [`men`](http://www-01.sil.org/iso639-3/documentation.asp?id=men) | Mende (Sierra Leone) | 1M | 199 | | [`fon`](http://www-01.sil.org/iso639-3/documentation.asp?id=fon) | Fon | 1M | 200 | | [`nhn`](http://www-01.sil.org/iso639-3/documentation.asp?id=nhn) | Central Nahuatl | 1M | 201 | | [`dip`](http://www-01.sil.org/iso639-3/documentation.asp?id=dip) | Northeastern Dinka | 1M | 202 | | [`kde`](http://www-01.sil.org/iso639-3/documentation.asp?id=kde) | Makonde | 1M | 203 | | [`kbp`](http://www-01.sil.org/iso639-3/documentation.asp?id=kbp) | Kabiyè | 1M | 204 | | [`tem`](http://www-01.sil.org/iso639-3/documentation.asp?id=tem) | Timne | 1M | 205 | | [`toi`](http://www-01.sil.org/iso639-3/documentation.asp?id=toi) | Tonga (Zambia) | 1M | 206 | | [`ekk`](http://www-01.sil.org/iso639-3/documentation.asp?id=ekk) | Standard Estonian | 1M | 207 | | [`snk`](http://www-01.sil.org/iso639-3/documentation.asp?id=snk) | Soninke | 1M | 208 | | [`cjk`](http://www-01.sil.org/iso639-3/documentation.asp?id=cjk) | Chokwe | 1M | 209 | | [`ada`](http://www-01.sil.org/iso639-3/documentation.asp?id=ada) | Adangme | 1M | 210 | | [`aii`](http://www-01.sil.org/iso639-3/documentation.asp?id=aii) | Assyrian Neo-Aramaic | 1M | 211 | | [`quy`](http://www-01.sil.org/iso639-3/documentation.asp?id=quy) | Ayacucho Quechua | 1M | 212 | | [`rmn`](http://www-01.sil.org/iso639-3/documentation.asp?id=rmn) | Balkan Romani | 1M | 213 | | [`bin`](http://www-01.sil.org/iso639-3/documentation.asp?id=bin) | Bini | 1M | 214 | | [`gaa`](http://www-01.sil.org/iso639-3/documentation.asp?id=gaa) | Ga | 1M | 215 | | [`ndo`](http://www-01.sil.org/iso639-3/documentation.asp?id=ndo) | Ndonga | 1M | 216 | 217 | ## License 218 | 219 | [MIT](https://github.com/wooorm/franc/blob/main/license) © [Titus Wormer](http://wooorm.com) 220 | 
-------------------------------------------------------------------------------- /script/build.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @typedef {import('type-fest').PackageJson} PackageJson 3 | * @typedef {import('mdast').Root} Root 4 | * @typedef {import('mdast').TableRow} TableRow 5 | * 6 | * @typedef Info 7 | * @property {number} score 8 | * @property {string} name 9 | * @property {string} code 10 | * @property {string|undefined} udhr 11 | * @property {string} script 12 | * @property {number|undefined} speakers 13 | */ 14 | 15 | import fs from 'node:fs/promises' 16 | import {resolve} from 'import-meta-resolve' 17 | import {isHidden} from 'is-hidden' 18 | import {iso6393} from 'iso-639-3' 19 | import {speakers as defaultSpeakers} from 'speakers' 20 | import {unified} from 'unified' 21 | import remarkGfm from 'remark-gfm' 22 | import remarkStringify from 'remark-stringify' 23 | import {fromHtml} from 'hast-util-from-html' 24 | import {select, selectAll} from 'hast-util-select' 25 | import {toString} from 'hast-util-to-string' 26 | import parseAuthor from 'parse-author' 27 | import alphaSort from 'alpha-sort' 28 | import {min} from 'trigrams' 29 | // @ts-expect-error: untyped. 30 | import unicode from '@unicode/unicode-15.0.0' 31 | import {customFixtures} from './custom-fixtures.js' 32 | 33 | /* eslint-disable no-await-in-loop */ 34 | 35 | const own = {}.hasOwnProperty 36 | 37 | const ascending = alphaSort() 38 | const scripts = unicode.Script 39 | 40 | const monorepo = new URL('../', import.meta.url) 41 | const packages = new URL('packages/', monorepo) 42 | const udhrBase = resolve('udhr', import.meta.url) 43 | /** @type {PackageJson} */ 44 | const mono = JSON.parse( 45 | String(await fs.readFile(new URL('package.json', monorepo))) 46 | ) 47 | 48 | // ISO 639-3 types to ignore. 
49 | const iso6393TypeExclude = new Set(['special']) 50 | 51 | // Some languages are ignored, no matter what `threshold` is chosen. 52 | const iso6393Exclude = new Set([ 53 | // Same UDHR as ckb (Central Kurdish), but with fewer speakers. 54 | 'kmr' // Northern Kurdish 55 | ]) 56 | 57 | const udhrKeyPrefer = new Set([ 58 | /* Asante: 2,800,000; Fante: 1,900,000; Akuapem: 555,000. 59 | * http://www.ethnologue.com/language/aka */ 60 | 'aka_asante', 61 | 62 | // Occitan: 63 | // Provençal (before 2007: `prv`): ±350k 30 years ago; 64 | // Auvergnat (before 2007: `auv`) ±80k; 65 | // Languedocien (before 2007: `lnc`) ±300k; 66 | // 67 | // I’m not sure why `oci_1`, `oci_2`, `oci_3`, `oci_4` are classified 68 | // as `oci`, because they’re explained as Francoprovençal, Fribourg; 69 | // Francoprovençal, Savoie; Francoprovençal, Vaud; 70 | // and Francoprovençal, Valais; 71 | // which seems to be the language Franco-Provençal with a different 72 | // ISO code? 73 | 'lnc', 74 | 75 | // In 2015 Unicode added lowercase Cherokee support (which people use 76 | // in handwriting), so prefer that one. 77 | 'chr_cased', 78 | 79 | // Languages with dated translations, pick the newest. 80 | 'deu_1996', 81 | 'ron_2006', 82 | 83 | // Monotonic Greek is Modern Greek. 84 | 'ell_monotonic', 85 | 86 | // It says “popular” in the name? 87 | 'hat_popular', 88 | 89 | // Seems to be most popular in Nigeria: 90 | // 91 | 'hau_NG', 92 | 93 | // Huastec 94 | // About 250k speakers. 95 | // `hva` (San Luís Potosí) — 48k 96 | // `hus` (Veracruz) — 22k 97 | // `hsf` (Sierra de Otontepec) — 12k 98 | 'hva', 99 | 100 | // No real reason. 101 | 'nya_chinyanja', 102 | 103 | // Many more speakers than European. 104 | 'por_BR', 105 | 106 | // Tso in Mozambique has a UDHR preview: http://www.ohchr.org/EN/UDHR/Pages/Language.aspx?LangID=tso 107 | 'tso_MZ' 108 | ]) 109 | 110 | const iso15924Exclude = new Set([ 111 | // Note these are ISO 15924 PvA’s, used for Unicode Scripts. 
112 | 'Common', 113 | 'Inherited' 114 | ]) 115 | 116 | /** @type {Record} */ 117 | const speakers = { 118 | ...defaultSpeakers, 119 | // Update some counts (`speakers` uses macrolanguage) 120 | // Standard Estonian from inclusive code. 121 | ekk: defaultSpeakers.ekk || defaultSpeakers.est, 122 | // Standard Latvian from inclusive code. 123 | lvs: defaultSpeakers.lvs || defaultSpeakers.lav 124 | } 125 | 126 | /** 127 | * Map of languages where trigrams don’t work, but with a unique script. 128 | * 129 | * @type {Record} 130 | */ 131 | const scriptsForSingleLanguages = { 132 | sat: {script: 'Ol_Chiki', udhr: undefined}, 133 | iii: {script: 'Yi', udhr: 'iii'}, 134 | cmn: {script: 'Han', udhr: 'cmn_hans'} 135 | } 136 | 137 | const trigrams = await min() 138 | const expressions = await createExpressions() 139 | const topLanguages = await createTopLanguages() 140 | const doc = await fs.readFile(new URL('franc/index.js', packages), 'utf8') 141 | const files = await fs.readdir(packages) 142 | let index = -1 143 | 144 | while (++index < files.length) { 145 | const basename = files[index] 146 | 147 | if (isHidden(basename)) continue 148 | 149 | const base = new URL(basename + '/', packages) 150 | /** @type {PackageJson} */ 151 | const pack = JSON.parse( 152 | String(await fs.readFile(new URL('package.json', base))) 153 | ) 154 | /** @type {number|undefined} */ 155 | // @ts-expect-error: custom field. 156 | const threshold = pack.threshold 157 | /** @type {Array} */ 158 | const support = [] 159 | /** @type {Record} */ 160 | const regularExpressions = {} /* Ha! 
*/ 161 | /** @type {Record>} */ 162 | const perScript = {} 163 | /** @type {Record>} */ 164 | const data = {} 165 | let list = topLanguages 166 | 167 | if (!threshold) { 168 | console.log('\nNo `threshold` field in `%s`', pack.name) 169 | continue 170 | } 171 | 172 | console.log() 173 | console.log('%s, threshold: %s', pack.name, threshold) 174 | 175 | if (threshold !== -1) { 176 | list = list.filter( 177 | (info) => typeof info.speakers === 'number' && info.speakers >= threshold 178 | ) 179 | } 180 | 181 | /** @type {Record>} */ 182 | const byScript = {} 183 | let offset = -1 184 | 185 | while (++offset < list.length) { 186 | const info = list[offset] 187 | const script = info.script 188 | 189 | if (!byScript[script]) { 190 | byScript[script] = [] 191 | } 192 | 193 | byScript[script].push(info) 194 | } 195 | 196 | /** @type {string} */ 197 | let script 198 | 199 | for (script in byScript) { 200 | if (own.call(byScript, script)) { 201 | const languages = byScript[script] 202 | 203 | if (languages.length > 1) { 204 | if (!regularExpressions[script]) { 205 | regularExpressions[script] = expressions[script] 206 | } 207 | 208 | perScript[script] = languages 209 | } else { 210 | support.push(languages[0]) 211 | regularExpressions[languages[0].code] = expressions[script] 212 | } 213 | } 214 | } 215 | 216 | for (script in perScript) { 217 | if (own.call(perScript, script)) { 218 | const scripts = perScript[script] 219 | /** @type {Record} */ 220 | const scriptObject = {} 221 | let index = -1 222 | 223 | data[script] = scriptObject 224 | 225 | while (++index < scripts.length) { 226 | const info = scripts[index] 227 | 228 | if (info.udhr && info.udhr in trigrams) { 229 | support.push(info) 230 | scriptObject[info.code] = trigrams[info.udhr] 231 | .concat() 232 | .reverse() 233 | .join('|') 234 | } else { 235 | console.log( 236 | ' Ignoring language without trigrams: %s (%s, %s)', 237 | info.code, 238 | info.name, 239 | script 240 | ) 241 | } 242 | } 243 | } 244 | } 245 | 
246 | // Push Japanese. 247 | // Unicode Kanji Table from: 248 | // 249 | const kanjiRegexSource = /[\u3400-\u4DB5\u4E00-\u9FAF]/ 250 | regularExpressions.jpn = new RegExp( 251 | expressions.Hiragana.source + 252 | '|' + 253 | expressions.Katakana.source + 254 | '|' + 255 | kanjiRegexSource.source, 256 | 'g' 257 | ) 258 | 259 | support.sort(sort) 260 | 261 | await fs.writeFile( 262 | new URL('expressions.js', base), 263 | generateExpressions(regularExpressions) 264 | ) 265 | 266 | await fs.writeFile( 267 | new URL('data.js', base), 268 | [ 269 | '/** @type {Record>} */', 270 | 'export const data = ' + JSON.stringify(data, null, 2), 271 | '' 272 | ].join('\n') 273 | ) 274 | 275 | await fs.writeFile(new URL('readme.md', base), generateReadme(pack, support)) 276 | 277 | if (pack.name !== mono.name) { 278 | await fs.writeFile( 279 | new URL('index.js', base), 280 | '// This file is generated by `build.js`\n' + doc 281 | ) 282 | } 283 | 284 | console.log('✓ %s w/ %s languages', pack.name, list.length) 285 | 286 | if (pack.name !== mono.name) { 287 | continue 288 | } 289 | 290 | console.log() 291 | console.log('Creating fixtures') 292 | 293 | /** @type {Record} */ 294 | const fixtures = {} 295 | offset = -1 296 | 297 | while (++offset < support.length) { 298 | const language = support[offset] 299 | const key = language.udhr || language.code 300 | let fixture = '' 301 | 302 | if (key in customFixtures) { 303 | fixture = customFixtures[key] 304 | } else if (language.udhr) { 305 | const tree = fromHtml( 306 | await fs.readFile( 307 | new URL('declaration/' + language.udhr + '.html', udhrBase) 308 | ) 309 | ) 310 | 311 | let nodes = selectAll('header p', tree) 312 | 313 | if (nodes.length === 0) { 314 | nodes = selectAll( 315 | 'body > :is(h1, h2, h3, h4, h5, h6), header :is(h1, h2, h3, h4, h5, h6)', 316 | tree 317 | ) 318 | } 319 | 320 | fixture = nodes.map((d) => toString(d)).join(' ') 321 | } 322 | 323 | if (!fixture) { 324 | console.log( 325 | ' Could not access preamble 
or note for `%s` (%s). No fixture is generated.', 326 | language.code, 327 | language.udhr 328 | ) 329 | } 330 | 331 | fixtures[key] = { 332 | iso6393: language.code, 333 | fixture: fixture.slice(0, 1000) 334 | } 335 | } 336 | 337 | await fs.writeFile( 338 | new URL('test/fixtures.js', monorepo), 339 | [ 340 | '/** @type {Record} */', 341 | 'export const fixtures = ' + JSON.stringify(fixtures, null, 2) 342 | ].join('\n') 343 | ) 344 | 345 | console.log('✓ fixtures') 346 | } 347 | 348 | /** 349 | * @param {Record} expressions 350 | */ 351 | function generateExpressions(expressions) { 352 | return [ 353 | '// This file is generated by `build.js`.', 354 | '/** @type {Record} */', 355 | 'export const expressions = {', 356 | ' ' + 357 | Object.keys(expressions) 358 | .map((script) => script + ': ' + expressions[script]) 359 | .join(',\n '), 360 | '}', 361 | '' 362 | ].join('\n') 363 | } 364 | 365 | /** 366 | * @param {PackageJson} pack 367 | * @param {Array} list 368 | */ 369 | function generateReadme(pack, list) { 370 | /** @type {number} */ 371 | // @ts-expect-error: custom field. 372 | const threshold = pack.threshold 373 | const counts = count(list) 374 | const licensee = 375 | typeof pack.author === 'string' ? parseAuthor(pack.author) : pack.author 376 | /** @type {Root} */ 377 | const tree = { 378 | type: 'root', 379 | children: [ 380 | {type: 'html', value: ''}, 381 | { 382 | type: 'heading', 383 | depth: 1, 384 | children: [{type: 'text', value: String(pack.name)}] 385 | }, 386 | { 387 | type: 'paragraph', 388 | children: [{type: 'text', value: pack.description + '.'}] 389 | }, 390 | { 391 | type: 'paragraph', 392 | children: [ 393 | { 394 | type: 'text', 395 | value: 396 | 'Built with support for ' + 397 | list.length + 398 | ' languages' + 399 | (threshold === -1 400 | ? '' 401 | : ' (' + 402 | threshold.toLocaleString('en', {notation: 'compact'}) + 403 | ' or more speakers)') + 404 | '.' 
405 | } 406 | ] 407 | }, 408 | { 409 | type: 'paragraph', 410 | children: [ 411 | {type: 'text', value: 'View the '}, 412 | { 413 | type: 'link', 414 | url: String(mono.repository), 415 | children: [{type: 'text', value: 'monorepo'}] 416 | }, 417 | {type: 'text', value: ' for more packages and\ninfo on using them.'} 418 | ] 419 | }, 420 | { 421 | type: 'heading', 422 | depth: 2, 423 | children: [{type: 'text', value: 'Install'}] 424 | }, 425 | { 426 | type: 'paragraph', 427 | children: [ 428 | {type: 'text', value: 'This package is '}, 429 | { 430 | type: 'link', 431 | url: 'https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c', 432 | children: [{type: 'text', value: 'ESM only'}] 433 | }, 434 | { 435 | type: 'text', 436 | value: '.\nIn Node.js (version 14.14+, 16.0+), install with\n' 437 | }, 438 | { 439 | type: 'link', 440 | url: 'https://docs.npmjs.com/cli/install', 441 | children: [{type: 'text', value: 'npm'}] 442 | }, 443 | {type: 'text', value: ':'} 444 | ] 445 | }, 446 | {type: 'paragraph', children: [{type: 'text', value: 'npm:'}]}, 447 | {type: 'code', lang: 'sh', value: 'npm install ' + pack.name}, 448 | { 449 | type: 'heading', 450 | depth: 2, 451 | children: [{type: 'text', value: 'Data'}] 452 | }, 453 | { 454 | type: 'paragraph', 455 | children: [ 456 | { 457 | type: 'text', 458 | value: 'This build supports the following languages:' 459 | } 460 | ] 461 | }, 462 | { 463 | type: 'table', 464 | align: [], 465 | children: [ 466 | { 467 | type: 'tableRow', 468 | children: [ 469 | {type: 'tableCell', children: [{type: 'text', value: 'Code'}]}, 470 | {type: 'tableCell', children: [{type: 'text', value: 'Name'}]}, 471 | { 472 | type: 'tableCell', 473 | children: [{type: 'text', value: 'Speakers'}] 474 | } 475 | ] 476 | }, 477 | ...list.map((info) => { 478 | /** @type {TableRow} */ 479 | const row = { 480 | type: 'tableRow', 481 | children: [ 482 | { 483 | type: 'tableCell', 484 | children: [ 485 | { 486 | type: 'link', 487 | url: 488 | 
'http://www-01.sil.org/iso639-3/documentation.asp?id=' + 489 | info.code, 490 | title: null, 491 | children: [{type: 'inlineCode', value: info.code}] 492 | } 493 | ] 494 | }, 495 | { 496 | type: 'tableCell', 497 | children: [ 498 | { 499 | type: 'text', 500 | value: 501 | info.name + 502 | (counts[info.code] === 1 503 | ? '' 504 | : ' (' + info.script + ')') 505 | } 506 | ] 507 | }, 508 | { 509 | type: 'tableCell', 510 | children: [ 511 | { 512 | type: 'text', 513 | value: 514 | typeof info.speakers === 'number' 515 | ? info.speakers.toLocaleString('en', { 516 | notation: 'compact', 517 | maximumFractionDigits: 0 518 | }) 519 | : 'unknown' 520 | } 521 | ] 522 | } 523 | ] 524 | } 525 | 526 | return row 527 | }) 528 | ] 529 | }, 530 | { 531 | type: 'heading', 532 | depth: 2, 533 | children: [{type: 'text', value: 'License'}] 534 | }, 535 | { 536 | type: 'paragraph', 537 | children: [ 538 | { 539 | type: 'link', 540 | url: mono.repository + '/blob/main/license', 541 | children: [{type: 'text', value: String(mono.license)}] 542 | }, 543 | {type: 'text', value: ' © '}, 544 | { 545 | type: 'link', 546 | url: String((licensee || {}).url), 547 | children: [{type: 'text', value: String((licensee || {}).name)}] 548 | } 549 | ] 550 | } 551 | ] 552 | } 553 | 554 | return unified().use(remarkStringify).use(remarkGfm).stringify(tree) 555 | } 556 | 557 | /** 558 | * @param {Array} list 559 | */ 560 | function count(list) { 561 | /** @type {Record} */ 562 | const map = {} 563 | let index = -1 564 | 565 | while (++index < list.length) { 566 | const info = list[index] 567 | map[info.code] = (map[info.code] || 0) + 1 568 | } 569 | 570 | return map 571 | } 572 | 573 | /** 574 | * Sort a list of languages by most-popular. 
575 | * 576 | * @param {Info} a 577 | * @param {Info} b 578 | * @returns {number} 579 | */ 580 | function sort(a, b) { 581 | return ( 582 | (b.speakers || 0) - (a.speakers || 0) || 583 | ascending(a.name, b.name) || 584 | ascending(a.script, b.script) 585 | ) 586 | } 587 | 588 | // eslint-disable-next-line complexity 589 | async function createTopLanguages() { 590 | /** @type {Array} */ 591 | const list = [] 592 | /** @type {string} */ 593 | let udhrKey 594 | 595 | for (udhrKey in trigrams) { 596 | if (own.call(trigrams, udhrKey)) { 597 | const declaration = String( 598 | await fs.readFile(new URL('declaration/' + udhrKey + '.html', udhrBase)) 599 | ) 600 | const tree = fromHtml(declaration) 601 | const root = select('html', tree) 602 | 603 | if ( 604 | !root || 605 | !root.properties || 606 | typeof root.properties.dataIso6393 !== 'string' 607 | ) { 608 | throw new TypeError('Missing `html[data-iso6393]` in `' + udhrKey + '`') 609 | } 610 | 611 | const code = root.properties.dataIso6393 612 | 613 | const info = iso6393.find((d) => d.iso6393 === code) 614 | 615 | if (!info) { 616 | throw new Error( 617 | 'Could not find valid `iso-639-3` entry for `' + code + '`' 618 | ) 619 | } 620 | 621 | if (iso6393TypeExclude.has(info.type)) { 622 | console.log('Ignoring special code `%s`', udhrKey) 623 | continue 624 | } 625 | 626 | if (iso6393Exclude.has(code)) { 627 | console.log('Ignoring unsafe language `%s`', udhrKey) 628 | continue 629 | } 630 | 631 | let content = '' 632 | 633 | if (info) { 634 | content = selectAll('article p', root) 635 | .map((d) => toString(d)) 636 | .join(' ') 637 | } 638 | 639 | /** @type {Record} */ 640 | const scriptCounts = {} 641 | /** @type {string} */ 642 | let script 643 | 644 | for (script in expressions) { 645 | if (own.call(expressions, script) && !iso15924Exclude.has(script)) { 646 | const countMatch = content.match(expressions[script]) 647 | const count = 648 | Math.round( 649 | ((countMatch ? 
countMatch.length : 0) / content.length) * 100 650 | ) / 100 651 | 652 | if (count > 0.05) { 653 | scriptCounts[script] = count 654 | } 655 | } 656 | } 657 | 658 | // Japanese is different. 659 | const scripts = 660 | code === 'jpn' 661 | ? ['Hiragana, Katakana, and Han'] 662 | : code === 'idu' 663 | ? ['Latin'] // Mostly Latin. 664 | : Object.keys(scriptCounts) 665 | 666 | if (scripts.length > 1) { 667 | console.log('scripts:', scriptCounts) 668 | throw new Error( 669 | 'Woops, I found a declaration (`' + 670 | udhrKey + 671 | '`) which uses more than one script. Franc is not build for that. Exiting.' 672 | ) 673 | } 674 | 675 | const lettersOnly = udhrKey.replace(/[^a-z]+/g, '') 676 | 677 | let score = 1 678 | 679 | if (udhrKeyPrefer.has(udhrKey)) { 680 | score *= 2 681 | } else { 682 | // Lose points for number of underscores and digits. 683 | score /= udhrKey.length - lettersOnly.length + 1 684 | } 685 | 686 | list.push({ 687 | score, 688 | name: info.name, 689 | code, 690 | udhr: udhrKey, 691 | script: scripts[0], 692 | speakers: code in speakers ? speakers[code] : undefined 693 | }) 694 | } 695 | } 696 | 697 | let index = -1 698 | while (++index < iso6393.length) { 699 | const {name, iso6393: code} = iso6393[index] 700 | 701 | // Manual scripts for languages without trigrams. 
702 | if (own.call(scriptsForSingleLanguages, code)) { 703 | const info = scriptsForSingleLanguages[code] 704 | list.push({ 705 | score: 1, 706 | name, 707 | code, 708 | udhr: info.udhr, 709 | script: info.script, 710 | speakers: speakers[code] 711 | }) 712 | } 713 | } 714 | 715 | /** @type {Record>} */ 716 | const byIsoAndScript = {} 717 | index = -1 718 | 719 | while (++index < list.length) { 720 | const info = list[index] 721 | const key = info.code + ':' + info.script 722 | const similar = byIsoAndScript[key] || (byIsoAndScript[key] = []) 723 | similar.push(info) 724 | } 725 | 726 | /** @type {Array} */ 727 | const bestScores = [] 728 | /** @type {string} */ 729 | let key 730 | 731 | for (key in byIsoAndScript) { 732 | if (own.call(byIsoAndScript, key)) { 733 | const list = byIsoAndScript[key] 734 | 735 | // High score first. 736 | list.sort((a, b) => b.score - a.score) 737 | 738 | if (list[1] && list[0].score === list[1].score) { 739 | console.log( 740 | 'Not sure which one to pick, please prefer one specific UDHR key', 741 | list 742 | ) 743 | } 744 | 745 | bestScores.push(list[0]) 746 | } 747 | } 748 | 749 | return bestScores.sort(sort) 750 | } 751 | 752 | async function createExpressions() { 753 | /** @type {Record} */ 754 | const result = {} 755 | 756 | await Promise.all( 757 | scripts.map( 758 | /** 759 | * @param {string} script 760 | */ 761 | async (script) => { 762 | /** @type {{default: RegExp}} */ 763 | const mod = await import( 764 | '@unicode/unicode-15.0.0/Script/' + script + '/regex.js' 765 | ) 766 | result[script] = new RegExp(mod.default.source, 'g') 767 | } 768 | ) 769 | ) 770 | 771 | return result 772 | } 773 | 774 | /* eslint-enable no-await-in-loop */ 775 | -------------------------------------------------------------------------------- /packages/franc-all/readme.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # franc-all 4 | 5 | Detect the language of text. 
6 | 7 | Built with support for 419 languages. 8 | 9 | View the [monorepo](https://github.com/wooorm/franc) for more packages and 10 | info on using them. 11 | 12 | ## Install 13 | 14 | This package is [ESM only](https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c). 15 | In Node.js (version 14.14+, 16.0+), install with 16 | [npm](https://docs.npmjs.com/cli/install): 17 | 18 | npm: 19 | 20 | ```sh 21 | npm install franc-all 22 | ``` 23 | 24 | ## Data 25 | 26 | This build supports the following languages: 27 | 28 | | Code | Name | Speakers | 29 | | - | - | - | 30 | | [`cmn`](http://www-01.sil.org/iso639-3/documentation.asp?id=cmn) | Mandarin Chinese | 885M | 31 | | [`spa`](http://www-01.sil.org/iso639-3/documentation.asp?id=spa) | Spanish | 332M | 32 | | [`eng`](http://www-01.sil.org/iso639-3/documentation.asp?id=eng) | English | 322M | 33 | | [`rus`](http://www-01.sil.org/iso639-3/documentation.asp?id=rus) | Russian | 288M | 34 | | [`arb`](http://www-01.sil.org/iso639-3/documentation.asp?id=arb) | Standard Arabic | 280M | 35 | | [`ben`](http://www-01.sil.org/iso639-3/documentation.asp?id=ben) | Bengali | 196M | 36 | | [`hin`](http://www-01.sil.org/iso639-3/documentation.asp?id=hin) | Hindi | 182M | 37 | | [`por`](http://www-01.sil.org/iso639-3/documentation.asp?id=por) | Portuguese | 182M | 38 | | [`ind`](http://www-01.sil.org/iso639-3/documentation.asp?id=ind) | Indonesian | 140M | 39 | | [`jpn`](http://www-01.sil.org/iso639-3/documentation.asp?id=jpn) | Japanese | 125M | 40 | | [`fra`](http://www-01.sil.org/iso639-3/documentation.asp?id=fra) | French | 124M | 41 | | [`deu`](http://www-01.sil.org/iso639-3/documentation.asp?id=deu) | German | 121M | 42 | | [`jav`](http://www-01.sil.org/iso639-3/documentation.asp?id=jav) | Javanese (Javanese) | 76M | 43 | | [`jav`](http://www-01.sil.org/iso639-3/documentation.asp?id=jav) | Javanese (Latin) | 76M | 44 | | [`kor`](http://www-01.sil.org/iso639-3/documentation.asp?id=kor) | Korean | 75M | 45 | | 
[`tel`](http://www-01.sil.org/iso639-3/documentation.asp?id=tel) | Telugu | 73M | 46 | | [`vie`](http://www-01.sil.org/iso639-3/documentation.asp?id=vie) | Vietnamese | 67M | 47 | | [`mar`](http://www-01.sil.org/iso639-3/documentation.asp?id=mar) | Marathi | 65M | 48 | | [`ita`](http://www-01.sil.org/iso639-3/documentation.asp?id=ita) | Italian | 63M | 49 | | [`tam`](http://www-01.sil.org/iso639-3/documentation.asp?id=tam) | Tamil | 62M | 50 | | [`tur`](http://www-01.sil.org/iso639-3/documentation.asp?id=tur) | Turkish | 59M | 51 | | [`urd`](http://www-01.sil.org/iso639-3/documentation.asp?id=urd) | Urdu | 54M | 52 | | [`guj`](http://www-01.sil.org/iso639-3/documentation.asp?id=guj) | Gujarati | 44M | 53 | | [`pol`](http://www-01.sil.org/iso639-3/documentation.asp?id=pol) | Polish | 44M | 54 | | [`ukr`](http://www-01.sil.org/iso639-3/documentation.asp?id=ukr) | Ukrainian | 41M | 55 | | [`kan`](http://www-01.sil.org/iso639-3/documentation.asp?id=kan) | Kannada | 38M | 56 | | [`mai`](http://www-01.sil.org/iso639-3/documentation.asp?id=mai) | Maithili | 35M | 57 | | [`mal`](http://www-01.sil.org/iso639-3/documentation.asp?id=mal) | Malayalam | 34M | 58 | | [`pes`](http://www-01.sil.org/iso639-3/documentation.asp?id=pes) | Iranian Persian | 33M | 59 | | [`mya`](http://www-01.sil.org/iso639-3/documentation.asp?id=mya) | Burmese | 31M | 60 | | [`swh`](http://www-01.sil.org/iso639-3/documentation.asp?id=swh) | Swahili (individual language) | 30M | 61 | | [`sun`](http://www-01.sil.org/iso639-3/documentation.asp?id=sun) | Sundanese | 27M | 62 | | [`ron`](http://www-01.sil.org/iso639-3/documentation.asp?id=ron) | Romanian | 26M | 63 | | [`pan`](http://www-01.sil.org/iso639-3/documentation.asp?id=pan) | Panjabi | 26M | 64 | | [`bho`](http://www-01.sil.org/iso639-3/documentation.asp?id=bho) | Bhojpuri | 25M | 65 | | [`amh`](http://www-01.sil.org/iso639-3/documentation.asp?id=amh) | Amharic | 23M | 66 | | [`hau`](http://www-01.sil.org/iso639-3/documentation.asp?id=hau) | Hausa 
| 22M | 67 | | [`fuv`](http://www-01.sil.org/iso639-3/documentation.asp?id=fuv) | Nigerian Fulfulde | 22M | 68 | | [`bos`](http://www-01.sil.org/iso639-3/documentation.asp?id=bos) | Bosnian (Cyrillic) | 21M | 69 | | [`bos`](http://www-01.sil.org/iso639-3/documentation.asp?id=bos) | Bosnian (Latin) | 21M | 70 | | [`hrv`](http://www-01.sil.org/iso639-3/documentation.asp?id=hrv) | Croatian | 21M | 71 | | [`nld`](http://www-01.sil.org/iso639-3/documentation.asp?id=nld) | Dutch | 21M | 72 | | [`srp`](http://www-01.sil.org/iso639-3/documentation.asp?id=srp) | Serbian (Cyrillic) | 21M | 73 | | [`srp`](http://www-01.sil.org/iso639-3/documentation.asp?id=srp) | Serbian (Latin) | 21M | 74 | | [`tha`](http://www-01.sil.org/iso639-3/documentation.asp?id=tha) | Thai | 21M | 75 | | [`ckb`](http://www-01.sil.org/iso639-3/documentation.asp?id=ckb) | Central Kurdish | 20M | 76 | | [`yor`](http://www-01.sil.org/iso639-3/documentation.asp?id=yor) | Yoruba | 20M | 77 | | [`uzn`](http://www-01.sil.org/iso639-3/documentation.asp?id=uzn) | Northern Uzbek (Cyrillic) | 18M | 78 | | [`uzn`](http://www-01.sil.org/iso639-3/documentation.asp?id=uzn) | Northern Uzbek (Latin) | 18M | 79 | | [`zlm`](http://www-01.sil.org/iso639-3/documentation.asp?id=zlm) | Malay (individual language) (Arabic) | 18M | 80 | | [`zlm`](http://www-01.sil.org/iso639-3/documentation.asp?id=zlm) | Malay (individual language) (Latin) | 18M | 81 | | [`ibo`](http://www-01.sil.org/iso639-3/documentation.asp?id=ibo) | Igbo | 17M | 82 | | [`npi`](http://www-01.sil.org/iso639-3/documentation.asp?id=npi) | Nepali (individual language) | 16M | 83 | | [`ceb`](http://www-01.sil.org/iso639-3/documentation.asp?id=ceb) | Cebuano | 15M | 84 | | [`skr`](http://www-01.sil.org/iso639-3/documentation.asp?id=skr) | Saraiki | 15M | 85 | | [`tgl`](http://www-01.sil.org/iso639-3/documentation.asp?id=tgl) | Tagalog | 15M | 86 | | [`hun`](http://www-01.sil.org/iso639-3/documentation.asp?id=hun) | Hungarian | 15M | 87 | | 
[`azj`](http://www-01.sil.org/iso639-3/documentation.asp?id=azj) | North Azerbaijani (Cyrillic) | 14M | 88 | | [`azj`](http://www-01.sil.org/iso639-3/documentation.asp?id=azj) | North Azerbaijani (Latin) | 14M | 89 | | [`sin`](http://www-01.sil.org/iso639-3/documentation.asp?id=sin) | Sinhala | 13M | 90 | | [`koi`](http://www-01.sil.org/iso639-3/documentation.asp?id=koi) | Komi-Permyak | 13M | 91 | | [`ell`](http://www-01.sil.org/iso639-3/documentation.asp?id=ell) | Modern Greek (1453-) | 12M | 92 | | [`ces`](http://www-01.sil.org/iso639-3/documentation.asp?id=ces) | Czech | 12M | 93 | | [`mag`](http://www-01.sil.org/iso639-3/documentation.asp?id=mag) | Magahi | 11M | 94 | | [`run`](http://www-01.sil.org/iso639-3/documentation.asp?id=run) | Rundi | 11M | 95 | | [`bel`](http://www-01.sil.org/iso639-3/documentation.asp?id=bel) | Belarusian | 10M | 96 | | [`plt`](http://www-01.sil.org/iso639-3/documentation.asp?id=plt) | Plateau Malagasy | 10M | 97 | | [`qug`](http://www-01.sil.org/iso639-3/documentation.asp?id=qug) | Chimborazo Highland Quichua | 10M | 98 | | [`mad`](http://www-01.sil.org/iso639-3/documentation.asp?id=mad) | Madurese | 10M | 99 | | [`nya`](http://www-01.sil.org/iso639-3/documentation.asp?id=nya) | Nyanja | 10M | 100 | | [`zyb`](http://www-01.sil.org/iso639-3/documentation.asp?id=zyb) | Yongbei Zhuang | 10M | 101 | | [`pbu`](http://www-01.sil.org/iso639-3/documentation.asp?id=pbu) | Northern Pashto | 10M | 102 | | [`kin`](http://www-01.sil.org/iso639-3/documentation.asp?id=kin) | Kinyarwanda | 9M | 103 | | [`zul`](http://www-01.sil.org/iso639-3/documentation.asp?id=zul) | Zulu | 9M | 104 | | [`bul`](http://www-01.sil.org/iso639-3/documentation.asp?id=bul) | Bulgarian | 9M | 105 | | [`swe`](http://www-01.sil.org/iso639-3/documentation.asp?id=swe) | Swedish | 9M | 106 | | [`lin`](http://www-01.sil.org/iso639-3/documentation.asp?id=lin) | Lingala | 8M | 107 | | [`som`](http://www-01.sil.org/iso639-3/documentation.asp?id=som) | Somali | 8M | 108 | | 
[`hms`](http://www-01.sil.org/iso639-3/documentation.asp?id=hms) | Southern Qiandong Miao | 8M | 109 | | [`hnj`](http://www-01.sil.org/iso639-3/documentation.asp?id=hnj) | Hmong Njua | 8M | 110 | | [`ilo`](http://www-01.sil.org/iso639-3/documentation.asp?id=ilo) | Iloko | 8M | 111 | | [`kaz`](http://www-01.sil.org/iso639-3/documentation.asp?id=kaz) | Kazakh | 8M | 112 | | [`uig`](http://www-01.sil.org/iso639-3/documentation.asp?id=uig) | Uighur (Arabic) | 7M | 113 | | [`uig`](http://www-01.sil.org/iso639-3/documentation.asp?id=uig) | Uighur (Latin) | 7M | 114 | | [`hat`](http://www-01.sil.org/iso639-3/documentation.asp?id=hat) | Haitian | 7M | 115 | | [`khm`](http://www-01.sil.org/iso639-3/documentation.asp?id=khm) | Khmer | 7M | 116 | | [`prs`](http://www-01.sil.org/iso639-3/documentation.asp?id=prs) | Dari | 7M | 117 | | [`hil`](http://www-01.sil.org/iso639-3/documentation.asp?id=hil) | Hiligaynon | 7M | 118 | | [`sna`](http://www-01.sil.org/iso639-3/documentation.asp?id=sna) | Shona | 7M | 119 | | [`tat`](http://www-01.sil.org/iso639-3/documentation.asp?id=tat) | Tatar | 7M | 120 | | [`xho`](http://www-01.sil.org/iso639-3/documentation.asp?id=xho) | Xhosa | 7M | 121 | | [`hye`](http://www-01.sil.org/iso639-3/documentation.asp?id=hye) | Armenian | 7M | 122 | | [`min`](http://www-01.sil.org/iso639-3/documentation.asp?id=min) | Minangkabau | 7M | 123 | | [`afr`](http://www-01.sil.org/iso639-3/documentation.asp?id=afr) | Afrikaans | 6M | 124 | | [`lua`](http://www-01.sil.org/iso639-3/documentation.asp?id=lua) | Luba-Lulua | 6M | 125 | | [`sat`](http://www-01.sil.org/iso639-3/documentation.asp?id=sat) | Santali | 6M | 126 | | [`bod`](http://www-01.sil.org/iso639-3/documentation.asp?id=bod) | Tibetan | 6M | 127 | | [`tir`](http://www-01.sil.org/iso639-3/documentation.asp?id=tir) | Tigrinya | 6M | 128 | | [`fin`](http://www-01.sil.org/iso639-3/documentation.asp?id=fin) | Finnish | 6M | 129 | | [`slk`](http://www-01.sil.org/iso639-3/documentation.asp?id=slk) | Slovak | 
6M | 130 | | [`tuk`](http://www-01.sil.org/iso639-3/documentation.asp?id=tuk) | Turkmen (Cyrillic) | 5M | 131 | | [`tuk`](http://www-01.sil.org/iso639-3/documentation.asp?id=tuk) | Turkmen (Latin) | 5M | 132 | | [`dan`](http://www-01.sil.org/iso639-3/documentation.asp?id=dan) | Danish | 5M | 133 | | [`nob`](http://www-01.sil.org/iso639-3/documentation.asp?id=nob) | Norwegian Bokmål | 5M | 134 | | [`suk`](http://www-01.sil.org/iso639-3/documentation.asp?id=suk) | Sukuma | 5M | 135 | | [`als`](http://www-01.sil.org/iso639-3/documentation.asp?id=als) | Tosk Albanian | 5M | 136 | | [`sag`](http://www-01.sil.org/iso639-3/documentation.asp?id=sag) | Sango | 5M | 137 | | [`nno`](http://www-01.sil.org/iso639-3/documentation.asp?id=nno) | Norwegian Nynorsk | 5M | 138 | | [`heb`](http://www-01.sil.org/iso639-3/documentation.asp?id=heb) | Hebrew | 5M | 139 | | [`mos`](http://www-01.sil.org/iso639-3/documentation.asp?id=mos) | Mossi | 5M | 140 | | [`tgk`](http://www-01.sil.org/iso639-3/documentation.asp?id=tgk) | Tajik | 4M | 141 | | [`cat`](http://www-01.sil.org/iso639-3/documentation.asp?id=cat) | Catalan | 4M | 142 | | [`sot`](http://www-01.sil.org/iso639-3/documentation.asp?id=sot) | Southern Sotho | 4M | 143 | | [`kat`](http://www-01.sil.org/iso639-3/documentation.asp?id=kat) | Georgian | 4M | 144 | | [`bcl`](http://www-01.sil.org/iso639-3/documentation.asp?id=bcl) | Central Bikol | 4M | 145 | | [`glg`](http://www-01.sil.org/iso639-3/documentation.asp?id=glg) | Galician | 4M | 146 | | [`lao`](http://www-01.sil.org/iso639-3/documentation.asp?id=lao) | Lao | 4M | 147 | | [`lit`](http://www-01.sil.org/iso639-3/documentation.asp?id=lit) | Lithuanian | 4M | 148 | | [`umb`](http://www-01.sil.org/iso639-3/documentation.asp?id=umb) | Umbundu | 4M | 149 | | [`tsn`](http://www-01.sil.org/iso639-3/documentation.asp?id=tsn) | Tswana | 4M | 150 | | [`vec`](http://www-01.sil.org/iso639-3/documentation.asp?id=vec) | Venetian | 4M | 151 | | 
[`nso`](http://www-01.sil.org/iso639-3/documentation.asp?id=nso) | Pedi | 4M | 152 | | [`ban`](http://www-01.sil.org/iso639-3/documentation.asp?id=ban) | Balinese | 4M | 153 | | [`bug`](http://www-01.sil.org/iso639-3/documentation.asp?id=bug) | Buginese | 4M | 154 | | [`knc`](http://www-01.sil.org/iso639-3/documentation.asp?id=knc) | Central Kanuri | 4M | 155 | | [`kng`](http://www-01.sil.org/iso639-3/documentation.asp?id=kng) | Koongo | 3M | 156 | | [`ibb`](http://www-01.sil.org/iso639-3/documentation.asp?id=ibb) | Ibibio | 3M | 157 | | [`lug`](http://www-01.sil.org/iso639-3/documentation.asp?id=lug) | Ganda | 3M | 158 | | [`ace`](http://www-01.sil.org/iso639-3/documentation.asp?id=ace) | Achinese | 3M | 159 | | [`bam`](http://www-01.sil.org/iso639-3/documentation.asp?id=bam) | Bambara | 3M | 160 | | [`tzm`](http://www-01.sil.org/iso639-3/documentation.asp?id=tzm) | Central Atlas Tamazight | 3M | 161 | | [`ydd`](http://www-01.sil.org/iso639-3/documentation.asp?id=ydd) | Eastern Yiddish | 3M | 162 | | [`kmb`](http://www-01.sil.org/iso639-3/documentation.asp?id=kmb) | Kimbundu | 3M | 163 | | [`lun`](http://www-01.sil.org/iso639-3/documentation.asp?id=lun) | Lunda | 3M | 164 | | [`shn`](http://www-01.sil.org/iso639-3/documentation.asp?id=shn) | Shan | 3M | 165 | | [`war`](http://www-01.sil.org/iso639-3/documentation.asp?id=war) | Waray (Philippines) | 3M | 166 | | [`dyu`](http://www-01.sil.org/iso639-3/documentation.asp?id=dyu) | Dyula | 3M | 167 | | [`wol`](http://www-01.sil.org/iso639-3/documentation.asp?id=wol) | Wolof | 3M | 168 | | [`kir`](http://www-01.sil.org/iso639-3/documentation.asp?id=kir) | Kirghiz | 3M | 169 | | [`nds`](http://www-01.sil.org/iso639-3/documentation.asp?id=nds) | Low German | 3M | 170 | | [`fuf`](http://www-01.sil.org/iso639-3/documentation.asp?id=fuf) | Pular | 3M | 171 | | [`mkd`](http://www-01.sil.org/iso639-3/documentation.asp?id=mkd) | Macedonian | 3M | 172 | | [`vmw`](http://www-01.sil.org/iso639-3/documentation.asp?id=vmw) | Makhuwa 
| 3M | 173 | | [`zgh`](http://www-01.sil.org/iso639-3/documentation.asp?id=zgh) | Standard Moroccan Tamazight | 2M | 174 | | [`ewe`](http://www-01.sil.org/iso639-3/documentation.asp?id=ewe) | Ewe | 2M | 175 | | [`khk`](http://www-01.sil.org/iso639-3/documentation.asp?id=khk) | Halh Mongolian | 2M | 176 | | [`slv`](http://www-01.sil.org/iso639-3/documentation.asp?id=slv) | Slovenian | 2M | 177 | | [`ayr`](http://www-01.sil.org/iso639-3/documentation.asp?id=ayr) | Central Aymara | 2M | 178 | | [`bem`](http://www-01.sil.org/iso639-3/documentation.asp?id=bem) | Bemba (Zambia) | 2M | 179 | | [`emk`](http://www-01.sil.org/iso639-3/documentation.asp?id=emk) | Eastern Maninkakan | 2M | 180 | | [`bci`](http://www-01.sil.org/iso639-3/documentation.asp?id=bci) | Baoulé | 2M | 181 | | [`bum`](http://www-01.sil.org/iso639-3/documentation.asp?id=bum) | Bulu (Cameroon) | 2M | 182 | | [`epo`](http://www-01.sil.org/iso639-3/documentation.asp?id=epo) | Esperanto | 2M | 183 | | [`pam`](http://www-01.sil.org/iso639-3/documentation.asp?id=pam) | Pampanga | 2M | 184 | | [`tiv`](http://www-01.sil.org/iso639-3/documentation.asp?id=tiv) | Tiv | 2M | 185 | | [`tpi`](http://www-01.sil.org/iso639-3/documentation.asp?id=tpi) | Tok Pisin | 2M | 186 | | [`ven`](http://www-01.sil.org/iso639-3/documentation.asp?id=ven) | Venda | 2M | 187 | | [`ssw`](http://www-01.sil.org/iso639-3/documentation.asp?id=ssw) | Swati | 2M | 188 | | [`nyn`](http://www-01.sil.org/iso639-3/documentation.asp?id=nyn) | Nyankole | 2M | 189 | | [`kbd`](http://www-01.sil.org/iso639-3/documentation.asp?id=kbd) | Kabardian | 2M | 190 | | [`iii`](http://www-01.sil.org/iso639-3/documentation.asp?id=iii) | Sichuan Yi | 2M | 191 | | [`yao`](http://www-01.sil.org/iso639-3/documentation.asp?id=yao) | Yao | 2M | 192 | | [`lvs`](http://www-01.sil.org/iso639-3/documentation.asp?id=lvs) | Standard Latvian | 2M | 193 | | [`quz`](http://www-01.sil.org/iso639-3/documentation.asp?id=quz) | Cusco Quechua | 2M | 194 | | 
[`src`](http://www-01.sil.org/iso639-3/documentation.asp?id=src) | Logudorese Sardinian | 2M | 195 | | [`rup`](http://www-01.sil.org/iso639-3/documentation.asp?id=rup) | Macedo-Romanian | 2M | 196 | | [`sco`](http://www-01.sil.org/iso639-3/documentation.asp?id=sco) | Scots | 2M | 197 | | [`tso`](http://www-01.sil.org/iso639-3/documentation.asp?id=tso) | Tsonga | 2M | 198 | | [`men`](http://www-01.sil.org/iso639-3/documentation.asp?id=men) | Mende (Sierra Leone) | 1M | 199 | | [`fon`](http://www-01.sil.org/iso639-3/documentation.asp?id=fon) | Fon | 1M | 200 | | [`nhn`](http://www-01.sil.org/iso639-3/documentation.asp?id=nhn) | Central Nahuatl | 1M | 201 | | [`dip`](http://www-01.sil.org/iso639-3/documentation.asp?id=dip) | Northeastern Dinka | 1M | 202 | | [`kde`](http://www-01.sil.org/iso639-3/documentation.asp?id=kde) | Makonde | 1M | 203 | | [`kbp`](http://www-01.sil.org/iso639-3/documentation.asp?id=kbp) | Kabiyè | 1M | 204 | | [`tem`](http://www-01.sil.org/iso639-3/documentation.asp?id=tem) | Timne | 1M | 205 | | [`toi`](http://www-01.sil.org/iso639-3/documentation.asp?id=toi) | Tonga (Zambia) | 1M | 206 | | [`ekk`](http://www-01.sil.org/iso639-3/documentation.asp?id=ekk) | Standard Estonian | 1M | 207 | | [`snk`](http://www-01.sil.org/iso639-3/documentation.asp?id=snk) | Soninke | 1M | 208 | | [`cjk`](http://www-01.sil.org/iso639-3/documentation.asp?id=cjk) | Chokwe | 1M | 209 | | [`ada`](http://www-01.sil.org/iso639-3/documentation.asp?id=ada) | Adangme | 1M | 210 | | [`aii`](http://www-01.sil.org/iso639-3/documentation.asp?id=aii) | Assyrian Neo-Aramaic | 1M | 211 | | [`quy`](http://www-01.sil.org/iso639-3/documentation.asp?id=quy) | Ayacucho Quechua | 1M | 212 | | [`rmn`](http://www-01.sil.org/iso639-3/documentation.asp?id=rmn) | Balkan Romani | 1M | 213 | | [`bin`](http://www-01.sil.org/iso639-3/documentation.asp?id=bin) | Bini | 1M | 214 | | [`gaa`](http://www-01.sil.org/iso639-3/documentation.asp?id=gaa) | Ga | 1M | 215 | | 
[`ndo`](http://www-01.sil.org/iso639-3/documentation.asp?id=ndo) | Ndonga | 1M | 216 | | [`nym`](http://www-01.sil.org/iso639-3/documentation.asp?id=nym) | Nyamwezi | 926K | 217 | | [`sus`](http://www-01.sil.org/iso639-3/documentation.asp?id=sus) | Susu | 923K | 218 | | [`tly`](http://www-01.sil.org/iso639-3/documentation.asp?id=tly) | Talysh | 915K | 219 | | [`srr`](http://www-01.sil.org/iso639-3/documentation.asp?id=srr) | Serer | 869K | 220 | | [`kha`](http://www-01.sil.org/iso639-3/documentation.asp?id=kha) | Khasi | 865K | 221 | | [`oci`](http://www-01.sil.org/iso639-3/documentation.asp?id=oci) | Occitan (post 1500) | 855K | 222 | | [`hea`](http://www-01.sil.org/iso639-3/documentation.asp?id=hea) | Northern Qiandong Miao | 820K | 223 | | [`gkp`](http://www-01.sil.org/iso639-3/documentation.asp?id=gkp) | Guinea Kpelle | 808K | 224 | | [`hni`](http://www-01.sil.org/iso639-3/documentation.asp?id=hni) | Hani | 747K | 225 | | [`fry`](http://www-01.sil.org/iso639-3/documentation.asp?id=fry) | Western Frisian | 730K | 226 | | [`yua`](http://www-01.sil.org/iso639-3/documentation.asp?id=yua) | Yucateco | 700K | 227 | | [`fij`](http://www-01.sil.org/iso639-3/documentation.asp?id=fij) | Fijian | 650K | 228 | | [`fur`](http://www-01.sil.org/iso639-3/documentation.asp?id=fur) | Friulian | 600K | 229 | | [`tet`](http://www-01.sil.org/iso639-3/documentation.asp?id=tet) | Tetum | 600K | 230 | | [`wln`](http://www-01.sil.org/iso639-3/documentation.asp?id=wln) | Walloon | 600K | 231 | | [`eus`](http://www-01.sil.org/iso639-3/documentation.asp?id=eus) | Basque | 588K | 232 | | [`oss`](http://www-01.sil.org/iso639-3/documentation.asp?id=oss) | Ossetian | 588K | 233 | | [`nbl`](http://www-01.sil.org/iso639-3/documentation.asp?id=nbl) | South Ndebele | 588K | 234 | | [`pov`](http://www-01.sil.org/iso639-3/documentation.asp?id=pov) | Upper Guinea Crioulo | 580K | 235 | | [`cym`](http://www-01.sil.org/iso639-3/documentation.asp?id=cym) | Welsh | 580K | 236 | | 
[`lus`](http://www-01.sil.org/iso639-3/documentation.asp?id=lus) | Lushai | 542K | 237 | | [`dag`](http://www-01.sil.org/iso639-3/documentation.asp?id=dag) | Dagbani | 540K | 238 | | [`dga`](http://www-01.sil.org/iso639-3/documentation.asp?id=dga) | Southern Dagaare | 501K | 239 | | [`bre`](http://www-01.sil.org/iso639-3/documentation.asp?id=bre) | Breton | 500K | 240 | | [`kek`](http://www-01.sil.org/iso639-3/documentation.asp?id=kek) | Kekchí | 500K | 241 | | [`lij`](http://www-01.sil.org/iso639-3/documentation.asp?id=lij) | Ligurian | 500K | 242 | | [`pcd`](http://www-01.sil.org/iso639-3/documentation.asp?id=pcd) | Picard | 500K | 243 | | [`roh`](http://www-01.sil.org/iso639-3/documentation.asp?id=roh) | Romansh | 500K | 244 | | [`bfa`](http://www-01.sil.org/iso639-3/documentation.asp?id=bfa) | Bari | 480K | 245 | | [`kri`](http://www-01.sil.org/iso639-3/documentation.asp?id=kri) | Krio | 480K | 246 | | [`cnh`](http://www-01.sil.org/iso639-3/documentation.asp?id=cnh) | Hakha Chin | 446K | 247 | | [`lob`](http://www-01.sil.org/iso639-3/documentation.asp?id=lob) | Lobi | 442K | 248 | | [`arn`](http://www-01.sil.org/iso639-3/documentation.asp?id=arn) | Mapudungun | 440K | 249 | | [`bba`](http://www-01.sil.org/iso639-3/documentation.asp?id=bba) | Baatonum | 400K | 250 | | [`dzo`](http://www-01.sil.org/iso639-3/documentation.asp?id=dzo) | Dzongkha | 400K | 251 | | [`kea`](http://www-01.sil.org/iso639-3/documentation.asp?id=kea) | Kabuverdianu | 394K | 252 | | [`sah`](http://www-01.sil.org/iso639-3/documentation.asp?id=sah) | Yakut | 363K | 253 | | [`smo`](http://www-01.sil.org/iso639-3/documentation.asp?id=smo) | Samoan | 362K | 254 | | [`koo`](http://www-01.sil.org/iso639-3/documentation.asp?id=koo) | Konzo | 362K | 255 | | [`nzi`](http://www-01.sil.org/iso639-3/documentation.asp?id=nzi) | Nzima | 353K | 256 | | [`maz`](http://www-01.sil.org/iso639-3/documentation.asp?id=maz) | Central Mazahua | 350K | 257 | | 
[`pis`](http://www-01.sil.org/iso639-3/documentation.asp?id=pis) | Pijin | 350K | 258 | | [`ctd`](http://www-01.sil.org/iso639-3/documentation.asp?id=ctd) | Tedim Chin | 344K | 259 | | [`cos`](http://www-01.sil.org/iso639-3/documentation.asp?id=cos) | Corsican | 341K | 260 | | [`ltz`](http://www-01.sil.org/iso639-3/documentation.asp?id=ltz) | Luxembourgish | 336K | 261 | | [`lia`](http://www-01.sil.org/iso639-3/documentation.asp?id=lia) | West-Central Limba | 335K | 262 | | [`mlt`](http://www-01.sil.org/iso639-3/documentation.asp?id=mlt) | Maltese | 330K | 263 | | [`zdj`](http://www-01.sil.org/iso639-3/documentation.asp?id=zdj) | Ngazidja Comorian | 312K | 264 | | [`guc`](http://www-01.sil.org/iso639-3/documentation.asp?id=guc) | Wayuu | 305K | 265 | | [`qwh`](http://www-01.sil.org/iso639-3/documentation.asp?id=qwh) | Huaylas Ancash Quechua | 300K | 266 | | [`quc`](http://www-01.sil.org/iso639-3/documentation.asp?id=quc) | K'iche' | 300K | 267 | | [`div`](http://www-01.sil.org/iso639-3/documentation.asp?id=div) | Dhivehi | 287K | 268 | | [`isl`](http://www-01.sil.org/iso639-3/documentation.asp?id=isl) | Icelandic | 283K | 269 | | [`kqn`](http://www-01.sil.org/iso639-3/documentation.asp?id=kqn) | Kaonde | 276K | 270 | | [`pap`](http://www-01.sil.org/iso639-3/documentation.asp?id=pap) | Papiamento | 263K | 271 | | [`gle`](http://www-01.sil.org/iso639-3/documentation.asp?id=gle) | Irish | 260K | 272 | | [`dyo`](http://www-01.sil.org/iso639-3/documentation.asp?id=dyo) | Jola-Fonyi | 260K | 273 | | [`hns`](http://www-01.sil.org/iso639-3/documentation.asp?id=hns) | Caribbean Hindustani | 250K | 274 | | [`gjn`](http://www-01.sil.org/iso639-3/documentation.asp?id=gjn) | Gonja | 250K | 275 | | [`njo`](http://www-01.sil.org/iso639-3/documentation.asp?id=njo) | Ao Naga | 232K | 276 | | [`hus`](http://www-01.sil.org/iso639-3/documentation.asp?id=hus) | Huastec | 220K | 277 | | [`xsm`](http://www-01.sil.org/iso639-3/documentation.asp?id=xsm) | Kasem | 200K | 278 | | 
[`ote`](http://www-01.sil.org/iso639-3/documentation.asp?id=ote) | Mezquital Otomi | 200K | 279 | | [`qxn`](http://www-01.sil.org/iso639-3/documentation.asp?id=qxn) | Northern Conchucos Ancash Quechua | 200K | 280 | | [`tyv`](http://www-01.sil.org/iso639-3/documentation.asp?id=tyv) | Tuvinian | 200K | 281 | | [`gag`](http://www-01.sil.org/iso639-3/documentation.asp?id=gag) | Gagauz | 198K | 282 | | [`san`](http://www-01.sil.org/iso639-3/documentation.asp?id=san) | Sanskrit | 194K | 283 | | [`shk`](http://www-01.sil.org/iso639-3/documentation.asp?id=shk) | Shilluk | 175K | 284 | | [`nba`](http://www-01.sil.org/iso639-3/documentation.asp?id=nba) | Nyemba | 172K | 285 | | [`miq`](http://www-01.sil.org/iso639-3/documentation.asp?id=miq) | Mískito | 160K | 286 | | [`mam`](http://www-01.sil.org/iso639-3/documentation.asp?id=mam) | Mam | 157K | 287 | | [`tah`](http://www-01.sil.org/iso639-3/documentation.asp?id=tah) | Tahitian | 150K | 288 | | [`nav`](http://www-01.sil.org/iso639-3/documentation.asp?id=nav) | Navajo | 149K | 289 | | [`ami`](http://www-01.sil.org/iso639-3/documentation.asp?id=ami) | Amis | 138K | 290 | | [`lot`](http://www-01.sil.org/iso639-3/documentation.asp?id=lot) | Otuho | 135K | 291 | | [`cak`](http://www-01.sil.org/iso639-3/documentation.asp?id=cak) | Kaqchikel | 132K | 292 | | [`tzh`](http://www-01.sil.org/iso639-3/documentation.asp?id=tzh) | Tzeltal | 130K | 293 | | [`tzo`](http://www-01.sil.org/iso639-3/documentation.asp?id=tzo) | Tzotzil | 130K | 294 | | [`lns`](http://www-01.sil.org/iso639-3/documentation.asp?id=lns) | Lamnso' | 125K | 295 | | [`ton`](http://www-01.sil.org/iso639-3/documentation.asp?id=ton) | Tonga (Tonga Islands) | 123K | 296 | | [`tbz`](http://www-01.sil.org/iso639-3/documentation.asp?id=tbz) | Ditammari | 120K | 297 | | [`lad`](http://www-01.sil.org/iso639-3/documentation.asp?id=lad) | Ladino | 120K | 298 | | [`vai`](http://www-01.sil.org/iso639-3/documentation.asp?id=vai) | Vai | 120K | 299 | | 
[`mto`](http://www-01.sil.org/iso639-3/documentation.asp?id=mto) | Totontepec Mixe | 119K | 300 | | [`ady`](http://www-01.sil.org/iso639-3/documentation.asp?id=ady) | Adyghe | 118K | 301 | | [`abk`](http://www-01.sil.org/iso639-3/documentation.asp?id=abk) | Abkhazian | 105K | 302 | | [`ast`](http://www-01.sil.org/iso639-3/documentation.asp?id=ast) | Asturian | 100K | 303 | | [`tsz`](http://www-01.sil.org/iso639-3/documentation.asp?id=tsz) | Purepecha | 100K | 304 | | [`swb`](http://www-01.sil.org/iso639-3/documentation.asp?id=swb) | Maore Comorian | 97K | 305 | | [`cab`](http://www-01.sil.org/iso639-3/documentation.asp?id=cab) | Garifuna | 95K | 306 | | [`krl`](http://www-01.sil.org/iso639-3/documentation.asp?id=krl) | Karelian | 80K | 307 | | [`zam`](http://www-01.sil.org/iso639-3/documentation.asp?id=zam) | Miahuatlán Zapotec | 80K | 308 | | [`top`](http://www-01.sil.org/iso639-3/documentation.asp?id=top) | Papantla Totonac | 80K | 309 | | [`cha`](http://www-01.sil.org/iso639-3/documentation.asp?id=cha) | Chamorro | 78K | 310 | | [`crs`](http://www-01.sil.org/iso639-3/documentation.asp?id=crs) | Seselwa Creole French | 73K | 311 | | [`ddn`](http://www-01.sil.org/iso639-3/documentation.asp?id=ddn) | Dendi (Benin) | 72K | 312 | | [`loz`](http://www-01.sil.org/iso639-3/documentation.asp?id=loz) | Lozi | 72K | 313 | | [`mri`](http://www-01.sil.org/iso639-3/documentation.asp?id=mri) | Maori | 70K | 314 | | [`slr`](http://www-01.sil.org/iso639-3/documentation.asp?id=slr) | Salar | 70K | 315 | | [`hsb`](http://www-01.sil.org/iso639-3/documentation.asp?id=hsb) | Upper Sorbian | 70K | 316 | | [`cri`](http://www-01.sil.org/iso639-3/documentation.asp?id=cri) | Sãotomense | 70K | 317 | | [`pbb`](http://www-01.sil.org/iso639-3/documentation.asp?id=pbb) | Páez | 68K | 318 | | [`alt`](http://www-01.sil.org/iso639-3/documentation.asp?id=alt) | Southern Altai | 68K | 319 | | [`qva`](http://www-01.sil.org/iso639-3/documentation.asp?id=qva) | Ambo-Pasco Quechua | 65K | 320 | | 
[`mxv`](http://www-01.sil.org/iso639-3/documentation.asp?id=mxv) | Metlatónoc Mixtec | 65K | 321 | | [`gla`](http://www-01.sil.org/iso639-3/documentation.asp?id=gla) | Scottish Gaelic | 64K | 322 | | [`kjh`](http://www-01.sil.org/iso639-3/documentation.asp?id=kjh) | Khakas | 60K | 323 | | [`csw`](http://www-01.sil.org/iso639-3/documentation.asp?id=csw) | Swampy Cree | 60K | 324 | | [`qvm`](http://www-01.sil.org/iso639-3/documentation.asp?id=qvm) | Margos-Yarowilca-Lauricocha Quechua | 55K | 325 | | [`fao`](http://www-01.sil.org/iso639-3/documentation.asp?id=fao) | Faroese | 47K | 326 | | [`kal`](http://www-01.sil.org/iso639-3/documentation.asp?id=kal) | Kalaallisut | 47K | 327 | | [`cni`](http://www-01.sil.org/iso639-3/documentation.asp?id=cni) | Asháninka | 45K | 328 | | [`chk`](http://www-01.sil.org/iso639-3/documentation.asp?id=chk) | Chuukese | 45K | 329 | | [`mah`](http://www-01.sil.org/iso639-3/documentation.asp?id=mah) | Marshallese | 44K | 330 | | [`rar`](http://www-01.sil.org/iso639-3/documentation.asp?id=rar) | Rarotongan | 43K | 331 | | [`evn`](http://www-01.sil.org/iso639-3/documentation.asp?id=evn) | Evenki | 40K | 332 | | [`qvn`](http://www-01.sil.org/iso639-3/documentation.asp?id=qvn) | North Junín Quechua | 40K | 333 | | [`wwa`](http://www-01.sil.org/iso639-3/documentation.asp?id=wwa) | Waama | 40K | 334 | | [`buc`](http://www-01.sil.org/iso639-3/documentation.asp?id=buc) | Bushi | 39K | 335 | | [`qvh`](http://www-01.sil.org/iso639-3/documentation.asp?id=qvh) | Huamalíes-Dos de Mayo Huánuco Quechua | 38K | 336 | | [`toj`](http://www-01.sil.org/iso639-3/documentation.asp?id=toj) | Tojolabal | 36K | 337 | | [`lue`](http://www-01.sil.org/iso639-3/documentation.asp?id=lue) | Luvale | 36K | 338 | | [`qvc`](http://www-01.sil.org/iso639-3/documentation.asp?id=qvc) | Cajamarca Quechua | 35K | 339 | | [`ojb`](http://www-01.sil.org/iso639-3/documentation.asp?id=ojb) | Northwestern Ojibwa | 35K | 340 | | 
[`jiv`](http://www-01.sil.org/iso639-3/documentation.asp?id=jiv) | Shuar | 35K | 341 | | [`lld`](http://www-01.sil.org/iso639-3/documentation.asp?id=lld) | Ladin | 30K | 342 | | [`hlt`](http://www-01.sil.org/iso639-3/documentation.asp?id=hlt) | Matu Chin | 30K | 343 | | [`que`](http://www-01.sil.org/iso639-3/documentation.asp?id=que) | Quechua | 30K | 344 | | [`pon`](http://www-01.sil.org/iso639-3/documentation.asp?id=pon) | Pohnpeian | 28K | 345 | | [`agr`](http://www-01.sil.org/iso639-3/documentation.asp?id=agr) | Aguaruna | 28K | 346 | | [`yrk`](http://www-01.sil.org/iso639-3/documentation.asp?id=yrk) | Nenets | 27K | 347 | | [`quh`](http://www-01.sil.org/iso639-3/documentation.asp?id=quh) | South Bolivian Quechua | 25K | 348 | | [`tca`](http://www-01.sil.org/iso639-3/documentation.asp?id=tca) | Ticuna | 25K | 349 | | [`chj`](http://www-01.sil.org/iso639-3/documentation.asp?id=chj) | Ojitlán Chinantec | 22K | 350 | | [`ike`](http://www-01.sil.org/iso639-3/documentation.asp?id=ike) | Eastern Canadian Inuktitut | 22K | 351 | | [`kwi`](http://www-01.sil.org/iso639-3/documentation.asp?id=kwi) | Awa-Cuaiquer | 21K | 352 | | [`rgn`](http://www-01.sil.org/iso639-3/documentation.asp?id=rgn) | Romagnol | 20K | 353 | | [`oki`](http://www-01.sil.org/iso639-3/documentation.asp?id=oki) | Okiek | 20K | 354 | | [`tob`](http://www-01.sil.org/iso639-3/documentation.asp?id=tob) | Toba | 20K | 355 | | [`guu`](http://www-01.sil.org/iso639-3/documentation.asp?id=guu) | Yanomamö | 18K | 356 | | [`qxu`](http://www-01.sil.org/iso639-3/documentation.asp?id=qxu) | Arequipa-La Unión Quechua | 16K | 357 | | [`pau`](http://www-01.sil.org/iso639-3/documentation.asp?id=pau) | Palauan | 15K | 358 | | [`shp`](http://www-01.sil.org/iso639-3/documentation.asp?id=shp) | Shipibo-Conibo | 15K | 359 | | [`gld`](http://www-01.sil.org/iso639-3/documentation.asp?id=gld) | Nanai | 12K | 360 | | [`gug`](http://www-01.sil.org/iso639-3/documentation.asp?id=gug) | Paraguayan Guaraní | 12K | 361 | | 
[`mzi`](http://www-01.sil.org/iso639-3/documentation.asp?id=mzi) | Ixcatlán Mazatec | 11K | 362 | | [`cjs`](http://www-01.sil.org/iso639-3/documentation.asp?id=cjs) | Shor | 10K | 363 | | [`mic`](http://www-01.sil.org/iso639-3/documentation.asp?id=mic) | Mi'kmaq | 8K | 364 | | [`haw`](http://www-01.sil.org/iso639-3/documentation.asp?id=haw) | Hawaiian | 8K | 365 | | [`eve`](http://www-01.sil.org/iso639-3/documentation.asp?id=eve) | Even | 7K | 366 | | [`yap`](http://www-01.sil.org/iso639-3/documentation.asp?id=yap) | Yapese | 7K | 367 | | [`cbt`](http://www-01.sil.org/iso639-3/documentation.asp?id=cbt) | Chayahuita | 6K | 368 | | [`ame`](http://www-01.sil.org/iso639-3/documentation.asp?id=ame) | Yanesha' | 6K | 369 | | [`gyr`](http://www-01.sil.org/iso639-3/documentation.asp?id=gyr) | Guarayu | 6K | 370 | | [`vep`](http://www-01.sil.org/iso639-3/documentation.asp?id=vep) | Veps | 6K | 371 | | [`cpu`](http://www-01.sil.org/iso639-3/documentation.asp?id=cpu) | Pichis Ashéninka | 5K | 372 | | [`acu`](http://www-01.sil.org/iso639-3/documentation.asp?id=acu) | Achuar-Shiwiar | 5K | 373 | | [`not`](http://www-01.sil.org/iso639-3/documentation.asp?id=not) | Nomatsiguenga | 4K | 374 | | [`sme`](http://www-01.sil.org/iso639-3/documentation.asp?id=sme) | Northern Sami | 4K | 375 | | [`yad`](http://www-01.sil.org/iso639-3/documentation.asp?id=yad) | Yagua | 4K | 376 | | [`ura`](http://www-01.sil.org/iso639-3/documentation.asp?id=ura) | Urarina | 4K | 377 | | [`cbu`](http://www-01.sil.org/iso639-3/documentation.asp?id=cbu) | Candoshi-Shapra | 3K | 378 | | [`huu`](http://www-01.sil.org/iso639-3/documentation.asp?id=huu) | Murui Huitoto | 3K | 379 | | [`cof`](http://www-01.sil.org/iso639-3/documentation.asp?id=cof) | Colorado | 2K | 380 | | [`boa`](http://www-01.sil.org/iso639-3/documentation.asp?id=boa) | Bora | 2K | 381 | | [`cbs`](http://www-01.sil.org/iso639-3/documentation.asp?id=cbs) | Cashinahua | 2K | 382 | | 
[`ztu`](http://www-01.sil.org/iso639-3/documentation.asp?id=ztu) | Güilá Zapotec | 2K | 383 | | [`piu`](http://www-01.sil.org/iso639-3/documentation.asp?id=piu) | Pintupi-Luritja | 2K | 384 | | [`cbr`](http://www-01.sil.org/iso639-3/documentation.asp?id=cbr) | Cashibo-Cacataibo | 2K | 385 | | [`mcf`](http://www-01.sil.org/iso639-3/documentation.asp?id=mcf) | Matsés | 1K | 386 | | [`bis`](http://www-01.sil.org/iso639-3/documentation.asp?id=bis) | Bislama | 1K | 387 | | [`orh`](http://www-01.sil.org/iso639-3/documentation.asp?id=orh) | Oroqen | 1K | 388 | | [`ykg`](http://www-01.sil.org/iso639-3/documentation.asp?id=ykg) | Northern Yukaghir | 1K | 389 | | [`ese`](http://www-01.sil.org/iso639-3/documentation.asp?id=ese) | Ese Ejja | 1K | 390 | | [`nio`](http://www-01.sil.org/iso639-3/documentation.asp?id=nio) | Nganasan | 1K | 391 | | [`cic`](http://www-01.sil.org/iso639-3/documentation.asp?id=cic) | Chickasaw | 1K | 392 | | [`csa`](http://www-01.sil.org/iso639-3/documentation.asp?id=csa) | Chiltepec Chinantec | 1K | 393 | | [`niv`](http://www-01.sil.org/iso639-3/documentation.asp?id=niv) | Gilyak | 1K | 394 | | [`mcd`](http://www-01.sil.org/iso639-3/documentation.asp?id=mcd) | Sharanahua | 950 | 395 | | [`amc`](http://www-01.sil.org/iso639-3/documentation.asp?id=amc) | Amahuaca | 720 | 396 | | [`amr`](http://www-01.sil.org/iso639-3/documentation.asp?id=amr) | Amarakaeri | 500 | 397 | | [`snn`](http://www-01.sil.org/iso639-3/documentation.asp?id=snn) | Siona | 500 | 398 | | [`cot`](http://www-01.sil.org/iso639-3/documentation.asp?id=cot) | Caquinte | 300 | 399 | | [`oaa`](http://www-01.sil.org/iso639-3/documentation.asp?id=oaa) | Orok | 295 | 400 | | [`ajg`](http://www-01.sil.org/iso639-3/documentation.asp?id=ajg) | Aja (Benin) | 200 | 401 | | [`arl`](http://www-01.sil.org/iso639-3/documentation.asp?id=arl) | Arabela | 150 | 402 | | [`ppl`](http://www-01.sil.org/iso639-3/documentation.asp?id=ppl) | Pipil | 20 | 403 | | 
[`aar`](http://www-01.sil.org/iso639-3/documentation.asp?id=aar) | Afar | unknown | 404 | | [`bax`](http://www-01.sil.org/iso639-3/documentation.asp?id=bax) | Bamun | unknown | 405 | | [`nku`](http://www-01.sil.org/iso639-3/documentation.asp?id=nku) | Bouna Kulango | unknown | 406 | | [`cbi`](http://www-01.sil.org/iso639-3/documentation.asp?id=cbi) | Chachi | unknown | 407 | | [`chr`](http://www-01.sil.org/iso639-3/documentation.asp?id=chr) | Cherokee | unknown | 408 | | [`chv`](http://www-01.sil.org/iso639-3/documentation.asp?id=chv) | Chuvash | unknown | 409 | | [`crh`](http://www-01.sil.org/iso639-3/documentation.asp?id=crh) | Crimean Tatar | unknown | 410 | | [`duu`](http://www-01.sil.org/iso639-3/documentation.asp?id=duu) | Drung | unknown | 411 | | [`cfm`](http://www-01.sil.org/iso639-3/documentation.asp?id=cfm) | Falam Chin | unknown | 412 | | [`fat`](http://www-01.sil.org/iso639-3/documentation.asp?id=fat) | Fanti | unknown | 413 | | [`fvr`](http://www-01.sil.org/iso639-3/documentation.asp?id=fvr) | Fur | unknown | 414 | | [`gej`](http://www-01.sil.org/iso639-3/documentation.asp?id=gej) | Gen | unknown | 415 | | [`guk`](http://www-01.sil.org/iso639-3/documentation.asp?id=guk) | Gumuz | unknown | 416 | | [`ido`](http://www-01.sil.org/iso639-3/documentation.asp?id=ido) | Ido | unknown | 417 | | [`idu`](http://www-01.sil.org/iso639-3/documentation.asp?id=idu) | Idoma | unknown | 418 | | [`ina`](http://www-01.sil.org/iso639-3/documentation.asp?id=ina) | Interlingua (International Auxiliary Language Association) | unknown | 419 | | [`kbr`](http://www-01.sil.org/iso639-3/documentation.asp?id=kbr) | Kafa | unknown | 420 | | [`kaa`](http://www-01.sil.org/iso639-3/documentation.asp?id=kaa) | Kara-Kalpak | unknown | 421 | | [`kkh`](http://www-01.sil.org/iso639-3/documentation.asp?id=kkh) | Khün | unknown | 422 | | [`ktu`](http://www-01.sil.org/iso639-3/documentation.asp?id=ktu) | Kituba (Democratic Republic of Congo) | unknown | 423 | | 
[`fkv`](http://www-01.sil.org/iso639-3/documentation.asp?id=fkv) | Kven Finnish | unknown | 424 | | [`lat`](http://www-01.sil.org/iso639-3/documentation.asp?id=lat) | Latin | unknown | 425 | | [`glv`](http://www-01.sil.org/iso639-3/documentation.asp?id=glv) | Manx | unknown | 426 | | [`mfq`](http://www-01.sil.org/iso639-3/documentation.asp?id=mfq) | Moba | unknown | 427 | | [`mnw`](http://www-01.sil.org/iso639-3/documentation.asp?id=mnw) | Mon | unknown | 428 | | [`cnr`](http://www-01.sil.org/iso639-3/documentation.asp?id=cnr) | Montenegrin | unknown | 429 | | [`mor`](http://www-01.sil.org/iso639-3/documentation.asp?id=mor) | Moro | unknown | 430 | | [`mxi`](http://www-01.sil.org/iso639-3/documentation.asp?id=mxi) | Mozarabic | 0 | 431 | | [`pcm`](http://www-01.sil.org/iso639-3/documentation.asp?id=pcm) | Nigerian Pidgin | unknown | 432 | | [`niu`](http://www-01.sil.org/iso639-3/documentation.asp?id=niu) | Niuean | unknown | 433 | | [`kqs`](http://www-01.sil.org/iso639-3/documentation.asp?id=kqs) | Northern Kissi | unknown | 434 | | [`sey`](http://www-01.sil.org/iso639-3/documentation.asp?id=sey) | Secoya | unknown | 435 | | [`sid`](http://www-01.sil.org/iso639-3/documentation.asp?id=sid) | Sidamo | unknown | 436 | | [`azb`](http://www-01.sil.org/iso639-3/documentation.asp?id=azb) | South Azerbaijani | unknown | 437 | | [`ijs`](http://www-01.sil.org/iso639-3/documentation.asp?id=ijs) | Southeast Ijo | unknown | 438 | | [`gsw`](http://www-01.sil.org/iso639-3/documentation.asp?id=gsw) | Swiss German | unknown | 439 | | [`blt`](http://www-01.sil.org/iso639-3/documentation.asp?id=blt) | Tai Dam | unknown | 440 | | [`kdh`](http://www-01.sil.org/iso639-3/documentation.asp?id=kdh) | Tem | unknown | 441 | | [`tdt`](http://www-01.sil.org/iso639-3/documentation.asp?id=tdt) | Tetun Dili | unknown | 442 | | [`twi`](http://www-01.sil.org/iso639-3/documentation.asp?id=twi) | Twi | unknown | 443 | | [`udu`](http://www-01.sil.org/iso639-3/documentation.asp?id=udu) | Uduk | unknown 
| 444 | | [`auc`](http://www-01.sil.org/iso639-3/documentation.asp?id=auc) | Waorani | unknown | 445 | | [`gaz`](http://www-01.sil.org/iso639-3/documentation.asp?id=gaz) | West Central Oromo | unknown | 446 | | [`pnb`](http://www-01.sil.org/iso639-3/documentation.asp?id=pnb) | Western Panjabi | unknown | 447 | | [`dje`](http://www-01.sil.org/iso639-3/documentation.asp?id=dje) | Zarma | unknown | 448 | | [`zro`](http://www-01.sil.org/iso639-3/documentation.asp?id=zro) | Záparo | unknown | 449 | 450 | ## License 451 | 452 | [MIT](https://github.com/wooorm/franc/blob/main/license) © [Titus Wormer](https://wooorm.com) 453 | --------------------------------------------------------------------------------