├── .gitignore
├── LICENSE
├── README.md
├── bin
└── cli.js
├── package-lock.json
├── package.json
├── src
├── components
│ ├── checkByteOrderMark.js
│ ├── checkUTF.js
│ ├── processContent.js
│ └── processing-content
│ │ ├── calculateConfidenceScore.js
│ │ └── countAllMatches.js
├── config
│ ├── byteOrderMarkObject.js
│ └── languageObject.js
├── index-browser.js
├── index-node.js
└── index.d.ts
└── tests
├── browser
├── browser-test
│ ├── README.md
│ ├── package-lock.json
│ ├── package.json
│ ├── public
│ │ ├── index.html
│ │ ├── manifest.json
│ │ └── robots.txt
│ ├── src
│ │ ├── App.tsx
│ │ ├── index.tsx
│ │ └── react-app-env.d.ts
│ └── tsconfig.json
├── html-test
│ ├── app.js
│ └── index.html
└── live-demo
│ ├── LICENSE
│ ├── README.md
│ ├── package-lock.json
│ ├── package.json
│ ├── public
│ ├── index.html
│ ├── manifest.json
│ └── robots.txt
│ └── src
│ ├── App.js
│ ├── defaultFileInfo.js
│ ├── index.css
│ └── index.js
└── node
├── node-ts-test
├── index.ts
├── package-lock.json
├── package.json
└── tsconfig.json
└── node.test.js
/.gitignore:
--------------------------------------------------------------------------------
1 | umd/
2 | node_modules
3 | build
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 gignu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Detect-File-Encoding-And-Language
2 |
3 | 
4 | 
5 | 
6 |
7 | [](https://www.npmjs.org/package/detect-file-encoding-and-language)
8 |
9 | ## Functionality
10 |
11 | Determine the encoding and language of text files!
12 |
13 | - Detects 40 languages as well as the appropriate encoding
14 | - Available as CLI, in Node.js and in the browser
15 | - Supports .txt, .srt, .sub, .html, .csv, .tsv
16 | - Works best with large inputs
17 | - Completely free, no API key required
18 |
19 | For reliable encoding and language detection, use files containing at least 500 words of coherent text. Smaller inputs can work as well but the results might be less accurate and in some cases incorrect.
20 |
21 | ## Live Demo
22 |
23 | Feel free to test the functionality of this NPM package [here](https://detect-file-encoding-and-language-live-demo.netlify.app/). Upload your own files and see if the encoding and language are detected correctly!
24 |
25 | ## Installation
26 |
27 | ```
28 | npm install detect-file-encoding-and-language
29 | ```
30 |
31 | ## Usage
32 |
33 | ### Via CDN
34 |
35 | ```js
36 | // index.html
37 |
78 |
79 | Select a folder that contains subtitle files or subdirectories with subtitle files.
80 | Then open the browser console to see whether tests are passing or failing.
81 | Make sure you're running the latest version of detect-file-encoding-and-language
82 | by taking a closer look at the package in the node modules folder or by downloading
83 | a fresh clone of this repo!
84 |
85 |
Determine the encoding and language of text files!
70 |
71 |
72 | Detects 40 languages as well as the appropriate encoding
73 |
74 |
Available as CLI, in Node.js and in the browser
75 |
Supports .txt, .srt, and .sub
76 |
Works best with large inputs
77 |
Completely free, no API key required
78 |
79 |
80 | For reliable encoding and language detection, use files
81 | containing 500 words or more. Smaller inputs can work as well
82 | but the results might be less accurate and in some cases
83 | incorrect.
84 |
85 |
86 | Feel free to upload your own files and see if the encoding and
87 | language are detected correctly!
88 |
89 |
90 | )}
91 |
92 |
93 |
94 |
95 | );
96 | }
97 |
98 | export default App;
99 |
--------------------------------------------------------------------------------
/tests/browser/live-demo/src/defaultFileInfo.js:
--------------------------------------------------------------------------------
// Blank file-info record: language, encoding and confidence all start as empty strings.
const defaultFileInfo = {
  language: "",
  encoding: "",
  confidence: ""
};

export default defaultFileInfo;
6 |
--------------------------------------------------------------------------------
/tests/browser/live-demo/src/index.css:
--------------------------------------------------------------------------------
1 | body {
2 | background-color: lightgray;
3 | }
4 |
5 | h4 {
6 | margin-bottom: 60px;
7 | text-align: center;
8 | }
9 |
10 | .card-panel {
11 | overflow: hidden;
12 | min-height: 95vh;
13 | }
--------------------------------------------------------------------------------
/tests/browser/live-demo/src/index.js:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import ReactDOM from 'react-dom';
3 | import './index.css';
4 | import App from './App';
5 |
6 | ReactDOM.render(, document.getElementById('root'));
7 |
--------------------------------------------------------------------------------
/tests/node/node-ts-test/index.ts:
--------------------------------------------------------------------------------
// Look at the "scripts" section in package.json to see how to run this code!
import languageEncoding from "detect-file-encoding-and-language";
const pathToFile = "/home/gignu/Documents/Subtitle Database/Samples for each Format/polish-cp-1250-sample-subtitles.srt";
languageEncoding(pathToFile)
  .then((fileInfo) => console.log(fileInfo))
  // Report a failed detection instead of leaving the promise rejection unhandled.
  .catch((error) => console.error(error));
5 |
--------------------------------------------------------------------------------
/tests/node/node-ts-test/package-lock.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "test",
3 | "version": "1.0.0",
4 | "lockfileVersion": 2,
5 | "requires": true,
6 | "packages": {
7 | "": {
8 | "name": "test",
9 | "version": "1.0.0",
10 | "license": "ISC",
11 | "dependencies": {
12 | "detect-file-encoding-and-language": "git+https://github.com/gignupg/Detect-File-Encoding-and-Language.git"
13 | },
14 | "devDependencies": {
15 | "ts-node": "^10.9.1",
16 | "typescript": "^4.8.2"
17 | }
18 | },
19 | "node_modules/@cspotcode/source-map-support": {
20 | "version": "0.8.1",
21 | "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz",
22 | "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==",
23 | "dev": true,
24 | "dependencies": {
25 | "@jridgewell/trace-mapping": "0.3.9"
26 | },
27 | "engines": {
28 | "node": ">=12"
29 | }
30 | },
31 | "node_modules/@jridgewell/resolve-uri": {
32 | "version": "3.1.0",
33 | "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.0.tgz",
34 | "integrity": "sha512-F2msla3tad+Mfht5cJq7LSXcdudKTWCVYUgw6pLFOOHSTtZlj6SWNYAp+AhuqLmWdBO2X5hPrLcu8cVP8fy28w==",
35 | "dev": true,
36 | "engines": {
37 | "node": ">=6.0.0"
38 | }
39 | },
40 | "node_modules/@jridgewell/sourcemap-codec": {
41 | "version": "1.4.14",
42 | "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.14.tgz",
43 | "integrity": "sha512-XPSJHWmi394fuUuzDnGz1wiKqWfo1yXecHQMRf2l6hztTO+nPru658AyDngaBe7isIxEkRsPR3FZh+s7iVa4Uw==",
44 | "dev": true
45 | },
46 | "node_modules/@jridgewell/trace-mapping": {
47 | "version": "0.3.9",
48 | "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz",
49 | "integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==",
50 | "dev": true,
51 | "dependencies": {
52 | "@jridgewell/resolve-uri": "^3.0.3",
53 | "@jridgewell/sourcemap-codec": "^1.4.10"
54 | }
55 | },
56 | "node_modules/@tsconfig/node10": {
57 | "version": "1.0.9",
58 | "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.9.tgz",
59 | "integrity": "sha512-jNsYVVxU8v5g43Erja32laIDHXeoNvFEpX33OK4d6hljo3jDhCBDhx5dhCCTMWUojscpAagGiRkBKxpdl9fxqA==",
60 | "dev": true
61 | },
62 | "node_modules/@tsconfig/node12": {
63 | "version": "1.0.11",
64 | "resolved": "https://registry.npmjs.org/@tsconfig/node12/-/node12-1.0.11.tgz",
65 | "integrity": "sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==",
66 | "dev": true
67 | },
68 | "node_modules/@tsconfig/node14": {
69 | "version": "1.0.3",
70 | "resolved": "https://registry.npmjs.org/@tsconfig/node14/-/node14-1.0.3.tgz",
71 | "integrity": "sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==",
72 | "dev": true
73 | },
74 | "node_modules/@tsconfig/node16": {
75 | "version": "1.0.3",
76 | "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.3.tgz",
77 | "integrity": "sha512-yOlFc+7UtL/89t2ZhjPvvB/DeAr3r+Dq58IgzsFkOAvVC6NMJXmCGjbptdXdR9qsX7pKcTL+s87FtYREi2dEEQ==",
78 | "dev": true
79 | },
80 | "node_modules/@types/node": {
81 | "version": "18.7.14",
82 | "resolved": "https://registry.npmjs.org/@types/node/-/node-18.7.14.tgz",
83 | "integrity": "sha512-6bbDaETVi8oyIARulOE9qF1/Qdi/23z6emrUh0fNJRUmjznqrixD4MpGDdgOFk5Xb0m2H6Xu42JGdvAxaJR/wA==",
84 | "dev": true,
85 | "peer": true
86 | },
87 | "node_modules/acorn": {
88 | "version": "8.8.0",
89 | "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.8.0.tgz",
90 | "integrity": "sha512-QOxyigPVrpZ2GXT+PFyZTl6TtOFc5egxHIP9IlQ+RbupQuX4RkT/Bee4/kQuC02Xkzg84JcT7oLYtDIQxp+v7w==",
91 | "dev": true,
92 | "bin": {
93 | "acorn": "bin/acorn"
94 | },
95 | "engines": {
96 | "node": ">=0.4.0"
97 | }
98 | },
99 | "node_modules/acorn-walk": {
100 | "version": "8.2.0",
101 | "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.2.0.tgz",
102 | "integrity": "sha512-k+iyHEuPgSw6SbuDpGQM+06HQUa04DZ3o+F6CSzXMvvI5KMvnaEqXe+YVe555R9nn6GPt404fos4wcgpw12SDA==",
103 | "dev": true,
104 | "engines": {
105 | "node": ">=0.4.0"
106 | }
107 | },
108 | "node_modules/arg": {
109 | "version": "4.1.3",
110 | "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz",
111 | "integrity": "sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==",
112 | "dev": true
113 | },
114 | "node_modules/create-require": {
115 | "version": "1.1.1",
116 | "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz",
117 | "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==",
118 | "dev": true
119 | },
120 | "node_modules/detect-file-encoding-and-language": {
121 | "version": "2.3.0",
122 | "resolved": "git+ssh://git@github.com/gignupg/Detect-File-Encoding-and-Language.git#f94553de99d25bfa7bcc2b9d6ebec3bfaea774ee",
123 | "license": "MIT",
124 | "bin": {
125 | "dfeal": "bin/cli.js"
126 | }
127 | },
128 | "node_modules/diff": {
129 | "version": "4.0.2",
130 | "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz",
131 | "integrity": "sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==",
132 | "dev": true,
133 | "engines": {
134 | "node": ">=0.3.1"
135 | }
136 | },
137 | "node_modules/make-error": {
138 | "version": "1.3.6",
139 | "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz",
140 | "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==",
141 | "dev": true
142 | },
143 | "node_modules/ts-node": {
144 | "version": "10.9.1",
145 | "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.1.tgz",
146 | "integrity": "sha512-NtVysVPkxxrwFGUUxGYhfux8k78pQB3JqYBXlLRZgdGUqTO5wU/UyHop5p70iEbGhB7q5KmiZiU0Y3KlJrScEw==",
147 | "dev": true,
148 | "dependencies": {
149 | "@cspotcode/source-map-support": "^0.8.0",
150 | "@tsconfig/node10": "^1.0.7",
151 | "@tsconfig/node12": "^1.0.7",
152 | "@tsconfig/node14": "^1.0.0",
153 | "@tsconfig/node16": "^1.0.2",
154 | "acorn": "^8.4.1",
155 | "acorn-walk": "^8.1.1",
156 | "arg": "^4.1.0",
157 | "create-require": "^1.1.0",
158 | "diff": "^4.0.1",
159 | "make-error": "^1.1.1",
160 | "v8-compile-cache-lib": "^3.0.1",
161 | "yn": "3.1.1"
162 | },
163 | "bin": {
164 | "ts-node": "dist/bin.js",
165 | "ts-node-cwd": "dist/bin-cwd.js",
166 | "ts-node-esm": "dist/bin-esm.js",
167 | "ts-node-script": "dist/bin-script.js",
168 | "ts-node-transpile-only": "dist/bin-transpile.js",
169 | "ts-script": "dist/bin-script-deprecated.js"
170 | },
171 | "peerDependencies": {
172 | "@swc/core": ">=1.2.50",
173 | "@swc/wasm": ">=1.2.50",
174 | "@types/node": "*",
175 | "typescript": ">=2.7"
176 | },
177 | "peerDependenciesMeta": {
178 | "@swc/core": {
179 | "optional": true
180 | },
181 | "@swc/wasm": {
182 | "optional": true
183 | }
184 | }
185 | },
186 | "node_modules/typescript": {
187 | "version": "4.8.2",
188 | "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.8.2.tgz",
189 | "integrity": "sha512-C0I1UsrrDHo2fYI5oaCGbSejwX4ch+9Y5jTQELvovfmFkK3HHSZJB8MSJcWLmCUBzQBchCrZ9rMRV6GuNrvGtw==",
190 | "dev": true,
191 | "bin": {
192 | "tsc": "bin/tsc",
193 | "tsserver": "bin/tsserver"
194 | },
195 | "engines": {
196 | "node": ">=4.2.0"
197 | }
198 | },
199 | "node_modules/v8-compile-cache-lib": {
200 | "version": "3.0.1",
201 | "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz",
202 | "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==",
203 | "dev": true
204 | },
205 | "node_modules/yn": {
206 | "version": "3.1.1",
207 | "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz",
208 | "integrity": "sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==",
209 | "dev": true,
210 | "engines": {
211 | "node": ">=6"
212 | }
213 | }
214 | },
215 | "dependencies": {
216 | "@cspotcode/source-map-support": {
217 | "version": "0.8.1",
218 | "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz",
219 | "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==",
220 | "dev": true,
221 | "requires": {
222 | "@jridgewell/trace-mapping": "0.3.9"
223 | }
224 | },
225 | "@jridgewell/resolve-uri": {
226 | "version": "3.1.0",
227 | "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.0.tgz",
228 | "integrity": "sha512-F2msla3tad+Mfht5cJq7LSXcdudKTWCVYUgw6pLFOOHSTtZlj6SWNYAp+AhuqLmWdBO2X5hPrLcu8cVP8fy28w==",
229 | "dev": true
230 | },
231 | "@jridgewell/sourcemap-codec": {
232 | "version": "1.4.14",
233 | "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.14.tgz",
234 | "integrity": "sha512-XPSJHWmi394fuUuzDnGz1wiKqWfo1yXecHQMRf2l6hztTO+nPru658AyDngaBe7isIxEkRsPR3FZh+s7iVa4Uw==",
235 | "dev": true
236 | },
237 | "@jridgewell/trace-mapping": {
238 | "version": "0.3.9",
239 | "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz",
240 | "integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==",
241 | "dev": true,
242 | "requires": {
243 | "@jridgewell/resolve-uri": "^3.0.3",
244 | "@jridgewell/sourcemap-codec": "^1.4.10"
245 | }
246 | },
247 | "@tsconfig/node10": {
248 | "version": "1.0.9",
249 | "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.9.tgz",
250 | "integrity": "sha512-jNsYVVxU8v5g43Erja32laIDHXeoNvFEpX33OK4d6hljo3jDhCBDhx5dhCCTMWUojscpAagGiRkBKxpdl9fxqA==",
251 | "dev": true
252 | },
253 | "@tsconfig/node12": {
254 | "version": "1.0.11",
255 | "resolved": "https://registry.npmjs.org/@tsconfig/node12/-/node12-1.0.11.tgz",
256 | "integrity": "sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==",
257 | "dev": true
258 | },
259 | "@tsconfig/node14": {
260 | "version": "1.0.3",
261 | "resolved": "https://registry.npmjs.org/@tsconfig/node14/-/node14-1.0.3.tgz",
262 | "integrity": "sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==",
263 | "dev": true
264 | },
265 | "@tsconfig/node16": {
266 | "version": "1.0.3",
267 | "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.3.tgz",
268 | "integrity": "sha512-yOlFc+7UtL/89t2ZhjPvvB/DeAr3r+Dq58IgzsFkOAvVC6NMJXmCGjbptdXdR9qsX7pKcTL+s87FtYREi2dEEQ==",
269 | "dev": true
270 | },
271 | "@types/node": {
272 | "version": "18.7.14",
273 | "resolved": "https://registry.npmjs.org/@types/node/-/node-18.7.14.tgz",
274 | "integrity": "sha512-6bbDaETVi8oyIARulOE9qF1/Qdi/23z6emrUh0fNJRUmjznqrixD4MpGDdgOFk5Xb0m2H6Xu42JGdvAxaJR/wA==",
275 | "dev": true,
276 | "peer": true
277 | },
278 | "acorn": {
279 | "version": "8.8.0",
280 | "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.8.0.tgz",
281 | "integrity": "sha512-QOxyigPVrpZ2GXT+PFyZTl6TtOFc5egxHIP9IlQ+RbupQuX4RkT/Bee4/kQuC02Xkzg84JcT7oLYtDIQxp+v7w==",
282 | "dev": true
283 | },
284 | "acorn-walk": {
285 | "version": "8.2.0",
286 | "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.2.0.tgz",
287 | "integrity": "sha512-k+iyHEuPgSw6SbuDpGQM+06HQUa04DZ3o+F6CSzXMvvI5KMvnaEqXe+YVe555R9nn6GPt404fos4wcgpw12SDA==",
288 | "dev": true
289 | },
290 | "arg": {
291 | "version": "4.1.3",
292 | "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz",
293 | "integrity": "sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==",
294 | "dev": true
295 | },
296 | "create-require": {
297 | "version": "1.1.1",
298 | "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz",
299 | "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==",
300 | "dev": true
301 | },
302 | "detect-file-encoding-and-language": {
303 | "version": "git+ssh://git@github.com/gignupg/Detect-File-Encoding-and-Language.git#f94553de99d25bfa7bcc2b9d6ebec3bfaea774ee",
304 | "from": "detect-file-encoding-and-language@git+https://github.com/gignupg/Detect-File-Encoding-and-Language.git"
305 | },
306 | "diff": {
307 | "version": "4.0.2",
308 | "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz",
309 | "integrity": "sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==",
310 | "dev": true
311 | },
312 | "make-error": {
313 | "version": "1.3.6",
314 | "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz",
315 | "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==",
316 | "dev": true
317 | },
318 | "ts-node": {
319 | "version": "10.9.1",
320 | "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.1.tgz",
321 | "integrity": "sha512-NtVysVPkxxrwFGUUxGYhfux8k78pQB3JqYBXlLRZgdGUqTO5wU/UyHop5p70iEbGhB7q5KmiZiU0Y3KlJrScEw==",
322 | "dev": true,
323 | "requires": {
324 | "@cspotcode/source-map-support": "^0.8.0",
325 | "@tsconfig/node10": "^1.0.7",
326 | "@tsconfig/node12": "^1.0.7",
327 | "@tsconfig/node14": "^1.0.0",
328 | "@tsconfig/node16": "^1.0.2",
329 | "acorn": "^8.4.1",
330 | "acorn-walk": "^8.1.1",
331 | "arg": "^4.1.0",
332 | "create-require": "^1.1.0",
333 | "diff": "^4.0.1",
334 | "make-error": "^1.1.1",
335 | "v8-compile-cache-lib": "^3.0.1",
336 | "yn": "3.1.1"
337 | }
338 | },
339 | "typescript": {
340 | "version": "4.8.2",
341 | "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.8.2.tgz",
342 | "integrity": "sha512-C0I1UsrrDHo2fYI5oaCGbSejwX4ch+9Y5jTQELvovfmFkK3HHSZJB8MSJcWLmCUBzQBchCrZ9rMRV6GuNrvGtw==",
343 | "dev": true
344 | },
345 | "v8-compile-cache-lib": {
346 | "version": "3.0.1",
347 | "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz",
348 | "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==",
349 | "dev": true
350 | },
351 | "yn": {
352 | "version": "3.1.1",
353 | "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz",
354 | "integrity": "sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==",
355 | "dev": true
356 | }
357 | }
358 | }
359 |
--------------------------------------------------------------------------------
/tests/node/node-ts-test/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "test",
3 | "version": "1.0.0",
4 | "description": "",
5 | "main": "index.js",
6 | "type": "module",
7 | "scripts": {
8 | "test": "ts-node-esm index.ts",
9 | "install-ts-node": "sudo npm install -g ts-node"
10 | },
11 | "keywords": [],
12 | "author": "",
13 | "license": "ISC",
14 | "devDependencies": {
15 | "ts-node": "^10.9.1",
16 | "typescript": "^4.8.2"
17 | },
18 | "dependencies": {
19 | "detect-file-encoding-and-language": "git+https://github.com/gignupg/Detect-File-Encoding-and-Language.git"
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/tests/node/node-ts-test/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | /* Visit https://aka.ms/tsconfig to read more about this file */
4 |
5 | /* Projects */
6 | // "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */
7 | // "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */
8 | // "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */
9 | // "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */
10 | // "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */
11 | // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
12 |
13 | /* Language and Environment */
14 | "target": "es2016", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
15 | // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
16 | // "jsx": "preserve", /* Specify what JSX code is generated. */
17 | // "experimentalDecorators": true, /* Enable experimental support for TC39 stage 2 draft decorators. */
18 | // "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
19 | // "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
20 | // "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
21 | // "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
22 | // "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
23 | // "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
24 | // "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
25 | // "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
26 |
27 | /* Modules */
28 | "module": "ES2020", /* Specify what module code is generated. */
29 | // "rootDir": "./", /* Specify the root folder within your source files. */
30 | "moduleResolution": "node", /* Specify how TypeScript looks up a file from a given module specifier. */
31 | // "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
32 | // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
33 | // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
34 | // "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */
35 | // "types": [], /* Specify type package names to be included without being referenced in a source file. */
36 | // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
37 | // "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */
38 | // "resolveJsonModule": true, /* Enable importing .json files. */
39 | // "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
40 |
41 | /* JavaScript Support */
42 | // "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
43 | // "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */
44 | // "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
45 |
46 | /* Emit */
47 | // "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
48 | // "declarationMap": true, /* Create sourcemaps for d.ts files. */
49 | // "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */
50 | // "sourceMap": true, /* Create source map files for emitted JavaScript files. */
51 | // "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
52 | // "outDir": "./", /* Specify an output folder for all emitted files. */
53 | // "removeComments": true, /* Disable emitting comments. */
54 | // "noEmit": true, /* Disable emitting files from a compilation. */
55 | // "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
56 | // "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types. */
57 | // "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
58 | // "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */
59 | // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
60 | // "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */
61 | // "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */
62 | // "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
63 | // "newLine": "crlf", /* Set the newline character for emitting files. */
64 | // "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */
65 | // "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */
66 | // "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */
67 | // "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */
68 | // "declarationDir": "./", /* Specify the output directory for generated declaration files. */
69 | // "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */
70 |
71 | /* Interop Constraints */
72 | // "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */
73 | // "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
74 | "esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
75 | // "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
76 | "forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */
77 |
78 | /* Type Checking */
79 | "strict": true, /* Enable all strict type-checking options. */
80 | // "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */
81 | // "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */
82 | // "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
83 | // "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */
84 | // "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */
85 | // "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */
86 | // "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */
87 | // "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */
88 | // "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */
89 | // "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */
90 | // "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */
91 | // "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */
92 | // "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */
93 | // "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */
94 | // "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */
95 | // "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */
96 | // "allowUnusedLabels": true, /* Disable error reporting for unused labels. */
97 | // "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */
98 |
99 | /* Completeness */
100 | // "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */
101 | "skipLibCheck": true /* Skip type checking all .d.ts files. */
102 | }
103 | }
104 |
--------------------------------------------------------------------------------
/tests/node/node.test.js:
--------------------------------------------------------------------------------
1 | const languageEncoding = require("../../src/index-node.js");
2 | const fs = require("fs");
3 |
// Sanity check before any detection runs: the entry points for the CLI, Node.js
// and the Browser/UNPKG bundle must all be in place.
for (const [folder, file] of [
  ["bin", "cli.js"],
  ["src", "index-node.js"],
  ["umd", "language-encoding.min.js"],
]) {
  checkLocation(folder, file);
}

// Every file in the 'language folders' dataset is tested; `minConfidence` is the
// lowest confidence value a detection result may report.
const minConfidence = 0.95;
const folderPath = "/home/gignu/Documents/Subtitle Database/Language Folders/";
const testFiles = getFiles(folderPath);
14 |
// Run the detection on every sample file and compare the result with the language and
// encoding taken from the name of the file's parent folder ("<language>_<encoding>").
testFiles.forEach((file) => {
  languageEncoding(file)
    .then((fileInfo) => {
      const pathSegments = file.split("/");
      const folderName = pathSegments[pathSegments.length - 2];
      const [expectedLanguage, expectedEncoding] = folderName.split("_");

      // Log the reason, then let setError print the details and abort the run.
      const fail = (...reason) => {
        console.error(...reason);
        setError(file, fileInfo);
      };

      if (!expectedLanguage) {
        // `file` is a plain path string here, so report the folder name taken from the path.
        fail("Expected language not found in folder name:", folderName);
      } else if (!expectedEncoding) {
        fail("Expected encoding not found in folder name:", folderName);
      } else if (!fileInfo.confidence.encoding || fileInfo.confidence.encoding < minConfidence) {
        fail("Encoding Confidence too low:", fileInfo.confidence.encoding);
      } else if (!fileInfo.confidence.language || fileInfo.confidence.language < minConfidence) {
        fail("Language Confidence too low:", fileInfo.confidence.language);
      } else if (fileInfo.language !== expectedLanguage) {
        fail(`Language mismatch! Expected ${expectedLanguage} but got ${fileInfo.language}`);
      } else if (fileInfo.encoding !== expectedEncoding) {
        fail(`Encoding mismatch! Expected ${expectedEncoding} but got ${fileInfo.encoding}`);
      }
    })
    .catch((error) => {
      console.error(error);
      // A detection that throws is a test failure too: keep processing the remaining
      // files, but make sure the run does not finish with a success exit code.
      process.exitCode = 1;
    });
});
52 |
// Test buffer usage: a Buffer may be passed instead of a file path, and this
// sample content is expected to be reported as UTF-8.
const buffer = Buffer.from("Content of a file");
languageEncoding(buffer)
  .then((bufferFileInfo) => {
    if (bufferFileInfo.encoding !== "UTF-8") {
      setError("buffer", bufferFileInfo);
    }
  })
  .catch((error) => {
    // Report a rejected detection and mark the run as failed instead of leaving
    // the rejection unhandled.
    console.error(error);
    process.exitCode = 1;
  });
60 |
/**
 * Recursively find all files in a folder and all its subdirectories.
 *
 * @param {string} dir - Folder to scan. Each entry path is built as `dir + "/" + name`.
 * @param {string[]} [files_] - Accumulator used by the recursion. When provided, the
 *   paths found are pushed onto it and the same array is returned.
 * @returns {string[]} Paths of every non-directory entry found below `dir`.
 */
function getFiles(dir, files_) {
  const collected = files_ || [];
  // for...of visits only the array elements; for...in would also iterate over any
  // enumerable property that happens to be added to Array.prototype.
  for (const entry of fs.readdirSync(dir)) {
    const name = dir + "/" + entry;
    if (fs.statSync(name).isDirectory()) {
      getFiles(name, collected);
    } else {
      collected.push(name);
    }
  }
  return collected;
}
75 |
/**
 * Verify that `file` is located directly inside `folder` of the project checkout.
 * If it is not, print an error message and terminate the process with exit code 1.
 *
 * @param {string} folder - Folder name relative to `root`, e.g. "bin".
 * @param {string} file - File name expected inside that folder, e.g. "cli.js".
 * @param {string} [root] - Project root, including a trailing slash. Defaults to the
 *   checkout location this script was originally written for.
 */
function checkLocation(folder, file, root = "/home/gignu/GitHub/Detect-File-Encoding-And-Language/") {
  const location = root + folder;
  let entries;
  try {
    entries = fs.readdirSync(location);
  } catch (error) {
    // A folder that does not exist cannot contain the file: fall through to the
    // regular error message instead of crashing with a raw file-system exception.
    if (error.code !== "ENOENT" && error.code !== "ENOTDIR") {
      throw error;
    }
    entries = [];
  }
  if (!entries.includes(file)) {
    // Report the exact path that was checked.
    console.error(`Error: Expected ${file} to be located here: ${location}`);
    process.exit(1);
  }
}
84 |
/**
 * Report a failed check and abort the test run.
 * Prints the detection result and the file (or label) it belongs to via
 * console.info, then exits the process with code 1.
 *
 * @param {*} file - Path or label identifying what was tested.
 * @param {*} fileInfo - Detection result to print.
 */
function setError(file, fileInfo) {
  const details = [
    ["fileInfo:", fileInfo],
    ["file:", file],
  ];
  for (const [label, value] of details) {
    console.info(label, value);
  }
  process.exit(1);
}
--------------------------------------------------------------------------------