├── .eslintrc.js ├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── babel.config.js ├── install.js ├── package.json ├── src ├── __tests__ │ └── TextDecoder.test.ts ├── index.ts ├── install.ts └── install.web.ts ├── tsconfig.json └── yarn.lock /.eslintrc.js: -------------------------------------------------------------------------------- 1 | // @generated by expo-module-scripts 2 | module.exports = require('expo-module-scripts/eslintrc.base.js'); 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | .pnpm-debug.log* 9 | 10 | # Diagnostic reports (https://nodejs.org/api/report.html) 11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 12 | 13 | # Runtime data 14 | pids 15 | *.pid 16 | *.seed 17 | *.pid.lock 18 | 19 | # Directory for instrumented libs generated by jscoverage/JSCover 20 | lib-cov 21 | 22 | # Coverage directory used by tools like istanbul 23 | coverage 24 | *.lcov 25 | 26 | # nyc test coverage 27 | .nyc_output 28 | 29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 30 | .grunt 31 | 32 | # Bower dependency directory (https://bower.io/) 33 | bower_components 34 | 35 | # node-waf configuration 36 | .lock-wscript 37 | 38 | # Compiled binary addons (https://nodejs.org/api/addons.html) 39 | build/Release 40 | 41 | # Dependency directories 42 | node_modules/ 43 | jspm_packages/ 44 | 45 | # Snowpack dependency directory (https://snowpack.dev/) 46 | web_modules/ 47 | 48 | # TypeScript cache 49 | *.tsbuildinfo 50 | 51 | # Optional 
npm cache directory 52 | .npm 53 | 54 | # Optional eslint cache 55 | .eslintcache 56 | 57 | # Optional stylelint cache 58 | .stylelintcache 59 | 60 | # Microbundle cache 61 | .rpt2_cache/ 62 | .rts2_cache_cjs/ 63 | .rts2_cache_es/ 64 | .rts2_cache_umd/ 65 | 66 | # Optional REPL history 67 | .node_repl_history 68 | 69 | # Output of 'npm pack' 70 | *.tgz 71 | 72 | # Yarn Integrity file 73 | .yarn-integrity 74 | 75 | # dotenv environment variable files 76 | .env 77 | .env.development.local 78 | .env.test.local 79 | .env.production.local 80 | .env.local 81 | 82 | # parcel-bundler cache (https://parceljs.org/) 83 | .cache 84 | .parcel-cache 85 | 86 | # Next.js build output 87 | .next 88 | out 89 | 90 | # Nuxt.js build / generate output 91 | .nuxt 92 | dist 93 | 94 | # Gatsby files 95 | .cache/ 96 | # Comment in the public line in if your project uses Gatsby and not Next.js 97 | # https://nextjs.org/blog/next-9-1#public-directory-support 98 | # public 99 | 100 | # vuepress build output 101 | .vuepress/dist 102 | 103 | # vuepress v2.x temp and cache directory 104 | .temp 105 | .cache 106 | 107 | # Serverless directories 108 | .serverless/ 109 | 110 | # FuseBox cache 111 | .fusebox/ 112 | 113 | # DynamoDB Local files 114 | .dynamodb/ 115 | 116 | # TernJS port file 117 | .tern-port 118 | 119 | # Stores VSCode versions used for testing VSCode extensions 120 | .vscode-test 121 | 122 | # yarn v2 123 | .yarn/cache 124 | .yarn/unplugged 125 | .yarn/build-state.yml 126 | .yarn/install-state.gz 127 | .pnp.* 128 | 129 | /build 130 | 131 | /.expo -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 evanbacon 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including 
without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # @bacons/text-decoder 2 | 3 | In Expo SDK 51 (React Native 74), Hermes supports TextEncoder natively but not TextDecoder. This library provides a TextDecoder implementation for Hermes that only supports UTF-8 (all legacy encodings are removed for bundle size). 4 | 5 | You can install it on the global with: 6 | 7 | ```js 8 | import "@bacons/text-decoder/install"; 9 | ``` 10 | 11 | Supports web, ios, android, server, and the upcoming Expo React Server environment for native platforms. 12 | 13 | The implementation is a fork of [`text-encoding`](https://github.com/inexorabletash/text-encoding/blob/3f330964c0e97e1ed344c2a3e963f4598610a7ad/lib/encoding.js#L1) with all legacy encodings, and TextEncoder removed. 14 | 15 | The tests were ported over too to ensure everything works as described. 
16 | -------------------------------------------------------------------------------- /babel.config.js: -------------------------------------------------------------------------------- 1 | // @generated by expo-module-scripts 2 | module.exports = require("expo-module-scripts/babel.config.base"); 3 | -------------------------------------------------------------------------------- /install.js: -------------------------------------------------------------------------------- 1 | import "./src/install"; 2 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@bacons/text-decoder", 3 | "version": "0.0.0", 4 | "description": "A light-weight TextDecoder polyfill for React Native Hermes that only supports utf-8", 5 | "types": "build/index.d.ts", 6 | "main": "src/index.ts", 7 | "sideEffects": true, 8 | "scripts": { 9 | "build": "expo-module build", 10 | "clean": "expo-module clean", 11 | "lint": "expo-module lint", 12 | "test": "expo-module test", 13 | "prepare": "expo-module prepare", 14 | "prepublishOnly": "expo-module prepublishOnly", 15 | "expo-module": "expo-module" 16 | }, 17 | "files": [ 18 | "install.js", 19 | "build", 20 | "src", 21 | "!**/__tests__" 22 | ], 23 | "repository": { 24 | "type": "git", 25 | "url": "https://github.com/evanbacon/text-decoder-polyfill.git" 26 | }, 27 | "peerDependencies": { 28 | "react-native": "*" 29 | }, 30 | "keywords": [ 31 | "expo" 32 | ], 33 | "author": "Evan Bacon", 34 | "license": "MIT", 35 | "jest": { 36 | "preset": "expo-module-scripts/ios" 37 | }, 38 | "devDependencies": { 39 | "expo": "^51.0.0", 40 | "expo-module-scripts": "^3.5.1", 41 | "jest": "^29.7.0", 42 | "react-native": "~0.74.1" 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/__tests__/TextDecoder.test.ts: 
// Jest suite for the UTF-8-only TextDecoder polyfill.
//
// Most cases are ported from the `text-encoding` test suite; the upstream
// location of each case is linked above it. Importing "../install" puts the
// polyfill on the global object, so the bare `TextDecoder` below refers to it.
import "../install";

describe("TextDecoder", () => {
  it(`uses the Expo built-in APIs`, () => {
    expect(TextDecoder[Symbol.for("expo.builtin")]).toBe(true);
  });

  // https://github.com/inexorabletash/text-encoding/blob/3f330964c0e97e1ed344c2a3e963f4598610a7ad/test/test-misc.js#L34C1-L47C18
  it(`has expected attributes`, () => {
    expect("encoding" in new TextEncoder()).toBe(true);
    expect(new TextEncoder().encoding).toBe("utf-8");

    expect("encoding" in new TextDecoder()).toBe(true);

    expect(new TextDecoder().encoding).toBe("utf-8");
    expect(new TextDecoder("utf-8").encoding).toBe("utf-8");

    expect("fatal" in new TextDecoder()).toBe(true);
    expect(new TextDecoder("utf-8").fatal).toBe(false);
    expect(new TextDecoder("utf-8", { fatal: true }).fatal).toBe(true);

    expect("ignoreBOM" in new TextDecoder()).toBe(true);
    expect(new TextDecoder("utf-8").ignoreBOM).toBe(false);
    expect(new TextDecoder("utf-8", { ignoreBOM: true }).ignoreBOM).toBe(true);
  });

  // https://github.com/inexorabletash/text-encoding/blob/3f330964c0e97e1ed344c2a3e963f4598610a7ad/test/test-misc.js#L49C1-L64C16
  it(`handles bad data`, () => {
    [
      { input: "\ud800", expected: "\ufffd" }, // Surrogate half
      { input: "\udc00", expected: "\ufffd" }, // Surrogate half
      { input: "abc\ud800def", expected: "abc\ufffddef" }, // Surrogate half
      { input: "abc\udc00def", expected: "abc\ufffddef" }, // Surrogate half
      { input: "\udc00\ud800", expected: "\ufffd\ufffd" }, // Wrong order
    ].forEach(({ input, expected }) => {
      const encoded = new TextEncoder().encode(input);
      const decoded = new TextDecoder().decode(encoded);
      // Actual value goes into expect() so failure messages read correctly.
      expect(decoded).toBe(expected);
    });
  });

  // https://github.com/inexorabletash/text-encoding/blob/3f330964c0e97e1ed344c2a3e963f4598610a7ad/test/test-misc.js#L66C1-L91C18
  describe("fatal flag", () => {
    const bad = [
      { encoding: "utf-8", input: [0xc0] }, // ends early
      { encoding: "utf-8", input: [0xc0, 0x00] }, // invalid trail
      { encoding: "utf-8", input: [0xc0, 0xc0] }, // invalid trail
      { encoding: "utf-8", input: [0xe0] }, // ends early
      { encoding: "utf-8", input: [0xe0, 0x00] }, // invalid trail
      { encoding: "utf-8", input: [0xe0, 0xc0] }, // invalid trail
      { encoding: "utf-8", input: [0xe0, 0x80, 0x00] }, // invalid trail
      { encoding: "utf-8", input: [0xe0, 0x80, 0xc0] }, // invalid trail
      { encoding: "utf-8", input: [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80] }, // > 0x10FFFF
    ];

    bad.forEach((t) => {
      it(`should throw a TypeError for encoding ${t.encoding} and input ${t.input}`, () => {
        expect(() => {
          new TextDecoder(t.encoding, { fatal: true }).decode(
            new Uint8Array(t.input)
          );
        }).toThrow(TypeError);
      });
    });
  });

  // https://github.com/inexorabletash/text-encoding/blob/3f330964c0e97e1ed344c2a3e963f4598610a7ad/test/test-misc.js#L93C1-L108C43
  it("Encoding names are case insensitive", () => {
    const encodings = [{ label: "utf-8", encoding: "utf-8" }];

    encodings.forEach((test) => {
      expect(new TextDecoder(test.label.toLowerCase()).encoding).toBe(
        test.encoding
      );
      expect(new TextDecoder(test.label.toUpperCase()).encoding).toBe(
        test.encoding
      );
    });
  });

  // https://github.com/inexorabletash/text-encoding/blob/3f330964c0e97e1ed344c2a3e963f4598610a7ad/test/test-misc.js#L110C1-L163C24
  it("Byte-order marks", () => {
    const utf8Bom = [0xef, 0xbb, 0xbf];
    const utf8 = [
      0x7a, 0xc2, 0xa2, 0xe6, 0xb0, 0xb4, 0xf0, 0x9d, 0x84, 0x9e, 0xf4, 0x8f,
      0xbf, 0xbd,
    ];

    const utf16leBom = [0xff, 0xfe];

    const utf16beBom = [0xfe, 0xff];

    const string = "z\xA2\u6C34\uD834\uDD1E\uDBFF\uDFFD"; // z, cent, CJK water, G-Clef, Private-use character

    // missing BOMs
    expect(new TextDecoder("utf-8").decode(new Uint8Array(utf8))).toBe(string);

    // matching BOMs
    expect(
      new TextDecoder("utf-8").decode(new Uint8Array([...utf8Bom, ...utf8]))
    ).toBe(string);

    // mismatching BOMs
    expect(
      new TextDecoder("utf-8").decode(new Uint8Array([...utf16leBom, ...utf8]))
    ).not.toBe(string);
    expect(
      new TextDecoder("utf-8").decode(new Uint8Array([...utf16beBom, ...utf8]))
    ).not.toBe(string);

    // ignore BOMs
    expect(
      new TextDecoder("utf-8", { ignoreBOM: true }).decode(
        new Uint8Array([...utf8Bom, ...utf8])
      )
    ).toBe("\uFEFF" + string);
  });

  // https://github.com/inexorabletash/text-encoding/blob/3f330964c0e97e1ed344c2a3e963f4598610a7ad/test/test-misc.js#L165
  describe("Encoding names", () => {
    it("should return canonical case for utf-8", () => {
      expect(new TextDecoder("utf-8").encoding).toBe("utf-8");
    });
  });

  // https://github.com/inexorabletash/text-encoding/blob/3f330964c0e97e1ed344c2a3e963f4598610a7ad/test/test-misc.js#L173
  describe("Streaming Decode", () => {
    const string =
      "\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF";
    const cases = [
      {
        encoding: "utf-8",
        encoded: [
          0, 49, 50, 51, 65, 66, 67, 97, 98, 99, 194, 128, 195, 191, 196, 128,
          225, 128, 128, 239, 191, 189, 240, 144, 128, 128, 244, 143, 191, 191,
        ],
      },
    ];

    cases.forEach((c) => {
      it(`should correctly stream decode ${c.encoding}`, () => {
        // Feed the bytes in chunks of 1..5 so multi-byte sequences get split
        // across decode() calls at every possible position.
        for (let len = 1; len <= 5; ++len) {
          let out = "";
          const decoder = new TextDecoder(c.encoding);
          for (let i = 0; i < c.encoded.length; i += len) {
            const sub = c.encoded.slice(i, i + len);
            out += decoder.decode(new Uint8Array(sub), { stream: true });
          }
          out += decoder.decode();
          expect(out).toBe(string);
        }
      });
    });
  });

  // https://github.com/inexorabletash/text-encoding/blob/3f330964c0e97e1ed344c2a3e963f4598610a7ad/test/test-misc.js#L218
  describe("Supersets of ASCII decode ASCII correctly", () => {
    it.each(["utf-8"])(
      "should decode ASCII correctly for encoding: %s",
      (encoding) => {
        // Only utf-8 is supported, so no encoding has escape codes to skip
        // within 0x00-0x7F.
        let string = "";
        for (let i = 0; i < 128; ++i) {
          string += String.fromCharCode(i);
        }
        const ascii_encoded = new TextEncoder().encode(string);
        expect(new TextDecoder(encoding).decode(ascii_encoded)).toBe(string);
      }
    );
  });

  // https://github.com/inexorabletash/text-encoding/blob/3f330964c0e97e1ed344c2a3e963f4598610a7ad/test/test-misc.js#L236
  describe("Non-fatal errors at EOF", () => {
    it("should throw TypeError for fatal utf-8", () => {
      expect(() =>
        new TextDecoder("utf-8", { fatal: true }).decode(new Uint8Array([0xff]))
      ).toThrow(TypeError);
    });

    it("should emit U+FFFD for non-fatal utf-8", () => {
      expect(new TextDecoder("utf-8").decode(new Uint8Array([0xff]))).toBe(
        "\uFFFD"
      );
    });
  });

  // https://github.com/inexorabletash/text-encoding/blob/3f330964c0e97e1ed344c2a3e963f4598610a7ad/test/test-misc.js#L257

  describe("Replacement encoding labels", () => {
    const replacementEncodings = [
      "csiso2022kr",
      "hz-gb-2312",
      "iso-2022-cn",
      "iso-2022-cn-ext",
      "iso-2022-kr",
    ];

    it.each(replacementEncodings)(
      "should handle replacement encoding %s correctly",
      (encoding) => {
        expect(
          new TextEncoder(
            // @ts-expect-error
            encoding
          ).encoding
        ).toBe("utf-8");

        expect(() => new TextDecoder(encoding, { fatal: true })).toThrow(
          RangeError
        );

        expect(() => new TextDecoder(encoding, { fatal: false })).toThrow(
          RangeError
        );
      }
    );
  });

  // https://github.com/inexorabletash/text-encoding/blob/3f330964c0e97e1ed344c2a3e963f4598610a7ad/test/test-misc.js#L280
  describe("ArrayBuffer, ArrayBufferView and buffer offsets", () => {
    it("should decode correctly from various buffer views", () => {
      const decoder = new TextDecoder();
      const bytes = [
        65, 66, 97, 98, 99, 100, 101, 102, 103, 104, 67, 68, 69, 70, 71, 72,
      ];
      const chars = "ABabcdefghCDEFGH";
      const buffer = new Uint8Array(bytes).buffer;

      expect(decoder.decode(buffer)).toBe(chars);

      const types = [
        "Uint8Array",
        "Int8Array",
        "Uint8ClampedArray",
        "Uint16Array",
        "Int16Array",
        "Uint32Array",
        "Int32Array",
        "Float32Array",
        "Float64Array",
      ];

      types.forEach((typeName) => {
        const TypeConstructor = globalThis[typeName];
        const array = new TypeConstructor(buffer);

        expect(decoder.decode(array)).toBe(chars);

        // A view that starts one element in and covers 8 bytes: the decoder
        // must honour byteOffset/byteLength rather than the whole buffer.
        const subset = new TypeConstructor(
          buffer,
          TypeConstructor.BYTES_PER_ELEMENT,
          8 / TypeConstructor.BYTES_PER_ELEMENT
        );
        expect(decoder.decode(subset)).toBe(
          chars.substring(
            TypeConstructor.BYTES_PER_ELEMENT,
            TypeConstructor.BYTES_PER_ELEMENT + 8
          )
        );
      });
    });
  });

  // https://github.com/inexorabletash/text-encoding/blob/3f330964c0e97e1ed344c2a3e963f4598610a7ad/test/test-misc.js#L305
  describe("Invalid parameters", () => {
    it("should throw errors for invalid parameters", () => {
      expect(
        () =>
          // @ts-expect-error
          new TextDecoder(null)
      ).toThrow(RangeError);

      expect(
        () =>
          // @ts-expect-error
          new TextDecoder("utf-8", "")
      ).toThrow(TypeError);

      expect(() =>
        // @ts-expect-error
        new TextDecoder("utf-8").decode(null, "")
      ).toThrow(TypeError);
    });
  });

  // https://github.com/inexorabletash/text-encoding/blob/3f330964c0e97e1ed344c2a3e963f4598610a7ad/test/test-misc.js#L383
  it("encode() called with falsy arguments (polyfill bindings)", () => {
    const encoder = new TextEncoder();
    // @ts-expect-error
    expect([...encoder.encode(false)]).toEqual([102, 97, 108, 115, 101]);
    // @ts-expect-error
    expect([...encoder.encode(0)]).toEqual([48]);
  });

  // https://github.com/inexorabletash/text-encoding/blob/master/test/test-utf.js
  describe("UTF-8 - Encode/Decode - reference sample", () => {
    // z, cent, CJK water, G-Clef, Private-use character
    const sample = "z\xA2\u6C34\uD834\uDD1E\uDBFF\uDFFD";
    const cases = [
      {
        encoding: "utf-8",
        expected: [
          0x7a, 0xc2, 0xa2, 0xe6, 0xb0, 0xb4, 0xf0, 0x9d, 0x84, 0x9e, 0xf4,
          0x8f, 0xbf, 0xbd,
        ],
      },
    ];

    cases.forEach(function (t) {
      it("expected equal decodings - " + t.encoding, () => {
        const decoded = new TextDecoder(t.encoding).decode(
          new Uint8Array(t.expected)
        );

        expect(decoded).toBe(sample);
      });
    });
  });

  it("UTF-8 - Encode/Decode - full roundtrip and agreement with encode/decodeURIComponent", () => {
    /**
     * Compares two strings and, on mismatch, reports the first differing
     * code unit instead of dumping two large strings.
     */
    function assertStringEquals(
      actual: string,
      expected: string,
      description: string
    ): void {
      // short circuit success case
      if (actual === expected) {
        return;
      }

      // length check
      expect(actual.length).toBe(expected.length);

      for (let i = 0; i < actual.length; i++) {
        const a = actual.charCodeAt(i);
        const b = expected.charCodeAt(i);
        if (a !== b) {
          throw new Error(
            description +
              ": code unit " +
              i.toString() +
              " unequal: " +
              cpname(a) +
              " != " +
              cpname(b)
          );
        }
      }

      // It should be impossible to get here, because the initial
      // comparison failed, so either the length comparison or the
      // codeunit-by-codeunit comparison should also fail.
      throw new Error(description + ": failed to detect string difference");
    }

    // Inspired by:
    // http://ecmanaut.blogspot.com/2006/07/encoding-decoding-utf8-in-javascript.html
    function encodeUtf8(string: string): Uint8Array {
      const utf8 = unescape(encodeURIComponent(string));
      const octets = new Uint8Array(utf8.length);
      for (let i = 0; i < utf8.length; i += 1) {
        octets[i] = utf8.charCodeAt(i);
      }
      return octets;
    }

    function decodeUtf8(octets: Uint8Array): string {
      const utf8 = String.fromCharCode(...octets);
      return decodeURIComponent(escape(utf8));
    }

    // Helpers for test_utf_roundtrip.

    /** Formats a code point as `U+XXXX` (or `U+XXXXXX` above the BMP). */
    function cpname(n: number): string {
      if (n + 0 !== n) return n.toString();
      const w = n <= 0xffff ? 4 : 6;
      return "U+" + ("000000" + n.toString(16).toUpperCase()).slice(-w);
    }

    /**
     * Builds a string from every `skip`-th code point in
     * `[from, from + len)`, omitting surrogates and encoding astral code
     * points as surrogate pairs.
     */
    function genblock(from: number, len: number, skip: number): string {
      const block: string[] = [];
      for (let i = 0; i < len; i += skip) {
        let cp = from + i;
        if (0xd800 <= cp && cp <= 0xdfff) continue;
        if (cp < 0x10000) {
          block.push(String.fromCharCode(cp));
          continue;
        }
        cp = cp - 0x10000;
        block.push(String.fromCharCode(0xd800 + (cp >> 10)));
        block.push(String.fromCharCode(0xdc00 + (cp & 0x3ff)));
      }
      return block.join("");
    }

    const MIN_CODEPOINT = 0;
    const MAX_CODEPOINT = 0x10ffff;
    const BLOCK_SIZE = 0x1000;
    const SKIP_SIZE = 31;

    const TE_U8 = new TextEncoder();
    const TD_U8 = new TextDecoder("UTF-8");

    for (let i = MIN_CODEPOINT; i < MAX_CODEPOINT; i += BLOCK_SIZE) {
      const block_tag = cpname(i) + " - " + cpname(i + BLOCK_SIZE - 1);
      const block = genblock(i, BLOCK_SIZE, SKIP_SIZE);

      // test UTF-8 encodings against themselves

      const encoded = TE_U8.encode(block);
      const decoded = TD_U8.decode(encoded);
      assertStringEquals(block, decoded, "UTF-8 round trip " + block_tag);

      // test TextEncoder(UTF-8) against the older idiom
      const expEncoded = encodeUtf8(block);

      // Compare the bytes themselves, not just how many there are.
      expect(Array.from(encoded)).toEqual(Array.from(expEncoded));

      const expDecoded = decodeUtf8(expEncoded);
      assertStringEquals(
        decoded,
        expDecoded,
        "UTF-8 reference decoding " + block_tag
      );
    }
  });
});

// --------------------------------------------------------------------------------
// /src/index.ts:
// --------------------------------------------------------------------------------
// A fork of text-encoding but with only UTF-8 decoder.
`TextEncoder` is in Hermes and we only need utf-8 decoder for RSC. 2 | // 3 | // https://github.com/inexorabletash/text-encoding/blob/3f330964c0e97e1ed344c2a3e963f4598610a7ad/lib/encoding.js#L1 4 | 5 | /** 6 | * Checks if a number is within a specified range. 7 | * @param {number} a The number to test. 8 | * @param {number} min The minimum value in the range, inclusive. 9 | * @param {number} max The maximum value in the range, inclusive. 10 | * @returns {boolean} True if a is within the range. 11 | */ 12 | function inRange(a: number, min: number, max: number): boolean { 13 | return min <= a && a <= max; 14 | } 15 | 16 | /** 17 | * Converts an array of code points to a string. 18 | * @param {number[]} codePoints Array of code points. 19 | * @returns {string} The string representation. 20 | */ 21 | function codePointsToString(codePoints: number[]): string { 22 | let s = ""; 23 | for (let i = 0; i < codePoints.length; ++i) { 24 | let cp = codePoints[i]; 25 | if (cp <= 0xffff) { 26 | s += String.fromCharCode(cp); 27 | } else { 28 | cp -= 0x10000; 29 | s += String.fromCharCode((cp >> 10) + 0xd800, (cp & 0x3ff) + 0xdc00); 30 | } 31 | } 32 | return s; 33 | } 34 | 35 | function normalizeBytes(input?: ArrayBuffer | DataView): Uint8Array { 36 | if (typeof input === "object" && input instanceof ArrayBuffer) { 37 | return new Uint8Array(input); 38 | } else if ( 39 | typeof input === "object" && 40 | "buffer" in input && 41 | input.buffer instanceof ArrayBuffer 42 | ) { 43 | return new Uint8Array(input.buffer, input.byteOffset, input.byteLength); 44 | } 45 | return new Uint8Array(0); 46 | } 47 | 48 | /** 49 | * End-of-stream is a special token that signifies no more tokens 50 | * are in the stream. 51 | */ 52 | const END_OF_STREAM = -1; 53 | 54 | const FINISHED = -1; 55 | 56 | /** 57 | * A stream represents an ordered sequence of tokens. 58 | * 59 | * @constructor 60 | * @param {!(number[]|Uint8Array)} tokens Array of tokens that provide the stream. 
61 | */ 62 | class Stream { 63 | private tokens: number[]; 64 | 65 | constructor(tokens: number[] | Uint8Array) { 66 | this.tokens = Array.prototype.slice.call(tokens); 67 | // Reversed as push/pop is more efficient than shift/unshift. 68 | this.tokens.reverse(); 69 | } 70 | 71 | /** 72 | * @return {boolean} True if end-of-stream has been hit. 73 | */ 74 | endOfStream(): boolean { 75 | return !this.tokens.length; 76 | } 77 | 78 | /** 79 | * When a token is read from a stream, the first token in the 80 | * stream must be returned and subsequently removed, and 81 | * end-of-stream must be returned otherwise. 82 | * 83 | * @return {number} Get the next token from the stream, or 84 | * end_of_stream. 85 | */ 86 | read(): number { 87 | if (!this.tokens.length) return END_OF_STREAM; 88 | return this.tokens.pop()!; 89 | } 90 | 91 | /** 92 | * When one or more tokens are prepended to a stream, those tokens 93 | * must be inserted, in given order, before the first token in the 94 | * stream. 95 | * 96 | * @param token The token(s) to prepend to the stream. 97 | */ 98 | prepend(token: number | number[]): void { 99 | if (Array.isArray(token)) { 100 | while (token.length) this.tokens.push(token.pop()!); 101 | } else { 102 | this.tokens.push(token); 103 | } 104 | } 105 | 106 | /** 107 | * When one or more tokens are pushed to a stream, those tokens 108 | * must be inserted, in given order, after the last token in the 109 | * stream. 110 | * 111 | * @param token The tokens(s) to push to the stream. 
112 | */ 113 | push(token: number | number[]): void { 114 | if (Array.isArray(token)) { 115 | while (token.length) this.tokens.unshift(token.shift()!); 116 | } else { 117 | this.tokens.unshift(token); 118 | } 119 | } 120 | } 121 | 122 | function decoderError(fatal: boolean, opt_code_point?: number) { 123 | if (fatal) throw TypeError("Decoder error"); 124 | return opt_code_point || 0xfffd; 125 | } 126 | 127 | interface Encoding { 128 | name: string; 129 | labels: string[]; 130 | } 131 | 132 | const LABEL_ENCODING_MAP: { [key: string]: Encoding } = {}; 133 | 134 | function getEncoding(label: string): Encoding | null { 135 | label = label.trim().toLowerCase(); 136 | if (label in LABEL_ENCODING_MAP) { 137 | return LABEL_ENCODING_MAP[label]; 138 | } 139 | return null; 140 | } 141 | 142 | /** [Encodings table](https://encoding.spec.whatwg.org/encodings.json) (Incomplete as we only need TextDecoder utf8 in Expo RSC. A more complete implementation should be added to Hermes as native code.) */ 143 | const ENCODING_MAP: { heading: string; encodings: Encoding[] }[] = [ 144 | { 145 | encodings: [ 146 | { 147 | labels: ["unicode-1-1-utf-8", "utf-8", "utf8"], 148 | name: "UTF-8", 149 | }, 150 | ], 151 | heading: "The Encoding", 152 | }, 153 | ]; 154 | 155 | ENCODING_MAP.forEach((category) => { 156 | category.encodings.forEach((encoding) => { 157 | encoding.labels.forEach((label) => { 158 | LABEL_ENCODING_MAP[label] = encoding; 159 | }); 160 | }); 161 | }); 162 | 163 | // Registry of of encoder/decoder factories, by encoding name. 
164 | const DECODERS: { 165 | [key: string]: (options: { fatal: boolean }) => UTF8Decoder; 166 | } = { 167 | "UTF-8": (options) => new UTF8Decoder(options), 168 | }; 169 | 170 | // 9.1.1 utf-8 decoder 171 | 172 | interface Decoder { 173 | handler: (stream: Stream, bite: number) => number | number[] | null | -1; 174 | } 175 | 176 | class UTF8Decoder implements Decoder { 177 | // utf-8's decoder's has an associated utf-8 code point, utf-8 178 | // bytes seen, and utf-8 bytes needed (all initially 0), a utf-8 179 | // lower boundary (initially 0x80), and a utf-8 upper boundary 180 | // (initially 0xBF). 181 | private utf8CodePoint = 0; 182 | private utf8BytesSeen = 0; 183 | private utf8BytesNeeded = 0; 184 | private utf8LowerBoundary = 0x80; 185 | private utf8UpperBoundary = 0xbf; 186 | constructor(private options: { fatal: boolean }) {} 187 | /** 188 | * @param {Stream} stream The stream of bytes being decoded. 189 | * @param {number} bite The next byte read from the stream. 190 | * @return {?(number|!Array.)} The next code point(s) 191 | * decoded, or null if not enough data exists in the input 192 | * stream to decode a complete code point. 193 | */ 194 | handler(stream: Stream, bite: number): number | null | -1 { 195 | // 1. If byte is end-of-stream and utf-8 bytes needed is not 0, 196 | // set utf-8 bytes needed to 0 and return error. 197 | if (bite === END_OF_STREAM && this.utf8BytesNeeded !== 0) { 198 | this.utf8BytesNeeded = 0; 199 | return decoderError(this.options.fatal); 200 | } 201 | 202 | // 2. If byte is end-of-stream, return finished. 203 | if (bite === END_OF_STREAM) return FINISHED; 204 | 205 | // 3. If utf-8 bytes needed is 0, based on byte: 206 | if (this.utf8BytesNeeded === 0) { 207 | // 0x00 to 0x7F 208 | if (inRange(bite, 0x00, 0x7f)) { 209 | // Return a code point whose value is byte. 210 | return bite; 211 | } 212 | 213 | // 0xC2 to 0xDF 214 | else if (inRange(bite, 0xc2, 0xdf)) { 215 | // 1. Set utf-8 bytes needed to 1. 
216 | this.utf8BytesNeeded = 1; 217 | 218 | // 2. Set UTF-8 code point to byte & 0x1F. 219 | this.utf8CodePoint = bite & 0x1f; 220 | } 221 | 222 | // 0xE0 to 0xEF 223 | else if (inRange(bite, 0xe0, 0xef)) { 224 | // 1. If byte is 0xE0, set utf-8 lower boundary to 0xA0. 225 | if (bite === 0xe0) this.utf8LowerBoundary = 0xa0; 226 | // 2. If byte is 0xED, set utf-8 upper boundary to 0x9F. 227 | if (bite === 0xed) this.utf8UpperBoundary = 0x9f; 228 | // 3. Set utf-8 bytes needed to 2. 229 | this.utf8BytesNeeded = 2; 230 | // 4. Set UTF-8 code point to byte & 0xF. 231 | this.utf8CodePoint = bite & 0xf; 232 | } 233 | 234 | // 0xF0 to 0xF4 235 | else if (inRange(bite, 0xf0, 0xf4)) { 236 | // 1. If byte is 0xF0, set utf-8 lower boundary to 0x90. 237 | if (bite === 0xf0) this.utf8LowerBoundary = 0x90; 238 | // 2. If byte is 0xF4, set utf-8 upper boundary to 0x8F. 239 | if (bite === 0xf4) this.utf8UpperBoundary = 0x8f; 240 | // 3. Set utf-8 bytes needed to 3. 241 | this.utf8BytesNeeded = 3; 242 | // 4. Set UTF-8 code point to byte & 0x7. 243 | this.utf8CodePoint = bite & 0x7; 244 | } 245 | 246 | // Otherwise 247 | else { 248 | // Return error. 249 | return decoderError(this.options.fatal); 250 | } 251 | 252 | // Return continue. 253 | return null; 254 | } 255 | 256 | // 4. If byte is not in the range utf-8 lower boundary to utf-8 257 | // upper boundary, inclusive, run these substeps: 258 | if (!inRange(bite, this.utf8LowerBoundary, this.utf8UpperBoundary)) { 259 | // 1. Set utf-8 code point, utf-8 bytes needed, and utf-8 260 | // bytes seen to 0, set utf-8 lower boundary to 0x80, and set 261 | // utf-8 upper boundary to 0xBF. 262 | this.utf8CodePoint = 0; 263 | this.utf8BytesNeeded = 0; 264 | this.utf8BytesSeen = 0; 265 | this.utf8LowerBoundary = 0x80; 266 | this.utf8UpperBoundary = 0xbf; 267 | 268 | // 2. Prepend byte to stream. 269 | stream.prepend(bite); 270 | 271 | // 3. Return error. 272 | return decoderError(this.options.fatal); 273 | } 274 | 275 | // 5. 
// 8.1 Interface TextDecoder

/**
 * Decodes byte input into a string using the decoder registered in `DECODERS`
 * for the encoding that `label` resolves to.
 *
 * One decoder instance is kept across `decode()` calls while the caller passes
 * `{ stream: true }`, so a multi-byte sequence split across chunks is carried
 * over to the next call.
 */
export class TextDecoder {
  // Always assigned by the constructor, so it is never null.
  private readonly _encoding: Encoding;
  private readonly _ignoreBOM: boolean;
  private readonly _errorMode: "fatal" | "replacement";
  // True once the first code point of the current decode run has been inspected.
  private _BOMseen = false;
  // True while a streaming decode is in progress (previous call had `stream: true`).
  private _doNotFlush = false;
  // Decoder carried between streaming calls; null whenever no run is in progress.
  private _decoder: UTF8Decoder | null = null;

  /**
   * @param label Encoding label; it is stringified, trimmed and lower-cased
   *   before lookup.
   * @param options `fatal` selects the "fatal" error mode instead of
   *   "replacement"; `ignoreBOM` keeps a leading U+FEFF in the output.
   *   `null` is treated like an omitted argument.
   * @throws TypeError when `options` is neither nullish nor an object.
   * @throws RangeError when the label is unknown or maps to "replacement".
   * @throws Error when no decoder is registered for the resolved encoding.
   */
  constructor(
    label: string = "utf-8",
    options: {
      fatal?: boolean;
      ignoreBOM?: boolean;
    } = {}
  ) {
    if (options != null && typeof options !== "object") {
      throw new TypeError(
        "Second argument of TextDecoder must be undefined or an object, e.g. { fatal: true }"
      );
    }

    const normalizedLabel = String(label).trim().toLowerCase();
    const encoding = getEncoding(normalizedLabel);
    if (encoding === null || encoding.name === "replacement") {
      throw new RangeError(
        `Unknown encoding: ${label} (normalized: ${normalizedLabel})`
      );
    }

    if (!DECODERS[encoding.name]) {
      throw new Error(`Decoder not present: ${encoding.name}`);
    }

    // The guard above lets `null` through, so it must not be dereferenced:
    // fall back to an empty option bag instead of crashing on `null.ignoreBOM`.
    const { fatal, ignoreBOM } = options ?? {};

    this._encoding = encoding;
    this._ignoreBOM = Boolean(ignoreBOM);
    this._errorMode = fatal ? "fatal" : "replacement";
  }

  /** Lower-cased name of the resolved encoding. */
  get encoding(): string {
    return this._encoding.name.toLowerCase();
  }

  /** Whether the decoder was constructed with `fatal: true`. */
  get fatal(): boolean {
    return this._errorMode === "fatal";
  }

  /** Whether a leading byte order mark is kept in the output. */
  get ignoreBOM(): boolean {
    return this._ignoreBOM;
  }

  /**
   * Decodes `input` and returns the resulting string.
   *
   * @param input Bytes to decode; passed through `normalizeBytes` first.
   * @param options With `stream: true` the decoder is not flushed, so an
   *   incomplete trailing sequence is kept for the next call. `null` is
   *   treated like an omitted argument.
   * @returns The decoded text for this chunk.
   */
  decode(
    input?: ArrayBuffer | DataView,
    options: { stream?: boolean } = {}
  ): string {
    const bytes = normalizeBytes(input);

    // 1. If the do not flush flag is unset, set decoder to a new
    // encoding's decoder, set stream to a new stream, and unset the
    // BOM seen flag.
    let decoder = this._decoder;
    if (!this._doNotFlush || decoder === null) {
      const fresh: UTF8Decoder = DECODERS[this._encoding.name]({
        fatal: this.fatal,
      });
      decoder = fresh;
      this._decoder = fresh;
      this._BOMseen = false;
    }

    // 2. If options's stream is true, set the do not flush flag, and
    // unset the do not flush flag otherwise.
    // Optional chaining keeps `decode(bytes, null)` from throwing.
    this._doNotFlush = Boolean(options?.stream);

    // 3. If input is given, push a copy of input to stream.
    // TODO: Align with spec algorithm - maintain stream on instance.
    const inputStream = new Stream(bytes);

    // 4. Let output be a new stream.
    const output: number[] = [];

    // Feed every byte to the decoder; `null` means "needs more input".
    for (
      let token = inputStream.read();
      token !== END_OF_STREAM;
      token = inputStream.read()
    ) {
      const result = decoder.handler(inputStream, token);
      if (result === FINISHED) break;
      if (result !== null) output.push(result);
    }

    // Not streaming: hand the decoder end-of-stream so a dangling partial
    // sequence is reported, then drop the decoder.
    if (!this._doNotFlush) {
      do {
        const result = decoder.handler(inputStream, inputStream.read());
        if (result === FINISHED) break;
        if (result === null) continue;
        if (Array.isArray(result)) output.push.apply(output, result);
        else output.push(result);
      } while (!inputStream.endOfStream());
      this._decoder = null;
    }

    return this.serializeStream(output);
  }

  /**
   * Converts decoded code points to a string, removing one leading U+FEFF
   * per decode run unless `ignoreBOM` was requested.
   *
   * Mutates `stream` when the BOM is stripped.
   */
  private serializeStream(stream: number[]): string {
    // An empty chunk decides nothing: the BOM may still arrive in a later chunk.
    if (this._encoding.name === "UTF-8" && stream.length > 0) {
      if (!this._ignoreBOM && !this._BOMseen && stream[0] === 0xfeff) {
        stream.shift(); // Remove the BOM
      }
      this._BOMseen = true;
    }

    // Convert the stream of code points to a string
    return codePointsToString(stream);
  }
}
// https://wintercg.org/
import { polyfillGlobal as installGlobal } from "react-native/Libraries/Utilities/PolyfillFunctions";

// Well-known shared symbol, hidden from iteration and inspection, that marks a
// global as conforming to the Expo team's documented built-in requirements.
const BUILTIN_SYMBOL = Symbol.for("expo.builtin");

// Prevent installing in server runtimes that target native platforms, e.g. Expo RSC.

// True when the runtime exposes `globalThis` but has no `TextDecoder` on it.
const textDecoderMissing =
  typeof globalThis !== "undefined" && !("TextDecoder" in globalThis);

// Always polyfill in test environments
const inTestEnvironment = typeof expect !== "undefined";

if (textDecoderMissing || inTestEnvironment) {
  // Tags `target` with the hidden, non-configurable built-in marker and returns it.
  const markAsBuiltin = (target: object) => {
    Object.defineProperty(target, BUILTIN_SYMBOL, {
      value: true,
      enumerable: false,
      configurable: false,
    });
    return target;
  };

  // Registers a global whose value is produced by `resolve` and then tagged.
  const registerGlobal = (globalName: string, resolve: () => any) => {
    installGlobal(globalName, () => markAsBuiltin(resolve()));
  };

  registerGlobal("TextDecoder", () => require("./index").TextDecoder);
}